In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from imblearn.over_sampling import SMOTE


In [3]:
data = pd.read_csv('water_quality.csv')
data.fillna(data.median(), inplace=True)


In [4]:
X = data.drop('Potability', axis=1)
y = data['Potability']

smote = SMOTE()
X_res, y_res = smote.fit_resample(X, y)

scaler = StandardScaler()
X_res = scaler.fit_transform(X_res)


In [5]:
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier()
}

print("\nModel Validation Results (Accuracy):")
for model_name, model in models.items():
    scores = cross_val_score(model, X_res, y_res, cv=10, scoring='accuracy')
    print(f"{model_name} - Accuracy: {scores.mean():.2f}")

X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)



Model Validation Results (Accuracy):
Logistic Regression - Accuracy: 0.51
Decision Tree - Accuracy: 0.60
Random Forest - Accuracy: 0.67


In [6]:
print("\nDetailed Model Evaluation:")
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    print(f"\n{model_name} Results:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print(f"Precision: {precision_score(y_test, y_pred):.2f}")
    print(f"Recall: {recall_score(y_test, y_pred):.2f}")
    print(f"F1-Score: {f1_score(y_test, y_pred):.2f}")
    
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_test)[:, 1]
        print(f"AUC Score: {roc_auc_score(y_test, y_proba):.2f}")


Detailed Model Evaluation:

Logistic Regression Results:
Accuracy: 0.51
Precision: 0.52
Recall: 0.46
F1-Score: 0.49
AUC Score: 0.51

Decision Tree Results:
Accuracy: 0.61
Precision: 0.61
Recall: 0.64
F1-Score: 0.62
AUC Score: 0.61

Random Forest Results:
Accuracy: 0.71
Precision: 0.72
Recall: 0.71
F1-Score: 0.71
AUC Score: 0.78
