In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# 1. Load the data
df = pd.read_csv("water_potability.csv")

# 2. Inspect missing values
print("Missing values:\n", df.isnull().sum())

# 3. Split features and target (X, y) from the RAW frame.
#    The target is kept out of the imputer so it can never influence
#    (or be altered by) the feature imputation.
X = df.drop("Potability", axis=1)
y = df["Potability"]

# 4. Train-test split BEFORE fitting any preprocessing step.
#    Fitting the imputer/scaler on the full dataset would leak test-set
#    statistics (column means and standard deviations) into training and
#    make the held-out metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=72
)

# 5. Handle missing values with mean imputation.
#    Column means are learned from the training rows only and then
#    re-used, unchanged, to fill the gaps in the test rows.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# 6. Feature scaling, again fitted on the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# 7. Full-dataset views, kept so later cells that reference `df_imputed`
#    or `X_scaled` keep working. They are produced with the train-fitted
#    imputer/scaler; do not use them to fit or evaluate a model, since they
#    contain both the training and the test rows.
X_imputed = imputer.transform(X)
df_imputed = pd.DataFrame(X_imputed, columns=X.columns, index=X.index)
df_imputed["Potability"] = y
X_scaled = scaler.transform(X_imputed)

Missing values:
 ph                 491
Hardness             0
Solids               0
Chloramines          0
Sulfate            781
Conductivity         0
Organic_carbon       0
Trihalomethanes    162
Turbidity            0
Potability           0
dtype: int64


In [5]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation,
# dropout masks and batch shuffling are repeatable across Restart & Run All.
# (Some GPU kernels may still introduce small run-to-run differences.)
SEED = 72
set_random_seed(SEED)

# Probability cut-off used to turn sigmoid outputs into class labels.
DECISION_THRESHOLD = 0.5

# Small fully connected binary classifier: two hidden ReLU layers, each
# with elastic-net (L1 + L2) weight regularisation followed by dropout,
# and a single sigmoid output unit.
model = Sequential([
    Input(shape=(X_train.shape[1],)),  # explicit input layer: one unit per feature
    Dense(128, activation='relu', kernel_regularizer=l1_l2(l1=1e-4, l2=1e-3)),
    Dropout(0.3),
    Dense(64, activation='relu', kernel_regularizer=l1_l2(l1=1e-4, l2=1e-3)),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

optimizer = RMSprop(learning_rate=0.0008)

model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy']
)

# Stop once validation loss has not improved for 15 epochs and roll the
# model back to the weights from its best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# 20% of the training data is held out by Keras for validation; the test
# set is not touched until the final evaluation below.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)

# `predict` yields one sigmoid probability per row as an (n_samples, 1)
# column; the thresholded labels are flattened to 1-D so they line up with
# `y_test` for the scikit-learn metrics.
y_pred_nn = model.predict(X_test)
y_pred_classes = (y_pred_nn > DECISION_THRESHOLD).astype(int).ravel()

acc = accuracy_score(y_test, y_pred_classes)
f1 = f1_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)

print(f"Accuracy: {acc:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Epoch 1/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.5556 - loss: 0.8615 - val_accuracy: 0.6317 - val_loss: 0.8214
Epoch 2/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6142 - loss: 0.8245 - val_accuracy: 0.6489 - val_loss: 0.8010
Epoch 3/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6174 - loss: 0.8062 - val_accuracy: 0.6355 - val_loss: 0.7870
Epoch 4/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6566 - loss: 0.7673 - val_accuracy: 0.6508 - val_loss: 0.7660
Epoch 5/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6523 - loss: 0.7577 - val_accuracy: 0.6489 - val_loss: 0.7578
Epoch 6/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6733 - loss: 0.7450 - val_accuracy: 0.6469 - val_loss: 0.7444
Epoch 7/100
[1m66/66[0m [32m━━━