# Data Preprocessing Part

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# 1. Load the data
df = pd.read_csv("water_potability.csv")

# 2. Inspect missing values
print("Missing values:\n", df.isnull().sum())

# 3. Handle missing values with (mean imputation)
imputer = SimpleImputer(strategy='mean')
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)


# 5. Split features and target x,y
X = df_imputed.drop("Potability", axis=1)
y = df_imputed["Potability"]

# 6. Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 7. Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, stratify=y, random_state=72)

Missing values:
 ph                 491
Hardness             0
Solids               0
Chloramines          0
Sulfate            781
Conductivity         0
Organic_carbon       0
Trihalomethanes    162
Turbidity            0
Potability           0
dtype: int64


In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

# Model architecture
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=l2(0.001)),
    Dropout(0.2),  
    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Optimizer
optimizer = Adam(learning_rate=0.0015)

# Compile
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy']
)

# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)

# 5. Evaluate
y_pred_nn = model.predict(X_test)
y_pred_classes = (y_pred_nn > 0.5).astype(int)

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

acc = accuracy_score(y_test, y_pred_classes)
f1 = f1_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)
# summary viewer
print(f"Accuracy: {acc:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step - accuracy: 0.5948 - loss: 0.7812 - val_accuracy: 0.6393 - val_loss: 0.7351
Epoch 2/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.6542 - loss: 0.7187 - val_accuracy: 0.6450 - val_loss: 0.7030
Epoch 3/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6494 - loss: 0.7008 - val_accuracy: 0.6431 - val_loss: 0.6906
Epoch 4/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6823 - loss: 0.6787 - val_accuracy: 0.6527 - val_loss: 0.6822
Epoch 5/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6836 - loss: 0.6611 - val_accuracy: 0.6527 - val_loss: 0.6823
Epoch 6/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6834 - loss: 0.6541 - val_accuracy: 0.6431 - val_loss: 0.6721
Epoch 7/100
[1m66/66[0m [32m━

| Train Instance               | Engineer Name                     | Regularizer | Optimizer        | Early Stopping                   | Dropout Rate | Accuracy | F1 Score | Recall | Precision |
| ---------------------------- | --------------------------------- | ----------- | ---------------- | -------------------------------- | ------------ | -------- | -------- | ------ | --------- |
| water\_potability\_model\_v3 | Loue Sauveur Christian (lscblack) | L2 (0.001)  | Adam (lr=0.0015) | Patience=15, monitor='val\_loss' | 0.2          | 0.7027   | 0.5255   | 0.4219 | 0.6968   |


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping

model = Sequential([
    Input(shape=(X_train.shape[1],)),  # <-- Add Input layer explicitly
    Dense(128, activation='relu', kernel_regularizer=l1_l2(l1=1e-4, l2=1e-3)),
    Dropout(0.3),  
    Dense(64, activation='relu', kernel_regularizer=l1_l2(l1=1e-4, l2=1e-3)),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

optimizer = RMSprop(learning_rate=0.0008)

model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy']
)

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)


Epoch 1/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5761 - loss: 0.8653 - val_accuracy: 0.6088 - val_loss: 0.8316
Epoch 2/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6259 - loss: 0.8109 - val_accuracy: 0.6298 - val_loss: 0.8054
Epoch 3/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6268 - loss: 0.7933 - val_accuracy: 0.6374 - val_loss: 0.7843
Epoch 4/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6471 - loss: 0.7698 - val_accuracy: 0.6603 - val_loss: 0.7679
Epoch 5/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6519 - loss: 0.7559 - val_accuracy: 0.6527 - val_loss: 0.7553
Epoch 6/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6591 - loss: 0.7393 - val_accuracy: 0.6489 - val_loss: 0.7415
Epoch 7/100
[1m66/66[0m [32m━━━