# Data Preprocessing Part

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Preprocessing pipeline for the water-potability dataset.
# The train/test split happens BEFORE any transformer is fitted, so the
# imputation means and scaling statistics are learned from the training rows
# only and nothing about the held-out rows leaks into training.

# 1. Load the data
df = pd.read_csv("water_potability.csv")

# 2. Inspect missing values
print("Missing values:\n", df.isnull().sum())

# 3. Split features and target (the target is never passed through the imputer)
features_raw = df.drop("Potability", axis=1)
y = df["Potability"]

# 4. Train-test split on the raw features; stratify keeps the class ratio
#    identical in both partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features_raw, y, test_size=0.2, stratify=y, random_state=72
)

# 5. Handle missing values with mean imputation (means come from the training rows)
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# 6. Feature scaling (mean/std come from the training rows)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# 7. Full-dataset views kept under their original names for later cells.
#    They reuse the transformers fitted on the training rows above, so they do
#    not reintroduce leakage.
features_imputed = imputer.transform(features_raw)
df_imputed = pd.DataFrame(features_imputed, columns=features_raw.columns, index=features_raw.index)
df_imputed["Potability"] = y
X = df_imputed.drop("Potability", axis=1)
X_scaled = scaler.transform(features_imputed)

Missing values:
 ph                 491
Hardness             0
Solids               0
Chloramines          0
Sulfate            781
Conductivity         0
Organic_carbon       0
Trihalomethanes    162
Turbidity            0
Potability           0
dtype: int64


# Model Training Cell: Loue Sauveur Christian (lscblack)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

# Build, train and evaluate a small fully connected binary classifier.
# Reads X_train / y_train / X_test / y_test produced by the preprocessing cell.

# Cell-local imports: `Input` declares the feature shape explicitly, and the
# sklearn metrics are only needed by the evaluation step at the end.
from tensorflow.keras.layers import Input
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# 1. Model architecture: two L2-regularized hidden layers, each followed by
#    dropout, and a single sigmoid unit that outputs a probability.
#    An explicit Input layer replaces the `input_shape=` argument on the first
#    Dense layer, which Keras warns about inside Sequential models.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Binary classification
])

# 2. Optimizer
optimizer = Adam(learning_rate=0.0015)

# 3. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy']
)

# 4. Early stopping: stop once val_loss has not improved for 15 epochs and
#    roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# 5. Train; validation_split holds out 20% of the training data for val_loss.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)

# 6. Evaluate on the held-out test set.
y_pred_nn = model.predict(X_test)
# predict() returns one column per output unit; threshold at 0.5 and flatten to
# a 1-D label vector so it has the same shape as y_test.
y_pred_classes = (y_pred_nn > 0.5).astype(int).ravel()

acc = accuracy_score(y_test, y_pred_classes)
f1 = f1_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)

# Summary of test-set metrics
print(f"Accuracy: {acc:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.6132 - loss: 0.7648 - val_accuracy: 0.6336 - val_loss: 0.7399
Epoch 2/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6404 - loss: 0.7130 - val_accuracy: 0.6508 - val_loss: 0.7143
Epoch 3/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6743 - loss: 0.6865 - val_accuracy: 0.6508 - val_loss: 0.7017
Epoch 4/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6768 - loss: 0.6699 - val_accuracy: 0.6412 - val_loss: 0.6900
Epoch 5/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6769 - loss: 0.6602 - val_accuracy: 0.6374 - val_loss: 0.6833
Epoch 6/100
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6868 - loss: 0.6538 - val_accuracy: 0.6393 - val_loss: 0.6760
Epoch 7/100
[1m66/66[0m [32m━━━━━━━━━━━━━━

| Train Instance               | Engineer Name                     | Regularizer | Optimizer        | Early Stopping                   | Dropout Rate | Accuracy | F1 Score | Recall | Precision |
| ---------------------------- | --------------------------------- | ----------- | ---------------- | -------------------------------- | ------------ | -------- | -------- | ------ | --------- |
| water\_potability\_model\_v3 | Loue Sauveur Christian (lscblack) | L2 (0.001)  | Adam (lr=0.0015) | Patience=15, monitor='val\_loss' | 0.2          | 0.7027   | 0.5255   | 0.4219 | 0.6968   |
