<a href="https://colab.research.google.com/github/ngabo-dev/water-model-peer-group-4/blob/main/Nhial_formative_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# === Nhial Majok - Step 1: Load and preprocess the dataset ===

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras import layers, regularizers














Load dataset

In [20]:
# Load dataset
df = pd.read_csv("water_potability.csv")
df.fillna(df.mean(), inplace=True)

In [27]:
df.describe()

Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
count,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0
mean,7.080795,196.369496,22014.092526,7.122277,333.775777,426.205111,14.28497,66.396293,3.966786,0.39011
std,1.469956,32.879761,8768.570828,1.583085,36.142612,80.824064,3.308162,15.769881,0.780382,0.487849
min,0.0,47.432,320.942611,0.352,129.0,181.483754,2.2,0.738,1.45,0.0
25%,6.277673,176.850538,15666.690297,6.127421,317.094638,365.734414,12.065801,56.647656,3.439711,0.0
50%,7.080795,196.967627,20927.833607,7.130299,333.775777,421.884968,14.218338,66.396293,3.955028,0.0
75%,7.87005,216.667456,27332.762127,8.114887,350.385756,481.792304,16.557652,76.666609,4.50032,1.0
max,14.0,323.124,61227.196008,13.127,481.030642,753.34262,28.3,124.0,6.739,1.0


Features and labels

In [21]:

# Features and labels
X = df.drop("Potability", axis=1)
y = df["Potability"]

Feature scaling

In [22]:
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

data spliting

In [23]:


# Train/Validation/Test split
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

Build and compile the model

In [24]:
# === Nhial Majok - Step 2: Build and compile the model ===

model = tf.keras.Sequential([
    layers.Dense(128, kernel_regularizer=regularizers.l2(0.0005), input_shape=(X_train.shape[1],)),
    layers.LeakyReLU(alpha=0.1),
    layers.Dropout(0.35),

    layers.Dense(64, kernel_regularizer=regularizers.l2(0.0005)),
    layers.LeakyReLU(alpha=0.1),
    layers.Dropout(0.35),

    layers.Dense(32, kernel_regularizer=regularizers.l2(0.0005)),
    layers.LeakyReLU(alpha=0.1),

    layers.Dense(1, activation='sigmoid')
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train the model

In [25]:
# === Nhial Majok - Step 3: Train the model ===

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=150,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/150
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.4029 - loss: 0.8254 - val_accuracy: 0.6069 - val_loss: 0.7463
Epoch 2/150
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6110 - loss: 0.7466 - val_accuracy: 0.6110 - val_loss: 0.7345
Epoch 3/150
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6141 - loss: 0.7363 - val_accuracy: 0.6110 - val_loss: 0.7286
Epoch 4/150
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6070 - loss: 0.7383 - val_accuracy: 0.6130 - val_loss: 0.7235
Epoch 5/150
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6133 - loss: 0.7336 - val_accuracy: 0.6151 - val_loss: 0.7185
Epoch 6/150
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6202 - loss: 0.7217 - val_accuracy: 0.6151 - val_loss: 0.7142
Epoch 7/150
[1m72/72[0m [32m━━━

Evaluate the model

In [26]:

# === Nhial Majok - Step 4: Evaluate the model ===

y_pred_probs = model.predict(X_test)
custom_threshold = 0.4  # lowered from 0.5 to improve recall
y_pred = (y_pred_probs > custom_threshold).astype(int)
# Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print("\nNhial Majok - Evaluation Results")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step

Nhial Majok - Evaluation Results
Accuracy : 0.6768
Precision: 0.5943
Recall   : 0.5417
F1 Score : 0.5668
