<a href="https://colab.research.google.com/github/mutabazichristian/water-quality-model/blob/christian/dropout%26reg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report

In [21]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
dataset = pd.read_csv('/content/drive/MyDrive/ALU/datasets/water_potability.csv')

In [22]:
dataset.head()

Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
0,,204.890455,20791.318981,7.300212,368.516441,564.308654,10.379783,86.99097,2.963135,0
1,3.71608,129.422921,18630.057858,6.635246,,592.885359,15.180013,56.329076,4.500656,0
2,8.099124,224.236259,19909.541732,9.275884,,418.606213,16.868637,66.420093,3.055934,0
3,8.316766,214.373394,22018.417441,8.059332,356.886136,363.266516,18.436524,100.341674,4.628771,0
4,9.092223,181.101509,17978.986339,6.5466,310.135738,398.410813,11.558279,31.997993,4.075075,0


In [23]:
#Preprocess
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

# Scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [24]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.15, train_size=0.85, random_state=42
)

# Model Architecture with Specified Techniques
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],),
                           kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu',
                           kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile with Adamax and custom learning rate
model.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.002),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Early Stopping Configuration
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=0.001,
    restore_best_weights=True
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
# Train Model
history = model.fit(
    X_train, y_train,
    validation_split=0.15,
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)


Epoch 1/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.6170 - loss: nan - val_accuracy: 0.6005 - val_loss: nan
Epoch 2/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6353 - loss: nan - val_accuracy: 0.6005 - val_loss: nan
Epoch 3/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6076 - loss: nan - val_accuracy: 0.6005 - val_loss: nan
Epoch 4/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6191 - loss: nan - val_accuracy: 0.6005 - val_loss: nan
Epoch 5/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6091 - loss: nan - val_accuracy: 0.6005 - val_loss: nan
Epoch 6/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6104 - loss: nan - val_accuracy: 0.6005 - val_loss: nan


In [28]:
# Evaluate Model
y_pred = (model.predict(X_test) > 0.5).astype(int)
y_pred_flat = y_pred.flatten()
print("Classification Report:")
classification_report(y_test, y_pred_flat)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


'              precision    recall  f1-score   support\n\n           0       0.62      1.00      0.77       306\n           1       0.00      0.00      0.00       186\n\n    accuracy                           0.62       492\n   macro avg       0.31      0.50      0.38       492\nweighted avg       0.39      0.62      0.48       492\n'

In [31]:
# Performance Metrics
accuracy = model.evaluate(X_test, y_test)[1]
f1 = f1_score(y_test, y_pred_flat)
precision = precision_score(y_test, y_pred_flat)
recall = recall_score(y_test, y_pred_flat)
print(f"\nAccuracy: {accuracy}")
print(f"F1 Score: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6450 - loss: nan 

Accuracy: 0.6219512224197388
F1 Score: 0.0
Precision: 0.0
Recall: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
