In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

In [2]:
# Load the dataset
dataset_path = "/content/water_potability.csv"
df = pd.read_csv(dataset_path)

In [3]:
# Data Preprocessing: Handling missing values
df.fillna(df.mean(), inplace=True)

In [4]:
# Splitting features and target
X = df.drop(columns=["Potability"])
y = df["Potability"]

In [5]:
# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
# Splitting data into training (70%), validation (15%), and testing (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [7]:
# Building a Neural Network model with Batch Normalization, L2 Regularization, and Adam Optimizer
def build_model():
    model = keras.Sequential([
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.005)),
        layers.BatchNormalization(),
        layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.005)),
        layers.BatchNormalization(),
        layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01),  # Adam optimizer
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [8]:
# Instantiate and train the model
model = build_model()
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=32, callbacks=[early_stopping])


Epoch 1/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.5817 - loss: 1.2097 - val_accuracy: 0.6517 - val_loss: 0.8240
Epoch 2/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6262 - loss: 0.7948 - val_accuracy: 0.6375 - val_loss: 0.7237
Epoch 3/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6331 - loss: 0.7073 - val_accuracy: 0.6538 - val_loss: 0.6920
Epoch 4/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6547 - loss: 0.6942 - val_accuracy: 0.6415 - val_loss: 0.6847
Epoch 5/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6554 - loss: 0.6943 - val_accuracy: 0.6497 - val_loss: 0.6809
Epoch 6/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6459 - loss: 0.6819 - val_accuracy: 0.6680 - val_loss: 0.6572
Epoch 7/50
[1m72/72[0m [32m━━━━━━━━━

In [9]:
# Evaluate the model
y_pred = (model.predict(X_test) > 0.5).astype("int32")
report = classification_report(y_test, y_pred)
print(report)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
              precision    recall  f1-score   support

           0       0.68      0.94      0.79       307
           1       0.72      0.27      0.39       185

    accuracy                           0.69       492
   macro avg       0.70      0.60      0.59       492
weighted avg       0.70      0.69      0.64       492

