In [1]:
import os
from datetime import datetime

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tensorflow import squeeze
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from timeit import default_timer as timer
from tensorflow.keras.layers import Dense, Dropout

from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt


# Absolute path of the directory the notebook is run from; later cells build
# the model output paths from it.
path = os.path.abspath(os.getcwd())
# Timestamped "graph<YYYYmmdd-HHMMSS>" directory placed directly under `path`.
# os.path.join is used instead of a hard-coded "\\" so the result is a real
# child directory on POSIX systems too (on Windows the value is unchanged).
log_path = os.path.join(path, "graph" + datetime.now().strftime("%Y%m%d-%H%M%S"))

In [2]:
# Load the cleaned dataset and discard the 'Unnamed: 0' column
# (presumably a leftover index written by an earlier to_csv -- TODO confirm).
df = pd.read_csv('df_clean_v2.csv')
df = df.drop(columns='Unnamed: 0')

# Features are every remaining column except the binary target 'Potability'.
X = df.drop(columns='Potability')
y = df['Potability']

# Stratified 80/20 split. random_state is fixed so the split -- and therefore
# every metric reported further down -- is reproducible across
# "Restart Kernel -> Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Fit the scaler on the training portion only, then apply the same transform
# to the held-out portion so no test statistics leak into the scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
class TimingCallback(Callback):
    """Keras callback that records how long each epoch takes.

    After training, ``self.logs`` contains one float per completed epoch: the
    elapsed time between ``on_epoch_begin`` and ``on_epoch_end`` as measured
    by ``timeit.default_timer``.
    """

    def __init__(self, logs=None):
        # `logs` is accepted only for backward compatibility with existing
        # call sites; it has never been used.
        # Initialise the base Callback so its internal attributes exist.
        super().__init__()
        self.logs = []

    def on_epoch_begin(self, epoch, logs=None):
        """Remember the timer value at the start of the epoch."""
        self.starttime = timer()

    def on_epoch_end(self, epoch, logs=None):
        """Append the elapsed time of the epoch that just finished."""
        self.logs.append(timer() - self.starttime)



# Stop training when validation accuracy has not improved by at least 0.0002
# within the configured patience of 4 epochs.
early_stopping = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.0002,
    patience=4,
    verbose=1,
)

# Halve the learning rate when validation accuracy plateaus (improvement
# below 0.005 with a patience of 2), with a 1-epoch cooldown afterwards.
reduce_learning_rate = ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.5,
    patience=2,
    min_delta=0.005,
    cooldown=1,
    verbose=1,
)

# Keep only the best model seen so far on disk. No `monitor` is passed, so the
# Keras default applies (the training log reports `val_loss`).
# NOTE(review): this differs from the `val_accuracy` monitor used by the two
# callbacks above -- confirm that is intended.
save_early_water = ModelCheckpoint(
    filepath=path + "/h5/water.keras",
    save_best_only=True,
    verbose=1,
)

# Collects per-epoch durations in `time_callback.logs`.
time_callback = TimingCallback()

# Order is preserved from the original list.
callbacks_water = [
    early_stopping,
    reduce_learning_rate,
    save_early_water,
    time_callback,
]

In [4]:
# Fully connected binary classifier: four ReLU hidden layers of decreasing
# width, each followed by 20% dropout, and a single sigmoid output unit.
model_water = Sequential([
    Dense(1024, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

# Build the model with the number of input features before calling summary():
# an unbuilt Sequential has no weights, so the summary would otherwise show no
# output shapes or parameter counts (and raises on older Keras versions).
model_water.build(input_shape=(None, X_train.shape[1]))

model_water.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_water.summary()

In [5]:
# Train for at most 15 epochs in mini-batches of 128; the callbacks may stop
# earlier, lower the learning rate, and checkpoint the best model.
# `watermodel` receives the return value of fit() (the training history),
# not the model itself.
# NOTE(review): the test split is used as validation data here, so early
# stopping / LR scheduling / checkpointing are tuned on the same samples that
# are later used for the final report -- consider a separate validation split.
watermodel = model_water.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=15,
    batch_size=128,
    callbacks=callbacks_water,
    verbose=True,
)

Epoch 1/15
[1m15/21[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.5704 - loss: 0.6812  
Epoch 1: val_loss improved from inf to 0.64180, saving model to /Users/saganne.chevalier/Documents/plateforme/water/h5/water.keras
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5826 - loss: 0.6756 - val_accuracy: 0.6433 - val_loss: 0.6418 - learning_rate: 0.0010
Epoch 2/15
[1m14/21[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.6738 - loss: 0.6266 
Epoch 2: val_loss improved from 0.64180 to 0.62159, saving model to /Users/saganne.chevalier/Documents/plateforme/water/h5/water.keras
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6734 - loss: 0.6266 - val_accuracy: 0.6662 - val_loss: 0.6216 - learning_rate: 0.0010
Epoch 3/15
[1m15/21[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.6734 - loss: 0.5958 
Epoch 3: val_loss did not impr

In [6]:
# Persist the model in its final training state. This file is separate from
# the best-only checkpoint written to "/h5/water.keras" during training.
model_water.save(filepath=path + "/h5/water_saved.keras")

In [7]:
# Predicted probabilities from the sigmoid output of the model.
y_pred = model_water.predict(X_test, verbose=1)
# Flatten to 1-D and threshold at 0.5 in one vectorised step. reshape(-1)
# (instead of squeeze + a Python loop) also works when there is only a single
# test sample, where squeeze would yield a 0-d value that cannot be iterated.
y_pred = (np.asarray(y_pred).reshape(-1) >= 0.5).astype(int)

# Plain array of the true labels for the report.
y_test = np.asarray(y_test)

print(classification_report(y_test, y_pred))

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
              precision    recall  f1-score   support

           0       0.70      0.82      0.76       400
           1       0.62      0.46      0.53       256

    accuracy                           0.68       656
   macro avg       0.66      0.64      0.64       656
weighted avg       0.67      0.68      0.67       656

