In [96]:
import ast
import os
import shutil

import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from tensorflow.keras import layers, Model, optimizers, losses, callbacks

In [97]:
# Load the full dataset; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='../data/all_data.csv')

In [98]:
# First cut: keep 70 % for training and hold out 30 %, stratified on the label.
train, test = train_test_split(
    data,
    test_size=0.3,
    random_state=42,
    stratify=data["Is high risk"],
)
# Second cut: halve the 30 % hold-out into the final test and validation sets,
# again stratified on the label (`test` is rebound to its first half).
test, val = train_test_split(
    test,
    test_size=0.5,
    random_state=42,
    stratify=test["Is high risk"],
)

In [99]:
# Separate each split into its feature columns (X_*) and the "Is high risk" label (Y_*).
X_train, Y_train = train.drop(columns="Is high risk"), train["Is high risk"]

X_test, Y_test = test.drop(columns="Is high risk"), test["Is high risk"]

X_val, Y_val = val.drop(columns="Is high risk"), val["Is high risk"]

In [100]:
def model_builder(hp):
  """Build and compile the binary classifier for a single tuner trial.

  The network is four 64-unit ReLU Dense layers followed by one sigmoid
  output unit. The input width is read from the notebook-level ``X_train``.
  The only tuned hyperparameter is the Adam learning rate.

  Args:
    hp: Hyperparameter object passed in by the tuner; only ``hp.Choice``
      is used here.

  Returns:
    The compiled model (binary cross-entropy loss, accuracy metric).
  """
  inputs = layers.Input(shape=(X_train.shape[1],))

  # Stack of four identical hidden layers.
  hidden = inputs
  for _ in range(4):
    hidden = layers.Dense(64, activation='relu')(hidden)

  outputs = layers.Dense(1, activation='sigmoid')(hidden)
  net = Model(inputs=inputs, outputs=outputs)

  # Candidate learning rates, one per decade from 1e-2 down to 1e-8.
  learning_rate = hp.Choice(
      'learning_rate',
      values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8])

  adam = optimizers.Adam(
      learning_rate=learning_rate,
      beta_1=0.9,
      beta_2=0.999,
      amsgrad=True)

  # The output layer already applies a sigmoid, hence from_logits=False.
  net.compile(
      optimizer=adam,
      loss=losses.BinaryCrossentropy(from_logits=False),
      metrics=['accuracy'])

  return net

In [101]:
# Delete the tuner's working directory left by a previous run, if it exists,
# so the search below starts from an empty 'optimization' folder.
if os.path.exists('optimization'):
    # shutil.rmtree needs no shell or `rm` binary (portable), and unlike the
    # ignored exit status of os.system it raises if the removal fails.
    shutil.rmtree('optimization')

In [102]:
# Hyperband tuner over model_builder, ranking trials by validation accuracy.
# Trial state is written under optimization/ml.
# seed=42 pins the tuner's hyperparameter sampling and matches the
# random_state=42 used for the data splits.
# NOTE(review): model weight initialisation is not seeded here, so individual
# trial scores may still vary between runs.
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=1000,
                     factor=5,
                     seed=42,
                     directory='optimization',
                     project_name='ml')

In [103]:
# Lenient stopper: gives up only after 100 epochs without a better val_loss.
stop_early = callbacks.EarlyStopping(patience=100, monitor='val_loss')

# Stricter stopper on the same metric.
early_stopping = EarlyStopping(
    monitor='val_loss',         # watch the validation loss ...
    mode='min',                 # ... which should be decreasing
    min_delta=0.001,            # smaller changes do not count as an improvement
    patience=10,                # epochs to wait after the last improvement
    restore_best_weights=True,  # roll back to the weights of the best epoch
    verbose=1,
)

In [104]:
# Run the hyperparameter search, scoring every trial on the validation split.
tuner.search(X_train, Y_train,
             epochs=1000,
             validation_data=(X_val, Y_val),
             # callbacks=[early_stopping],  # early stopping is currently disabled
             )

# Get the single best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# 'learning_rate' is the only hyperparameter read back from the result,
# so the message reports just that value.
print(f"""
The hyperparameter search is complete. The optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")

Trial 7 Complete [00h 00m 03s]
val_accuracy: 0.9829950928688049

Best val_accuracy So Far: 0.9829950928688049
Total elapsed time: 00h 00m 19s

The hyperparameter search is complete. The optimal number of units in the optimal learning rate for the optimizer
is 0.01.



In [105]:
# batch_size = 32
# epochs = 1000

# history = model.fit(X_train, 
#                     Y_train, 
#                     batch_size=batch_size,
#                     validation_data=(X_val, Y_val),
#                     callbacks=[early_stopping],
#                     epochs=epochs)


In [106]:
# # Print the model training history for accuracy
# accuracy = history.history['accuracy']
# val_accuracy = history.history['val_accuracy']
# epochs = range(1, len(accuracy) + 1)

# plt.plot(epochs, accuracy)
# plt.plot(epochs, val_accuracy)
# plt.title('Model Accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Training', 'Validation'], loc='lower left')
# # plt.xticks(np.arange(1, len(accuracy)+1, 1))
# plt.show()

In [107]:
# # print the model training history for loss
# loss = history.history['loss']
# val_loss = history.history['val_loss']
# epochs = range(1, len(loss) + 1)

# plt.plot(epochs, loss)
# plt.plot(epochs, val_loss)
# plt.title('Model Loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Training', 'Validation'], loc='upper left')
# # plt.xticks(np.arange(1, len(loss)+1, 1))
# plt.show()

In [108]:
# model.evaluate(X_test, Y_test)

In [109]:
# model.evaluate(X_val, Y_val)

In [110]:
# model.evaluate(X_train, Y_train)