In [1]:
#Load the pre-computed train/validation/test split from disk.
#NOTE(review): joblib.load unpickles the file, and unpickling can execute arbitrary code —
#only load a split_data.pkl that this project produced itself.
import joblib
#The pickle is unpacked into six objects, in this exact order.
#NOTE(review): the "_sm" suffix presumably marks a SMOTE-resampled training split — confirm
#against the notebook that wrote split_data.pkl.
X_train_sm, X_val, X_test, y_train_sm, y_val, y_test = joblib.load('split_data.pkl')


In [2]:
#Sanity check: report the (features, labels) shapes of every split
for split_name, split_X, split_y in (
    ("Training", X_train_sm, y_train_sm),
    ("Validation", X_val, y_val),
    ("Testing", X_test, y_test),
):
    print(f"{split_name} dataset shape:{split_X.shape, split_y.shape}")

Training dataset shape:((12856, 6), (12856,))
Validation dataset shape:((1414, 6), (1414,))
Testing dataset shape:((1414, 6), (1414,))


In [3]:
#Importing all libraries required for creating DNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Input
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau



In [4]:
#Defining a function that creates and compiles the DNN model
def create_dnn(neurons1 = 32, neurons2 = 16, neurons3 = 8, activation1 = 'relu', activation2 = 'relu',
               activation3 = 'relu', optimizer = 'adam', learning_rate = 0.001, dropout_rate = 0.5,
               input_dim = 6):
    """Build and compile a binary classifier with three hidden layers.

    Each hidden layer is a Dense layer followed by BatchNormalization and
    Dropout. The output is a single sigmoid unit and the model is compiled
    with binary cross-entropy loss and the accuracy metric.

    Parameters
    ----------
    neurons1, neurons2, neurons3 : int
        Number of units in the first, second and third hidden layer.
    activation1, activation2, activation3 : str
        Activation used by the first, second and third hidden layer.
    optimizer : str
        One of 'adam', 'sgd' or 'rmsprop'.
    learning_rate : float
        Learning rate passed to the selected optimizer.
    dropout_rate : float
        Dropout rate applied after every hidden layer.
    input_dim : int
        Number of input features.

    Returns
    -------
    The compiled Sequential model.

    Raises
    ------
    KeyError
        If `optimizer` is not one of the supported names.
    """
    #Map names to optimizer classes (not instances) so that only the requested
    #optimizer is constructed on each call
    optimizer_classes = {
        'adam' : Adam,
        'sgd' : SGD,
        'rmsprop' : RMSprop,
    }
    selected_optimizer = optimizer_classes[optimizer](learning_rate = learning_rate)

    model = Sequential()

    #Input layer sized to the number of features (input_dim)
    model.add(Input(shape = (input_dim,)))

    #Hidden layers: Dense -> BatchNormalization -> Dropout, repeated three times
    hidden_layers = (
        (neurons1, activation1),
        (neurons2, activation2),
        (neurons3, activation3),
    )
    for units, activation in hidden_layers:
        model.add(Dense(units, activation = activation))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    #Output layer with one sigmoid neuron for binary classification
    model.add(Dense(1, activation = 'sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=selected_optimizer, metrics=['accuracy'])
    return model

In [5]:
#Build the initial (baseline) DNN with hand-picked hyperparameters
#NOTE(review): learning_rate = 0.1 is 100x create_dnn's default of 0.001 — confirm this is intended
model = create_dnn(
    neurons1 = 128, neurons2 = 64, neurons3 = 32,
    activation1 = 'relu', activation2 = 'relu', activation3 = 'relu',
    optimizer = 'adam', learning_rate = 0.1, dropout_rate = 0.4,
    input_dim = 6,
)

#Stop once val_loss has not improved for 5 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
#Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.000)

#Train the DNN model; `dnn` holds the value returned by fit()
dnn = model.fit(
    X_train_sm, y_train_sm,
    validation_data = (X_val, y_val),
    epochs = 50,
    batch_size = 32,
    callbacks = [early_stopping, reduce_lr],
    verbose = 2,
)

Epoch 1/50
402/402 - 4s - 10ms/step - accuracy: 0.8514 - loss: 0.3734 - val_accuracy: 0.8861 - val_loss: 0.2414 - learning_rate: 0.1000
Epoch 2/50
402/402 - 1s - 2ms/step - accuracy: 0.8741 - loss: 0.3238 - val_accuracy: 0.9222 - val_loss: 0.1895 - learning_rate: 0.1000
Epoch 3/50
402/402 - 1s - 3ms/step - accuracy: 0.8871 - loss: 0.3019 - val_accuracy: 0.9250 - val_loss: 0.1453 - learning_rate: 0.1000
Epoch 4/50
402/402 - 1s - 2ms/step - accuracy: 0.8839 - loss: 0.3049 - val_accuracy: 0.8996 - val_loss: 0.2358 - learning_rate: 0.1000
Epoch 5/50
402/402 - 1s - 3ms/step - accuracy: 0.8911 - loss: 0.2900 - val_accuracy: 0.8289 - val_loss: 0.5284 - learning_rate: 0.1000
Epoch 6/50
402/402 - 1s - 2ms/step - accuracy: 0.8951 - loss: 0.2926 - val_accuracy: 0.9038 - val_loss: 0.2029 - learning_rate: 0.1000
Epoch 7/50
402/402 - 1s - 2ms/step - accuracy: 0.9176 - loss: 0.2291 - val_accuracy: 0.9151 - val_loss: 0.1871 - learning_rate: 0.0200
Epoch 8/50
402/402 - 1s - 3ms/step - accuracy: 0.9218 

In [6]:
#Display the architecture summary of the initial DNN built above
model.summary()

In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, confusion_matrix

#Score the initial DNN on the validation split
y_val_pred_prob = model.predict(X_val)
#Turn the sigmoid outputs into hard 0/1 labels with a 0.5 cut-off
y_val_pred = (y_val_pred_prob > 0.5).astype(int)

accuracy_inl = accuracy_score(y_val, y_val_pred)
precision_inl = precision_score(y_val, y_val_pred)
recall_inl = recall_score(y_val, y_val_pred)
f1_inl = f1_score(y_val, y_val_pred)
mse_inl = mean_squared_error(y_val, y_val_pred)
confmx_inl = confusion_matrix(y_val, y_val_pred)

#Report the scalar metrics first, then the confusion matrix on its own lines
print("Initial Model Evaluation")
for metric_label, metric_value in (
    ("Accuracy", accuracy_inl),
    ("Precision", precision_inl),
    ("Recall", recall_inl),
    ("F1-Score", f1_inl),
    ("MSE", mse_inl),
):
    print(f"{metric_label}:{metric_value}")
print(f"Confusion Matrix:\n{confmx_inl}")

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Initial Model Evaluation
Accuracy:0.925035360678925
Precision:0.25
Recall:0.7619047619047619
F1-Score:0.3764705882352941
MSE:0.07496463932107496
Confusion Matrix:
[[1276   96]
 [  10   32]]


In [15]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
# Wrap create_dnn in a scikit-learn compatible estimator for RandomizedSearchCV.
# `model=` is used instead of the deprecated `build_fn=` argument of SciKeras.
# The wrapper gets its own name so the trained initial `model` is not overwritten,
# which keeps the earlier summary/evaluation cells re-runnable.
dnn_classifier = KerasClassifier(
    model=create_dnn,
    neurons1 = 128, neurons2 = 64, neurons3 = 32, activation1 = 'relu', activation2 = 'relu',
    activation3 = 'relu', optimizer = 'adam', learning_rate = 0.001, dropout_rate = 0.5,
    input_dim= 6, batch_size=64, epochs=50, verbose=0
)

# Candidate values for each hyperparameter; RandomizedSearchCV samples
# n_iter combinations from these lists rather than trying them all
param_grid = {
    'neurons1': [128, 64, 32],
    'neurons2': [64, 32, 16],
    'neurons3': [32, 16, 8],
    'activation1': ['relu', 'tanh'],
    'activation2': ['relu', 'tanh'],
    'activation3': ['relu', 'tanh'], 
    'optimizer': ['adam', 'sgd', 'rmsprop'],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1],
    'dropout_rate': [0.2, 0.3,0.4, 0.5],
    'batch_size': [32, 64, 128],
    'epochs': [50, 100]
}

# Initialize RandomizedSearchCV.
# random_state fixes which 20 candidates are sampled so the search is repeatable;
# the network training itself is not seeded here, so scores can still vary between runs.
random_search = RandomizedSearchCV(estimator=dnn_classifier, param_distributions=param_grid, 
                                   n_iter=20, cv=3, verbose=2, n_jobs=-1, error_score='raise',
                                   random_state=42)

# Fit RandomizedSearchCV; fit() returns the fitted search object itself,
# so best_dnn_model exposes best_params_ and predict()
best_dnn_model = random_search.fit(X_train_sm, y_train_sm)

# Output the best hyperparameters
print("Best Hyperparameters found:")
print(best_dnn_model.best_params_)


Fitting 3 folds for each of 20 candidates, totalling 60 fits


  X, y = self._initialize(X, y)


Best Hyperparameters found:
{'optimizer': 'adam', 'neurons3': 32, 'neurons2': 16, 'neurons1': 128, 'learning_rate': 0.001, 'epochs': 50, 'dropout_rate': 0.2, 'batch_size': 128, 'activation3': 'relu', 'activation2': 'relu', 'activation1': 'relu'}


In [16]:
#Evaluate the tuned model (best estimator found by RandomizedSearchCV) on the validation split.
#predict() on the fitted search returns hard class labels rather than probabilities,
#so the predictions are used directly instead of being thresholded at 0.5.
y_val_pred_tuned = best_dnn_model.predict(X_val).astype(int)

#Metrics use a "_tuned" suffix so the initial model's "_inl" results stay available for comparison
accuracy_tuned = accuracy_score(y_val, y_val_pred_tuned)
precision_tuned = precision_score(y_val, y_val_pred_tuned)
recall_tuned = recall_score(y_val, y_val_pred_tuned)
f1_tuned = f1_score(y_val, y_val_pred_tuned)
mse_tuned = mean_squared_error(y_val, y_val_pred_tuned)
confmx_tuned = confusion_matrix(y_val, y_val_pred_tuned)

print("Tuned Model Evaluation")
print(f"Accuracy:{accuracy_tuned}")
print(f"Precision:{precision_tuned}")
print(f"Recall:{recall_tuned}")
print(f"F1-Score:{f1_tuned}")
print(f"MSE:{mse_tuned}")
print(f"Confusion Matrix:\n{confmx_tuned}")

Initial Model Evaluation
Accuracy:0.9476661951909476
Precision:0.3431372549019608
Recall:0.8333333333333334
F1-Score:0.4861111111111111
MSE:0.05233380480905234
Confusion Matrix:
[[1305   67]
 [   7   35]]


In [None]:
#Final evaluation of the tuned model using the held-out test dataset.
#predict() on the fitted search returns hard class labels rather than probabilities,
#so the predictions are used directly instead of being thresholded at 0.5.
y_test_pred = best_dnn_model.predict(X_test).astype(int)

accuracy_test = accuracy_score(y_test, y_test_pred)
#Accuracy alone can hide poor minority-class performance (validation accuracy was ~0.95
#with precision ~0.34), so the same metrics as the validation cells are reported here.
precision_test = precision_score(y_test, y_test_pred)
recall_test = recall_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred)
confmx_test = confusion_matrix(y_test, y_test_pred)

print(f"Accuracy: {accuracy_test}")
print(f"Precision: {precision_test}")
print(f"Recall: {recall_test}")
print(f"F1-Score: {f1_test}")
print(f"Confusion Matrix:\n{confmx_test}")

