Grid Search K-Fold Cross Validation to properly find best performing hyperparameters on the dataset and to calculate robust performance scores

In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, GridSearchCV, KFold
%run Full_Neural_Network.ipynb

#load in the dataset and shuffle
df = pd.read_csv("heart_failure_clinical_records_dataset.csv")
data = df.to_numpy()
np.random.shuffle(data)

def runPipeline(data):
    #set up hyperparameters and parse the data properly
    params = {"layer_widths": [24, 36, 48], "learning_rates": [ .0012, .001, .00098, .00096], 
              "l2" : [.0009, .001, .003], "epochs" : [10000, 25000, 50000]}
    labels = data[:, -1]
    labels = labels.astype(int)
    features = data[:, :-1]
    res = runTuneTest(labels, features, params)
    print("Articial Neural Network Grid Search CV:")
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    for i in res:
        print("Parameters:")
        print(i[1])
        print("Accuracy:")
        print(i[0])
        
        
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        
def runTuneTest(labels, features, params):
    #perform a 2-Fold Cross Validation and pass the training set to GridSearch
    res = []
    skf = StratifiedKFold(n_splits=2)
    splits = skf.split(X=data, y=labels)
    for i, (train_index, test_index) in enumerate(splits):
        trainFeatures = [features[index] for index in train_index]
        trainLabels = [labels[index] for index in train_index]
        testFeatures = [features[index] for index in test_index]
        testLabels = [labels[index] for index in test_index]
        trainFeatures = np.array(trainFeatures)
        trainLabels = np.array(trainLabels)
        testFeatures = np.array(testFeatures)
        testLabels = np.array(testLabels)
        clf = GridSearch(trainFeatures, trainLabels, params)
        acc = trainANN(clf["learning_rates"], clf["layer_widths"], clf["l2"], clf["epochs"], 
                        trainFeatures, trainLabels, testFeatures, testLabels)
        res.append([acc, clf])
    return res
        
def GridSearch(trainFeatures, trainLabels, params):
    #Perform another 2-Fold Cross Validation and find best hyperparameters 
    #for the ANN
    skf = StratifiedKFold(n_splits=2)
    splits = skf.split(X=trainFeatures, y=trainLabels)
    best_params = {"layer_widths": 0, "learning_rates": 0, "epochs": 0, "l2": 0}
    best_acc = 0
    for i, (train_index, test_index) in enumerate(splits):
        gdTrain_Features = [trainFeatures[index] for index in train_index]
        gdTrain_Labels = [trainLabels[index] for index in train_index]
        gdTest_Features = [trainFeatures[index] for index in test_index]
        gdTest_Labels = [trainLabels[index] for index in test_index]
            
        gdTrain_Features = np.array(gdTrain_Features)
        gdTrain_Labels = np.array(gdTrain_Labels)
        gdTest_Features = np.array(gdTest_Features)
        gdTest_Labels = np.array(gdTest_Labels)
        
        for width in params["layer_widths"]:
            for learning_rate in params["learning_rates"]:
                for epoch in params["epochs"]:
                    for l2 in params["l2"]:
                        acc = trainANN(learning_rate, width, l2, epoch, 
                                gdTrain_Features, gdTrain_Labels, gdTest_Features, gdTest_Labels)
                        if acc>best_acc:
                            best_acc= acc
                            best_params["layer_widths"] = width
                            best_params["learning_rates"] = learning_rate
                            best_params["epochs"] = epoch
                            best_params["l2"] = l2
        
    #return the best performing parameters
    return best_params
                

                
def trainANN(lr, width, l2_regularizer, epochs, trFeatures, trLabels, teFeatures, teLabels):
    #set up the layers and the hyperparameters for the ANN
    dense1 = Layer_Dense(12, width, weight_regularizer_l2 = l2_regularizer,
        bias_regularizer_l2 = l2_regularizer)
    activation1 = Activation_ReLU()
    dense2 = Layer_Dense(width, width)
    activation2 = Activation_ReLU()
    dense3 = Layer_Dense(width, 2)
    loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()
    optimizer = Optimizer_Adam(learning_rate= lr, decay=5e-4)
    #training block
    for epoch in range(epochs):
        # Perform a forward pass of our training data through all layers and activation functions
        dense1.forward(trFeatures)
        activation1.forward(dense1.output)
        dense2.forward(activation1.output)
        activation2.forward(dense2.output)
        dense3.forward(activation2.output)
        #loss given our softmax output
        data_loss = loss_activation.forward(dense3.output, trLabels)
        #L2 loss
        regularization_loss = loss_activation.loss.regularization_loss(dense1) + loss_activation.loss.regularization_loss(dense2)
        #total loss
        loss = data_loss + regularization_loss
        predictions = np.argmax(loss_activation.output, axis=1)
        accuracy = np.mean(predictions==trLabels)
        #backpropogate
        loss_activation.backward(loss_activation.output, trLabels)
        dense3.backward(loss_activation.dinputs)
        activation2.backward(dense3.dinputs)
        dense2.backward(activation2.dinputs)
        activation1.backward(dense2.dinputs)
        dense1.backward(activation1.dinputs)
        # Update weights and biases
        optimizer.pre_update_params()
        optimizer.update_params(dense1)
        optimizer.update_params(dense2)
        optimizer.update_params(dense3)
        optimizer.post_update_params()
    #test testing data
    dense1.forward(teFeatures)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)
    dense3.forward(activation2.output)
    loss = loss_activation.forward(dense3.output, teLabels)
    predictions = np.argmax(loss_activation.output, axis=1)
    accuracy = np.mean(predictions == teLabels)
    return accuracy

            


In [17]:
np.random.shuffle(data)
runPipeline(data)

Articial Neural Network Grid Search CV:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parameters:
{'layer_widths': 48, 'learning_rates': 0.0012, 'epochs': 10000, 'l2': 0.0009}
Accuracy:
0.7866666666666666
Parameters:
{'layer_widths': 24, 'learning_rates': 0.0012, 'epochs': 50000, 'l2': 0.0009}
Accuracy:
0.8053691275167785
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [None]:
np.random.shuffle(data)
runPipeline(data)