In [38]:
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Run on the GPU when PyTorch reports one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [39]:
# Dataset type: selects which "kryptonite-<n>" files are loaded below and is
# also passed to NeuralNet as the input dimension.
n=9

In [40]:
# Load the Kryptonite-n arrays, hold out a test split, and convert every
# split to float32 tensors living on `device`.
binary = True  # when True, threshold every feature at 0.5 into {0, 1}

X = np.load("Datasets/kryptonite-%s-X.npy" % n)
y = np.load("Datasets/kryptonite-%s-y.npy" % n)
X = np.where(X > 0.5, 1, 0) if binary else X

# 80% is kept for cross-validated training ("temp"), 20% is held out as the test set.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Features become float32 tensors as they are.
X_temp, X_test = (
    torch.tensor(arr.astype(np.float32)).to(device) for arr in (X_temp, X_test)
)
# Labels additionally get an axis inserted at position 1, so they line up with
# the single-unit output of NeuralNet (presumably y is 1-D on disk — TODO confirm).
y_temp, y_test = (
    torch.tensor(arr.astype(np.float32)).unsqueeze(1).to(device)
    for arr in (y_temp, y_test)
)

test_dataset = TensorDataset(X_test, y_test)
# NOTE(review): no batch_size is passed, so DataLoader falls back to its
# default batch size — confirm this is intended for test-set evaluation.
test_loader = DataLoader(test_dataset, shuffle=False)

In [41]:
# Define Model

class NeuralNet(nn.Module):
    """Fully connected binary classifier with two hidden ReLU layers.

    Layout: ``input_dim -> hidden_size -> hidden_size -> 1``, with a sigmoid
    on the final unit so the output lies in (0, 1).
    """

    def __init__(self, input_dim, hidden_size):
        """Create the three linear layers and the two activation modules.

        Args:
            input_dim: Number of features in each input row.
            hidden_size: Width shared by both hidden layers.
        """
        super().__init__()
        # Layers are created in forward order (fc1, fc2, fc3), then the activations.
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of inputs to one sigmoid activation per row."""
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.relu(self.fc2(first_hidden))
        logits = self.fc3(second_hidden)
        return self.sigmoid(logits)

In [42]:
def homogenise(lists):
    """Pad every sub-list, in place, to the length of the longest sub-list.

    Shorter sub-lists are extended by repeating their own last element, so
    histories of different lengths can be stacked into a rectangular array.

    Args:
        lists: A list of lists. The sub-lists are modified in place.

    Returns:
        The same ``lists`` object that was passed in, after padding.

    Raises:
        ValueError: If an empty sub-list would need padding (it has no last
            element to repeat).
    """
    # default=0 keeps an empty outer list from crashing max().
    target_length = max((len(sublist) for sublist in lists), default=0)
    for sublist in lists:
        shortfall = target_length - len(sublist)
        if shortfall <= 0:
            # Already at full length: do not touch sublist[-1], which would
            # fail when every sub-list is empty.
            continue
        if not sublist:
            raise ValueError(
                "cannot pad an empty list: it has no last element to repeat"
            )
        sublist.extend([sublist[-1]] * shortfall)
    return lists

In [43]:
# Directory into which plot_accuracy / plot_losses save their PNG figures.
# NOTE(review): this name shadows Python's built-in dir(); renaming it would
# also require updating the plotting functions that read it.
dir = "plots_hyperparameter_tuning"

In [44]:
def plot_accuracy(train_acc_list, val_acc_list, id=None):
    """Plot mean +/- std accuracy across folds and print a per-fold summary.

    Two figures are written into the module-level ``dir`` directory (created
    if missing): ``train_acc[_<id>].png`` and ``val_acc[_<id>].png``. Each
    shows the per-epoch mean over folds with a one-standard-deviation band.

    Args:
        train_acc_list: Per-fold training accuracy histories, indexable by
            fold number ``0 .. k-1`` (a dict keyed by fold or a list of
            lists). Each entry is a list with one value per epoch.
        val_acc_list: Per-fold validation accuracy histories, same layout.
        id: Optional suffix appended to the file names. The name shadows the
            built-in but is kept because callers pass it by keyword.

    Returns:
        The mean, over folds, of each fold's best (maximum) validation accuracy.

    Raises:
        ValueError: If no folds are supplied.
    """
    n_folds = len(train_acc_list)
    if n_folds == 0:
        raise ValueError("plot_accuracy needs at least one fold of results")

    # homogenise pads in place, so work on copies to leave the caller's
    # histories at their true lengths.
    train_acc = np.array(homogenise([list(train_acc_list[fold]) for fold in range(n_folds)]))
    val_acc = np.array(homogenise([list(val_acc_list[fold]) for fold in range(n_folds)]))

    # savefig does not create missing directories.
    os.makedirs(dir, exist_ok=True)
    suffix = f"_{id}" if id else ""

    for prefix, title, curves in (
        ("train", "Training Accuracy per epoch", train_acc),
        ("val", "Validation Accuracy per epoch", val_acc),
    ):
        mean = np.mean(curves, axis=0)
        std = np.std(curves, axis=0)
        epochs = range(len(mean))

        fig, ax = plt.subplots()
        ax.plot(epochs, mean)
        ax.fill_between(epochs, mean - std, mean + std, alpha=.6)
        ax.set_title(title)
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Accuracy")
        fig.savefig(f"{dir}/{prefix}_acc{suffix}.png")
        # Close explicitly: this function is called once per hyperparameter set.
        plt.close(fig)

    best_val_per_fold = val_acc.max(axis=1)
    report_sections = (
        ("Training accuracy (last epoch)", train_acc[:, -1]),
        ("Validation accuracy (last epoch)", val_acc[:, -1]),
        ("Training accuracy (best)", train_acc.max(axis=1)),
        ("Validation accuracy (best)", best_val_per_fold),
    )
    for heading, per_fold_values in report_sections:
        print(heading)
        for fold, value in enumerate(per_fold_values):
            print("K-fold", fold, ":", value)
        print("---------------------------------")

    average_best_validation_accuracy = sum(best_val_per_fold) / n_folds
    return average_best_validation_accuracy

In [45]:
def plot_losses(train_loss_list, val_loss_list, id=None):
    """Plot mean +/- std loss across folds and print a per-fold summary.

    Two figures are written into the module-level ``dir`` directory (created
    if missing): ``train_loss[_<id>].png`` and ``val_loss[_<id>].png``. Each
    shows the per-epoch mean over folds with a one-standard-deviation band.

    Args:
        train_loss_list: Per-fold training loss histories, indexable by fold
            number ``0 .. k-1`` (a dict keyed by fold or a list of lists).
            Each entry is a list with one value per epoch.
        val_loss_list: Per-fold validation loss histories, same layout.
        id: Optional suffix appended to the file names. The name shadows the
            built-in but is kept because callers pass it by keyword.

    Returns:
        The mean, over folds, of each fold's best (minimum) validation loss.

    Raises:
        ValueError: If no folds are supplied.
    """
    n_folds = len(train_loss_list)
    if n_folds == 0:
        raise ValueError("plot_losses needs at least one fold of results")

    # homogenise pads in place, so work on copies to leave the caller's
    # histories at their true lengths.
    train_loss = np.array(homogenise([list(train_loss_list[fold]) for fold in range(n_folds)]))
    val_loss = np.array(homogenise([list(val_loss_list[fold]) for fold in range(n_folds)]))

    # savefig does not create missing directories.
    os.makedirs(dir, exist_ok=True)
    suffix = f"_{id}" if id else ""

    for prefix, title, curves in (
        ("train", "Training Loss per epoch", train_loss),
        ("val", "Validation Loss per epoch", val_loss),
    ):
        mean = np.mean(curves, axis=0)
        std = np.std(curves, axis=0)
        epochs = range(len(mean))

        fig, ax = plt.subplots()
        ax.plot(epochs, mean)
        ax.fill_between(epochs, mean - std, mean + std, alpha=.6)
        ax.set_title(title)
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        fig.savefig(f"{dir}/{prefix}_loss{suffix}.png")
        # Close explicitly: this function is called once per hyperparameter set.
        plt.close(fig)

    best_val_per_fold = val_loss.min(axis=1)
    report_sections = (
        ("Training loss (last epoch)", train_loss[:, -1]),
        ("Validation loss (last epoch)", val_loss[:, -1]),
        ("Training loss best", train_loss.min(axis=1)),
        ("Validation loss (best)", best_val_per_fold),
    )
    for heading, per_fold_values in report_sections:
        print(heading)
        for fold, value in enumerate(per_fold_values):
            print("K-fold", fold, ":", value)
        print("---------------------------------")

    avg_best_validation_loss = sum(best_val_per_fold) / n_folds
    return avg_best_validation_loss


In [46]:
def _train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader``; return (mean loss, accuracy %)."""
    model.train()
    loss_sum = 0.0
    n_correct = 0.0
    n_seen = 0
    for inputs, labels in loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_len = labels.size(0)
        # The criterion built in model_training averages over the batch;
        # re-weight by batch size so a short final batch is not over-counted.
        loss_sum += loss.item() * batch_len
        n_correct += (outputs.round() == labels).float().sum().item()
        n_seen += batch_len
    return loss_sum / n_seen, 100 * n_correct / n_seen


def _evaluate(model, loader, criterion):
    """Score ``model`` on ``loader`` without gradients; return (mean loss, accuracy %)."""
    model.eval()
    loss_sum = 0.0
    n_correct = 0.0
    n_seen = 0
    # No backward pass happens here, so skip building the autograd graph.
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            batch_len = labels.size(0)
            loss_sum += criterion(outputs, labels).item() * batch_len
            n_correct += (outputs.round() == labels).float().sum().item()
            n_seen += batch_len
    return loss_sum / n_seen, 100 * n_correct / n_seen


def model_training(hyperparameters, early_stopping_enabled=False, patience=5, random_state=42):
    """Train and validate a NeuralNet with 5-fold cross-validation, then plot the results.

    Reads the module-level ``X_temp``, ``y_temp``, ``n`` and ``device``. For
    each fold a fresh ``NeuralNet`` is trained with Adam and ``nn.BCELoss``,
    and per-epoch loss and accuracy are recorded for the training and
    validation parts. The histories are then passed to ``plot_accuracy`` and
    ``plot_losses``, with the hyperparameters encoded in the plot file id.

    Args:
        hyperparameters: Dict with the keys ``"hidden_size"``, ``"lr"``,
            ``"alpha"`` (used as Adam's ``weight_decay``), ``"batch_size"``
            and ``"num_epochs"``.
        early_stopping_enabled: When True, a fold stops once its validation
            loss has failed to improve for more than ``patience`` epochs in
            a row.
        patience: Number of non-improving epochs tolerated by early stopping.
        random_state: Seed for the fold split and for torch's global RNG, so
            every hyperparameter set is compared on the same folds. Pass
            ``None`` for unseeded behaviour.

    Returns:
        ``(average_best_validation_accuracy, avg_best_validation_loss)`` as
        returned by ``plot_accuracy`` and ``plot_losses``.
    """
    n_splits = 5

    # One history list per fold, keyed 0 .. n_splits-1.
    train_acc_list = {fold: [] for fold in range(n_splits)}
    val_acc_list = {fold: [] for fold in range(n_splits)}
    train_loss_list = {fold: [] for fold in range(n_splits)}
    val_loss_list = {fold: [] for fold in range(n_splits)}

    if random_state is not None:
        # Side effect: reseeds torch's global RNG (weight init, batch shuffling).
        torch.manual_seed(random_state)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # Split plain indices rather than handing a (possibly GPU) tensor to scikit-learn.
    sample_indices = np.arange(len(X_temp))

    for fold, (train_index, val_index) in enumerate(kf.split(sample_indices)):
        X_train_tensor, X_val_tensor = X_temp[train_index], X_temp[val_index]
        y_train_tensor, y_val_tensor = y_temp[train_index], y_temp[val_index]

        train_loader = DataLoader(
            TensorDataset(X_train_tensor, y_train_tensor),
            batch_size=hyperparameters["batch_size"],
            shuffle=True,
        )
        val_loader = DataLoader(
            TensorDataset(X_val_tensor, y_val_tensor),
            batch_size=hyperparameters["batch_size"],
        )

        model = NeuralNet(n, hyperparameters["hidden_size"]).to(device)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(
            model.parameters(),
            lr=hyperparameters["lr"],
            weight_decay=hyperparameters["alpha"],
        )

        # Early-stopping state, reset for every fold.
        best_val_loss = float('inf')
        early_stopping_counter = 0

        for _ in range(hyperparameters["num_epochs"]):
            train_loss, train_accuracy = _train_one_epoch(model, train_loader, criterion, optimizer)
            train_loss_list[fold].append(train_loss)
            train_acc_list[fold].append(train_accuracy)

            val_loss, val_accuracy = _evaluate(model, val_loader, criterion)
            val_loss_list[fold].append(val_loss)
            val_acc_list[fold].append(val_accuracy)

            if early_stopping_enabled:
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    early_stopping_counter = 0
                else:
                    early_stopping_counter += 1
                if early_stopping_counter > patience:
                    break

    # Encode the configuration in the plot file names; dots are replaced so
    # decimal values do not look like file extensions.
    file_id_for_plot = f"n_{n}"
    for key, val in hyperparameters.items():
        file_id_for_plot += f"_{key}_{val}"
    file_id_for_plot = file_id_for_plot.replace(".", "-")

    print("---------------------------------")
    print(file_id_for_plot)
    print("---------------------------------")

    average_best_validation_accuracy = plot_accuracy(train_acc_list, val_acc_list, id=file_id_for_plot)
    avg_best_validation_loss = plot_losses(train_loss_list, val_loss_list, id=file_id_for_plot)

    return average_best_validation_accuracy, avg_best_validation_loss
    
    

In [None]:
# Grid search: cross-validate every (lr, alpha, hidden size) combination and
# record the averaged best validation accuracy and loss for each one.
lr_values = [0.001, 0.01, 0.05, 0.1]
alpha_values = [0.0001, 0.001, 0.01, 0.1]
hidden_layer_sizes = [factor * n for factor in (2, 4, 8, 16)]

best_val_accuracies, best_val_losses = {}, {}

# Flatten the grid up front; hidden size varies fastest, learning rate slowest.
search_grid = [
    (lr, alpha, hidden_layer_size)
    for lr in lr_values
    for alpha in alpha_values
    for hidden_layer_size in hidden_layer_sizes
]

for lr, alpha, hidden_layer_size in search_grid:
    # Key order matters: model_training builds the plot file id from it.
    hyperparameters = dict(
        hidden_size=hidden_layer_size,
        lr=lr,
        alpha=alpha,
        batch_size=128,
        num_epochs=100,
    )
    best_val_acc, best_val_loss = model_training(hyperparameters)

    # Dicts are unhashable, so use the sorted item tuple as the result key.
    hashable_hyperparameters = tuple(sorted(hyperparameters.items()))
    best_val_accuracies[hashable_hyperparameters] = best_val_acc
    best_val_losses[hashable_hyperparameters] = best_val_loss