In [319]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import torch
import torch.nn as nn
import torch
from torch import nn
from sklearn.model_selection import KFold
from itertools import product
from monk_helpers import CV,SEED
import matplotlib.pyplot as plt

In [320]:
# Datasets Path
TR_PATH = "./data/ML-CUP23-TR.csv"
TS_PATH = "./data/ML-CUP23-TS.csv"

In [321]:

def plot_graph(train_losses,validation_losses,epochs):
    num_epochs = list(range(1, epochs + 1))  
    # Plotting
    plt.plot(num_epochs, train_losses, label='Training Loss')
    plt.plot(num_epochs, validation_losses, label='Test Loss')

    plt.title('Training and Validation Losses Across Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [322]:
class Net(nn.Module):

  def __init__(self, input_size, units, output_size):
    super().__init__()
    self.units = units

    self.l1 = nn.Linear(input_size, units)
    self.l2 = nn.Linear(units, output_size)

  def forward(self, x):
    out = torch.tanh(self.l1(x))
    out = self.l2(out)
    return out


def reset_weights(net):
  for param in net.parameters():
    torch.nn.init.uniform_(param, a=-0.7, b=0.7)

In [323]:
def read_ds(path):
  """
  parse CSV data set and
  returns a tuple (input, target)
  """
  data = pd.read_csv(path, dtype=object, delimiter=",", header=None, skiprows=1, names=["id", "INPUT_0", "INPUT_1", "INPUT_2", "INPUT_3", "INPUT_4", "INPUT_5", "INPUT_6", "INPUT_7", "INPUT_8", "INPUT_9", "TARGET_x", "TARGET_y", "TARGET_z"])
  y = data.drop(["id","INPUT_0", "INPUT_1", "INPUT_2", "INPUT_3", "INPUT_4", "INPUT_5", "INPUT_6", "INPUT_7", "INPUT_8", "INPUT_9"], axis=1)
  X = data.drop(["id","TARGET_x", "TARGET_y", "TARGET_z"], axis=1).astype(float).to_numpy()

  y = y.astype(float).to_numpy()

  return np.concatenate((y, X), axis=1)

In [324]:
read_ds(TR_PATH)

array([[  7.897453  , -35.936382  ,  21.077147  , ...,   0.40764457,
         -0.68854785,   0.6168897 ],
       [ -9.330632  ,  19.901571  ,   6.0691543 , ...,   0.98098207,
          0.6617593 ,  -0.8001547 ],
       [ 14.8494005 ,   3.3740904 ,  19.667479  , ...,   0.5991635 ,
         -0.6846301 ,   0.9229005 ],
       ...,
       [  7.265506  , -53.497242  ,   2.815666  , ...,  -0.9873102 ,
          0.9376967 ,   0.14342013],
       [  5.5452743 , -63.348396  ,  27.98934   , ...,  -0.64811015,
         -0.9552309 ,   0.9012979 ],
       [  6.16061   ,   8.321016  ,  13.021444  , ...,   0.8920809 ,
         -0.19820416,   0.49458626]])

In [325]:
class ToleranceStopper:
  def __init__(self, patience=1, min_delta=0):
    self.patience = patience
    self.min_delta = min_delta
    self.counter = 0
    self.min_training_loss = np.inf

  def tol_stop(self, training_loss):
    if training_loss > (self.min_training_loss - self.min_delta):
      self.counter +=1
      if self.counter >= self.patience:
        return True
    else: 
      self.counter = 0
    if training_loss < self.min_training_loss:
      self.min_training_loss = training_loss
          
    return False

In [326]:
def fit_model(input_size,hidden_size,output_size,learning_rate,momentum,weight_decay,opt,epochs,trainloader,loss_function,testloader):
    # Init the neural network
    network = Net(input_size, 3, output_size)
    network.apply(reset_weights) #reset weights with random initialization
    
    # Initialize optimizer
    if (opt.__name__ == "RMSprop") or (opt.__name__ == "SGD"):
      optimizer = opt(network.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
    else:
      optimizer = opt(network.parameters(), lr=learning_rate, weight_decay=weight_decay)

    
    train_losses = []
    test_losses = []


    # Run the training loop for defined number of epochs
    for epoch in range(0, epochs):
      # Print epoch
      print(f'Starting epoch {epoch+1}')

      # Set current loss and accuracy value for train
      train_loss = 0.0
      epoch_train_accuracy = []


      # Set current loss and accuracy value for test
      test_loss = 0.0
      epoch_test_accuracy = []

      # Iterate over the DataLoader for training data
      for i, data in enumerate(trainloader, 0):
        # Get inputs
        inputs = data[:, 3:].to(torch.float32)
        targets = data[:, [0,1,2]].to(torch.float32)

        # Early stopping
        tolerance_stopper = ToleranceStopper(patience=10, min_delta=1e-4)

        # Zero the gradients
        optimizer.zero_grad() 

        
        # Perform forward pass
        outputs = network(inputs)

        # Compute loss
        loss = loss_function(outputs, targets)
        
        # Perform backward pass
        loss.backward()
        
        # Perform optimization
        optimizer.step()
        
        # Print statistics
        train_loss += loss.item() * inputs.size(0)
      # Print loss values
      epoch_train_loss = train_loss / len(trainloader.sampler.indices)    
      print(f'Training loss: {epoch_train_loss}')
      train_losses.append(epoch_train_loss)
      # Update accuracy
      for output, target in zip(outputs, targets):
        output = 0 if output.item() < 0.5 else 1
        if output == target.item():
          epoch_train_accuracy.append(1)
        else:
          epoch_train_accuracy.append(0)
      
      with torch.no_grad():
        # Iterate over the testing data and generate predictions
        for i, data in enumerate(testloader, 0):

          inputs = data[:, 3:].to(torch.float32)
          targets = data[:, [0,1,2]].to(torch.float32)
        
          outputs = network(inputs)

          loss = loss_function(outputs, targets)
          
          test_loss += loss.item() * inputs.size(0)

        epoch_test_loss = test_loss / len(testloader.sampler.indices)    
        print(f'Test loss: {epoch_test_loss}')
        test_losses.append(epoch_test_loss)

        # Update accuracy
        for output, target in zip(outputs, targets):
          output = 0 if output.item() < 0.5 else 1
          if output == target.item():
            epoch_test_accuracy.append(1)
          else:
            epoch_test_accuracy.append(0)
        
        if tolerance_stopper.tol_stop(epoch_test_loss):
          break

    plot_graph(train_losses,test_losses,epochs)
    return network


In [327]:
def k_fold_model(input_size,hidden_size,output_size,learning_rate,momentum,weight_decay,opt,epochs,trainloader,loss_function,validationloader):
    
    # Init the neural network
    network = Net(input_size, 3, output_size)
    network.apply(reset_weights) #reset weights with random initialization
    
    # Initialize optimizer
    if (opt.__name__ == "RMSprop") or (opt.__name__ == "SGD"):
      optimizer = opt(network.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
    else:
      optimizer = opt(network.parameters(), lr=learning_rate, weight_decay=weight_decay)

    
    # Run the training loop for defined number of epochs
    for epoch in range(0, epochs):

      # Print epoch
      #print(f'Starting epoch {epoch+1}')

      # Set current loss value
      train_loss = 0.0
      # Iterate over the DataLoader for training data
      for i, data in enumerate(trainloader, 0):
        # Get inputs
        inputs = data[:, 3:].to(torch.float32)
        targets = data[:, [0,1,2]].to(torch.float32)

        # Early stopping
        tolerance_stopper = ToleranceStopper(patience=10, min_delta=1e-4)

        # Zero the gradients
        optimizer.zero_grad() 
        # Perform forward pass
        outputs = network(inputs)

        # Compute loss
        loss = loss_function(outputs, targets)
        
        # Perform backward pass
        loss.backward()
        
        # Perform optimization
        optimizer.step()
        
        # Print statistics
        train_loss += loss.item() * inputs.size(0)
        #print("loss per item", loss.item())
        #print("inputs size",inputs.size(0))

        # Print loss values
      #print("train loaders length",len(trainloader.sampler.indices))
      avg_train_loss = train_loss / len(trainloader.sampler.indices)    
      #print(f'Training loss: {avg_train_loss}')
      # Print about testing
      #print('Starting validation')

      # Evaluationfor this fold
      valid_loss = 0.0 
      with torch.no_grad():
        # Iterate over the validation data and generate predictions
        for i, data in enumerate(validationloader, 0):

          # Get inputs
          inputs = data[:, 3:].to(torch.float32)
          targets = data[:, [0,1,2]].to(torch.float32)
          
          # Generate outputs
          outputs = network(inputs)

          loss = loss_function(outputs, targets)

          # Calculate loss
          valid_loss += loss.item() * inputs.size(0)
          #print("loss per item", loss.item())
          #print("inputs size",inputs.size(0))
        
        #print("validation loaders length",len(validationloader.sampler.indices))
        avg_valid_loss = valid_loss / len(validationloader.sampler.indices) #used to find the best parameters of the model
        # Early stopping
        if tolerance_stopper.tol_stop(avg_valid_loss):
          break
        # Print validation results
        #print(f'Validation loss: {avg_valid_loss:.4f}')
        

    return avg_valid_loss


In [328]:
input_size = 10
output_size = 3
params_grid = {
    "hidden_size": [2, 3, 4, 5],
    "learning_rate": [0.001, 0.01, 0.1, 0.5],
    "batch_size": [32, 64],
    "weight_decay": [0.0001, 0.001, 0.01],
    "momentum": [0.01, 0.05, 0.1, 0.4],
    "epochs":[5000],
    "optimizer":[torch.optim.SGD]
}

params_grid = {
    "hidden_size": [4],
    "learning_rate": [0.001],
    "batch_size": [64],
    "weight_decay": [0.01],
    "momentum": [0.09],
    "epochs":[400, 600],
    "optimizer":[torch.optim.SGD]
}


# For fold results
results = {}

# Set fixed random number seed
gen = torch.Generator().manual_seed(SEED)
loss_function = nn.MSELoss()

dataset = read_ds(TR_PATH)
#shuffle dataset
np.random.shuffle(dataset)
#split dataset in train and test
dataset_train_part, dataset_test_part = dataset[:int(len(dataset)*0.8)], dataset[int(len(dataset)*0.8):]

#dataset_test_part = read_ds(TS_PATH)

dataset = dataset_train_part


# Define the K-fold Cross Validator
#kfold = CV 



# K-fold Cross Validation model evaluation
best_params = None

actual_it = 0
total_iterations = len(params_grid["epochs"]) * len(params_grid["optimizer"]) * len(params_grid["hidden_size"]) * len(params_grid["learning_rate"]) * len(params_grid["batch_size"]) * len(params_grid["weight_decay"]) * len(params_grid["momentum"])
print(total_iterations)

for epochs, opt, hidden_size, learning_rate, batch_size, weight_decay, momentum in product(params_grid["epochs"],params_grid["optimizer"], params_grid["hidden_size"], params_grid["learning_rate"], params_grid["batch_size"], params_grid["weight_decay"], params_grid["momentum"]):
    validation_avg_loss_fold = 0
    num_iterations = 0
    #print the actual percentage of the grid search
    print(f'Actual iter {(actual_it/total_iterations)*100}%')
    kf = KFold(n_splits=5, random_state=42, shuffle=True)
    kf_split = kf.split(dataset)


    for fold, (train_ids, val_ids) in enumerate(kf_split):
        # Sample elements randomly from a given list of ids, no replacement.
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids, gen) 
        validation_subsampler = torch.utils.data.SubsetRandomSampler(val_ids, gen) 
        # Print
        #print(f'FOLD {fold}')

        #print('--------------------------------')
        # Define data loaders for training and testing data in this fold
        
        trainloader = torch.utils.data.DataLoader(
                        dataset, 
                        batch_size=batch_size, sampler=train_subsampler)
        validationloader = torch.utils.data.DataLoader(
                        dataset,
                        batch_size=batch_size, sampler=validation_subsampler)    
        

        validation_loss = k_fold_model(learning_rate=learning_rate,epochs=epochs,hidden_size=hidden_size,input_size=input_size,loss_function=loss_function,momentum=momentum
                                                    ,opt=opt,output_size=output_size,trainloader=trainloader,weight_decay=weight_decay,validationloader=validationloader)   
        validation_avg_loss_fold  += validation_loss
        num_iterations += 1

    actual_it = actual_it + 1

    #validation average over all folds
    validation_avg_loss_fold /= num_iterations

    #best 
    if best_params is None or validation_avg_loss_fold < best_params[0]:
        best_params = (validation_avg_loss_fold,learning_rate,epochs,hidden_size, loss_function,momentum,opt,weight_decay,batch_size)


print(f"Best loss: {best_params[0]} \nBest hidden size: {best_params[3]} \nBest learning rate: {best_params[1]} \nBest batch size: {best_params[8]} \nBest weight decay: {best_params[7]} \nBest momentum: {best_params[5]}")



2
Actual iter 0.0%
Actual iter 50.0%
Best loss: 48.003238372802734 
Best hidden size: 4 
Best learning rate: 0.001 
Best batch size: 64 
Best weight decay: 0.01 
Best momentum: 0.09


In [329]:
#Best parameters found
print(f"Best loss: {best_params[0]} \nBest hidden size: {best_params[3]} \nBest learning rate: {best_params[1]} \nBest batch size: {best_params[8]} \nBest weight decay: {best_params[7]} \nBest momentum: {best_params[5]}")


Best loss: 48.003238372802734 
Best hidden size: 4 
Best learning rate: 0.001 
Best batch size: 64 
Best weight decay: 0.01 
Best momentum: 0.09


In [330]:
train_subsampler = torch.utils.data.SubsetRandomSampler(range(len(dataset_train_part)), gen)

trainloader = torch.utils.data.DataLoader(
                      dataset_train_part, 
                      batch_size=best_params[8], sampler=train_subsampler)

test_subsampler =  torch.utils.data.SubsetRandomSampler(range(len(dataset_test_part)), gen)
testloader = torch.utils.data.DataLoader(
                      dataset_test_part, 
                      batch_size=best_params[8], sampler=test_subsampler)


best_net = fit_model(learning_rate=best_params[1],epochs=best_params[2],hidden_size=best_params[3],input_size=10,loss_function=best_params[4],momentum=best_params[5],opt=best_params[6],output_size=3,trainloader=trainloader,weight_decay=best_params[7],testloader=testloader)   

Starting epoch 1
Training loss: 777.2550634765626


RuntimeError: a Tensor with 3 elements cannot be converted to Scalar

In [None]:
from sklearn.metrics import accuracy_score, classification_report
test_data = torch.from_numpy(dataset_test_part[:, 3:]).to(torch.float32)
val_labels = torch.from_numpy(dataset_test_part[:, [0,1,2]]).to(torch.float32)

#Print accuracy on test set
test_outputs = best_net(test_data).round().int().view(-1)

y_pred = best_net(test_data)
y_pred = y_pred.round().int().view(-1)
print("accuracy on test set {:.3f}".format(accuracy_score( val_labels,y_pred)))
print(classification_report(val_labels, 
                            y_pred, 
                            target_names=['0', '1']))

#print the confusion matrix
cm = confusion_matrix(val_labels, test_outputs)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
plt.show()
print("------------------------------------------------------------------------------------------------------")


NameError: name 'best_net' is not defined