<a href="https://colab.research.google.com/github/maggieclark/kaggle-floods/blob/main/nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [1]:
# import required packages
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn

import io
import copy

In [2]:
# create GPU 'device'
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
device = get_default_device()
device

device(type='cpu')

In [5]:
# mount drive if necessary
# import
imported = pd.read_csv('drive/MyDrive/Colab Datasets/Kaggle/train.csv')

In [6]:
imported

Unnamed: 0,id,MonsoonIntensity,TopographyDrainage,RiverManagement,Deforestation,Urbanization,ClimateChange,DamsQuality,Siltation,AgriculturalPractices,...,DrainageSystems,CoastalVulnerability,Landslides,Watersheds,DeterioratingInfrastructure,PopulationScore,WetlandLoss,InadequatePlanning,PoliticalFactors,FloodProbability
0,0,5,8,5,8,6,4,4,3,3,...,5,3,3,5,4,7,5,7,3,0.445
1,1,6,7,4,4,8,8,3,5,4,...,7,2,0,3,5,3,3,4,3,0.450
2,2,6,5,6,7,3,7,1,5,4,...,7,3,7,5,6,8,2,3,3,0.530
3,3,3,4,6,5,4,8,4,7,6,...,2,4,7,4,4,6,5,7,5,0.535
4,4,5,3,2,6,4,4,3,3,3,...,2,2,6,6,4,1,2,3,5,0.415
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117952,1117952,3,3,4,10,4,5,5,7,10,...,7,8,7,2,2,1,4,6,4,0.495
1117953,1117953,2,2,4,3,9,5,8,1,3,...,9,4,4,3,7,4,9,4,5,0.480
1117954,1117954,7,3,9,4,6,5,9,1,3,...,5,5,5,5,6,5,5,2,4,0.485
1117955,1117955,7,3,3,7,5,2,3,4,6,...,6,8,5,3,4,6,7,6,4,0.495


In [7]:
# remove id column
imported = imported.drop(columns = 'id')

In [8]:
# divide into train and test
train = imported.sample(frac=2/3, random_state=117)
test = imported.drop(train.index)

cross validation loop for robust accuracy estimates

In [9]:
# create partitions of train

train['split'] = range(len(train))

partitions = []

for _ in range(5):
    # Split the data into train and test sets
    trn, tst = train_test_split(train, test_size=0.2, random_state=117)

    # Append the indices of the test set to the partitions list
    partitions.append(tst['split'].tolist())

# Remove the 'split' column from the original DataFrame
train.drop('split', axis=1, inplace=True)

In [10]:
# create a subclass of Dataset
class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data_frame = dataframe # I will supply `train` and `test`
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        features = self.data_frame.iloc[idx, :-1].values.astype(float) # selects all columns besides last
        label = self.data_frame.iloc[idx, -1] # selects last column

        sample = (features, label)

        if self.transform:
            sample = self.transform(sample)

        return sample

In [11]:
# all experiments use batch size of 100
batch_size = 100

In [12]:
# CustomDatasets and DataLoaders for final run

train_dataset_complete = CustomDataset(train) # all 40,000 training rows
test_dataset = CustomDataset(test)

train_loader_complete = torch.utils.data.DataLoader(dataset=train_dataset_complete,
                                            batch_size=batch_size,
                                            shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [13]:
# hyperparameters that will not be tuned (for now)
criterion = nn.MSELoss()

In [14]:
# credit to ML Fall 2023 TA for accuracy function
def Get_MSE(data):
  model.eval()
  SSE = 0
  with torch.no_grad():
    # Iterate through test dataset
    for i, (features, labels) in enumerate(data):
        features=features.float().to(device)
        labels=labels.to(device)
        # Forward pass only to get outputs
        outputs = model(features)
        # Get predictions from the maximum value
        _, predicted = torch.max(outputs.data, 1)
        # squared error
        SE = (predicted - labels)**2
        # sum of squared error
        SSE += SE

  MSE = SSE / labels.size(0)
  return MSE
  model.train()

# Baseline

In [15]:
# create a subclass of nn.Module to hold model structure

class Model(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        # second fully connected layer
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)

        return out

In [16]:
# initialize the model and move it to gpu

input_dim = train.shape[1]
hidden_dim = 6
output_dim = 2

model = Model(input_dim, hidden_dim, output_dim).to(device)

In [17]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [18]:
# create dictionary to store accuracy measures
folds = list(range(5))
baseline_mse = {fold: {'train_mse': None, 'val_mse': None} for fold in folds}
baseline_mse

{0: {'train_mse': None, 'val_mse': None},
 1: {'train_mse': None, 'val_mse': None},
 2: {'train_mse': None, 'val_mse': None},
 3: {'train_mse': None, 'val_mse': None},
 4: {'train_mse': None, 'val_mse': None}}

In [19]:
# loop over folds to calculate average training and validation accuracy

num_epochs=100

for f in range(5):

  # split train into train and validation
  v = train.iloc[partitions[f]]
  tr = train.drop(v.index)

  # initialize custom Dataset subclass
  val_dataset = CustomDataset(v)
  train_dataset = CustomDataset(tr)

  # create DataLoaders
  train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
  val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

  # start training
  for epoch in range(num_epochs):
      loss_avg=0

      for i, (features, labels) in enumerate(train_loader):

          # moving features and labels to gpu
          features=features.float().to(device)
          labels=labels.to(device)

          # make prediction
          y_pred = model(features)

          # Calculate Loss
          loss = criterion(y_pred, labels)

          # Resets the gradients of all optimized torch
          optimizer.zero_grad()

          # Getting gradients w.r.t. parameters
          loss.backward()

          # Updating parameters
          optimizer.step()

          loss_avg+=loss.item()*(labels.size(0)/len(train_dataset)) # avg of all batches in epoch

  baseline_mse[f]['train_mse']=Get_MSE(train_loader)
  baseline_mse[f]['val_mse']=Get_MSE(val_loader)
  optimizer.zero_grad()

baseline_mse

RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x20 and 21x6)

In [None]:
data_dict = {
  0: {'train_accuracy': 90.6094, 'val_accuracy': 90.6250},
  1: {'train_accuracy': 90.7188, 'val_accuracy': 90.7950},
  2: {'train_accuracy': 86.6344, 'val_accuracy': 86.4500},
  3: {'train_accuracy': 91.1094, 'val_accuracy': 91.3400},
  4: {'train_accuracy': 90.8594, 'val_accuracy': 90.9250}
}

train_accuracies = [entry['train_accuracy'] for entry in data_dict.values()]
val_accuracies = [entry['val_accuracy'] for entry in data_dict.values()]

mean_train_accuracy = sum(train_accuracies) / len(train_accuracies)
mean_val_accuracy = sum(val_accuracies) / len(val_accuracies)

print("Mean train_accuracy:", mean_train_accuracy)
print("Mean val_accuracy:", mean_val_accuracy)

Mean train_accuracy: 89.98628
Mean val_accuracy: 90.02700000000002


# Hyperparameter 1: number of layers

Three fully connected layers

In [None]:
optimizer.zero_grad()

In [None]:
# create a subclass of nn.Module to hold model structure

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)



    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.RELU(out)
        out = self.fc3(out)

        return out

In [None]:
# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)

In [None]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
# create dictionary to store accuracy measures
folds = list(range(5))
baseline_acc = {fold: {'train_accuracy': None, 'val_accuracy': None} for fold in folds}
baseline_acc

{0: {'train_accuracy': None, 'val_accuracy': None},
 1: {'train_accuracy': None, 'val_accuracy': None},
 2: {'train_accuracy': None, 'val_accuracy': None},
 3: {'train_accuracy': None, 'val_accuracy': None},
 4: {'train_accuracy': None, 'val_accuracy': None}}

In [None]:
# loop over folds to calculate average training and validation accuracy

num_epochs=100

for f in range(5):

  # split train into train and validation
  v = train.iloc[partitions[f]]
  tr = train.drop(v.index)

  # initialize custom Dataset subclass
  val_dataset = CustomDataset(v)
  train_dataset = CustomDataset(tr)

  # create DataLoaders
  train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
  val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

  # start training
  for epoch in range(num_epochs):
      loss_avg=0

      for i, (features, labels) in enumerate(train_loader):

          # moving features and labels to gpu
          features=features.float().to(device)
          labels=labels.to(device)

          # make prediction
          y_pred = model(features)

          # Calculate Loss: softmax --> cross entropy loss
          loss = criterion(y_pred, labels)

          # Resets the gradients of all optimized torch
          optimizer.zero_grad()

          # Getting gradients w.r.t. parameters
          loss.backward()

          # Updating parameters
          optimizer.step()

          loss_avg+=loss.item()*(labels.size(0)/len(train_dataset)) # avg of all batches in epoch

  baseline_acc[f]['train_accuracy']=Get_Accuracy(train_loader)
  baseline_acc[f]['val_accuracy']=Get_Accuracy(val_loader)
  optimizer.zero_grad()

baseline_acc

{0: {'train_accuracy': tensor(90.7281), 'val_accuracy': tensor(91.2000)},
 1: {'train_accuracy': tensor(93.3344), 'val_accuracy': tensor(93.2900)},
 2: {'train_accuracy': tensor(92.0719), 'val_accuracy': tensor(92.0250)},
 3: {'train_accuracy': tensor(93.4688), 'val_accuracy': tensor(93.3150)},
 4: {'train_accuracy': tensor(93.7375), 'val_accuracy': tensor(93.7350)}}

In [None]:
train_accuracies = [entry['train_accuracy'] for entry in baseline_acc.values()]
val_accuracies = [entry['val_accuracy'] for entry in baseline_acc.values()]

mean_train_accuracy = sum(train_accuracies) / len(train_accuracies)
mean_val_accuracy = sum(val_accuracies) / len(val_accuracies)

print("Mean train_accuracy:", mean_train_accuracy)
print("Mean val_accuracy:", mean_val_accuracy)

TypeError: ignored

Four fully connected layers

In [None]:
optimizer.zero_grad()

In [None]:
# create a subclass of nn.Module to hold model structure

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, hidden3_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, hidden3_dim)

        # fourth fully connected layer
        self.fc4 = nn.Linear(hidden3_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)



    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.RELU(out)
        out = self.fc3(out)
        out = self.RELU(out)
        out = self.fc4(out)

        return out

In [None]:
# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
hidden3_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, hidden3_dim, output_dim).to(device)

In [None]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
# create dictionary to store accuracy measures
folds = list(range(5))
baseline_acc = {fold: {'train_accuracy': None, 'val_accuracy': None} for fold in folds}
baseline_acc

{0: {'train_accuracy': None, 'val_accuracy': None},
 1: {'train_accuracy': None, 'val_accuracy': None},
 2: {'train_accuracy': None, 'val_accuracy': None},
 3: {'train_accuracy': None, 'val_accuracy': None},
 4: {'train_accuracy': None, 'val_accuracy': None}}

In [None]:
# loop over folds to calculate average training and validation accuracy

num_epochs=100

for f in range(5):

  # split train into train and validation
  v = train.iloc[partitions[f]]
  tr = train.drop(v.index)

  # initialize custom Dataset subclass
  val_dataset = CustomDataset(v)
  train_dataset = CustomDataset(tr)

  # create DataLoaders
  train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
  val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

  # start training
  for epoch in range(num_epochs):
      loss_avg=0

      for i, (features, labels) in enumerate(train_loader):

          # moving features and labels to gpu
          features=features.float().to(device)
          labels=labels.to(device)

          # make prediction
          y_pred = model(features)

          # Calculate Loss: softmax --> cross entropy loss
          loss = criterion(y_pred, labels)

          # Resets the gradients of all optimized torch
          optimizer.zero_grad()

          # Getting gradients w.r.t. parameters
          loss.backward()

          # Updating parameters
          optimizer.step()

          loss_avg+=loss.item()*(labels.size(0)/len(train_dataset)) # avg of all batches in epoch

  baseline_acc[f]['train_accuracy']=Get_Accuracy(train_loader)
  baseline_acc[f]['val_accuracy']=Get_Accuracy(val_loader)
  optimizer.zero_grad()

baseline_acc

{0: {'train_accuracy': tensor(91.1406), 'val_accuracy': tensor(91.6100)},
 1: {'train_accuracy': tensor(91.0375), 'val_accuracy': tensor(91.2550)},
 2: {'train_accuracy': tensor(90.0125), 'val_accuracy': tensor(90.2350)},
 3: {'train_accuracy': tensor(92.3844), 'val_accuracy': tensor(92.7250)},
 4: {'train_accuracy': tensor(93.0437), 'val_accuracy': tensor(93.3600)}}

In [None]:
train_accuracies = [entry['train_accuracy'] for entry in baseline_acc.values()]
val_accuracies = [entry['val_accuracy'] for entry in baseline_acc.values()]

mean_train_accuracy = sum(train_accuracies) / len(train_accuracies)
mean_val_accuracy = sum(val_accuracies) / len(val_accuracies)

print("Mean train_accuracy:", mean_train_accuracy)
print("Mean val_accuracy:", mean_val_accuracy)

Mean train_accuracy: tensor(91.5237)
Mean val_accuracy: tensor(91.8370)


# Hyperparameter 2: learning rate

Test a series of LRs with one hidden layer, each LR should be tested across 5 folds

In [None]:
optimizer.zero_grad()

In [None]:
# pandas dataframe of learning rates and folds (train)
learning_rates = [0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.4]
folds = range(5)

lr_acc_tr = pd.DataFrame(index=learning_rates, columns=folds)

print(lr_acc_tr)

         0    1    2    3    4
0.001  NaN  NaN  NaN  NaN  NaN
0.010  NaN  NaN  NaN  NaN  NaN
0.025  NaN  NaN  NaN  NaN  NaN
0.050  NaN  NaN  NaN  NaN  NaN
0.100  NaN  NaN  NaN  NaN  NaN
0.200  NaN  NaN  NaN  NaN  NaN
0.400  NaN  NaN  NaN  NaN  NaN


In [None]:
# pandas dataframe of learning rates and folds (val)
learning_rates = [0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.4]
folds = range(5)

lr_acc_v = pd.DataFrame(index=learning_rates, columns=folds)

print(lr_acc_v)

         0    1    2    3    4
0.001  NaN  NaN  NaN  NaN  NaN
0.010  NaN  NaN  NaN  NaN  NaN
0.025  NaN  NaN  NaN  NaN  NaN
0.050  NaN  NaN  NaN  NaN  NaN
0.100  NaN  NaN  NaN  NaN  NaN
0.200  NaN  NaN  NaN  NaN  NaN
0.400  NaN  NaN  NaN  NaN  NaN


In [None]:
# create a subclass of nn.Module to hold model structure

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)



    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.RELU(out)
        out = self.fc3(out)

        return out

In [None]:
# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)

NameError: ignored

In [None]:
# loop over learning_rates, train a nn for each and store accuracy

num_epochs=100

for lr in learning_rates:
  optimizer = torch.optim.SGD(model.parameters(), lr=lr)

  # loop over folds to calculate average training and validation accuracy

  for f in range(5):

    # split train into train and validation
    v = train.iloc[partitions[f]]
    tr = train.drop(v.index)

    # initialize custom Dataset subclass
    val_dataset = CustomDataset(v)
    train_dataset = CustomDataset(tr)

    # create DataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=batch_size,
                                            shuffle=False)

    # start training
    for epoch in range(num_epochs):
        loss_avg=0

        for i, (features, labels) in enumerate(train_loader):

            # moving features and labels to gpu
            features=features.float().to(device)
            labels=labels.to(device)

            # make prediction
            y_pred = model(features)

            # Calculate Loss: softmax --> cross entropy loss
            loss = criterion(y_pred, labels)

            # Resets the gradients of all optimized torch
            optimizer.zero_grad()

            # Getting gradients w.r.t. parameters
            loss.backward()

            # Updating parameters
            optimizer.step()

            loss_avg+=loss.item()*(labels.size(0)/len(train_dataset)) # avg of all batches in epoch

    lr_acc_tr.loc[lr,f]=Get_Accuracy(train_loader).item()
    lr_acc_v.loc[lr,f]=Get_Accuracy(val_loader).item()
    optimizer.zero_grad()
  print(lr, ' completed')

print(lr_acc_tr)
print(lr_acc_v)

NameError: ignored

In [None]:
lr_acc_tr['mean'] = lr_acc_tr.mean(axis=1)
lr_acc_tr

Unnamed: 0,0,1,2,3,4,mean
0.001,,,,,,
0.01,,,,,,
0.025,,,,,,
0.05,,,,,,
0.1,,,,,,
0.2,,,,,,
0.4,,,,,,


In [None]:
lr_acc_v['mean'] = lr_acc_v.mean(axis=1)
lr_acc_v

Unnamed: 0,0,1,2,3,4,mean
0.001,,,,,,
0.01,,,,,,
0.025,,,,,,
0.05,,,,,,
0.1,,,,,,
0.2,,,,,,
0.4,,,,,,


# Hyperparameter 3: Momentum

In [None]:
optimizer.zero_grad()

NameError: ignored

In [None]:
# pandas dataframes of momentum values and folds
momentums = [0.001, 0.01, 0.05, 0.1, 0.25, 0.5]
folds = range(5)

m_acc_tr = pd.DataFrame(index=momentums, columns=folds)

m_acc_v = pd.DataFrame(index=momentums, columns=folds)

In [None]:
# create a subclass of nn.Module to hold model structure

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)



    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.RELU(out)
        out = self.fc3(out)

        return out

In [None]:
# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)

In [None]:
# loop over momentums, train a nn for each and store accuracy

num_epochs=100

for m in momentums:
  print("momentum = ", m)
  optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=m)

  # loop over folds to calculate average training and validation accuracy

  for f in range(5):
    print ('fold = ', f)
    # split train into train and validation
    v = train.iloc[partitions[f]]
    tr = train.drop(v.index)

    # initialize custom Dataset subclass
    val_dataset = CustomDataset(v)
    train_dataset = CustomDataset(tr)

    # create DataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=batch_size,
                                            shuffle=False)

    # start training
    for epoch in range(num_epochs):

        loss_avg=0

        for i, (features, labels) in enumerate(train_loader):

            # moving features and labels to gpu
            features=features.float().to(device)
            labels=labels.to(device)

            # make prediction
            y_pred = model(features)

            # Calculate Loss: softmax --> cross entropy loss
            loss = criterion(y_pred, labels)

            # Resets the gradients of all optimized torch
            optimizer.zero_grad()

            # Getting gradients w.r.t. parameters
            loss.backward()

            # Updating parameters
            optimizer.step()

    m_acc_tr.loc[m,f]=Get_Accuracy(train_loader).item()
    m_acc_v.loc[m,f]=Get_Accuracy(val_loader).item()
    optimizer.zero_grad()

momentum =  0.15
fold =  0
              0    1    2    3    4
0.15  89.662498  NaN  NaN  NaN  NaN
0.20        NaN  NaN  NaN  NaN  NaN
              0    1    2    3    4
0.15  89.730003  NaN  NaN  NaN  NaN
0.20        NaN  NaN  NaN  NaN  NaN
fold =  1
              0          1    2    3    4
0.15  89.662498  89.065628  NaN  NaN  NaN
0.20        NaN        NaN  NaN  NaN  NaN
              0       1    2    3    4
0.15  89.730003  89.195  NaN  NaN  NaN
0.20        NaN     NaN  NaN  NaN  NaN
fold =  2
              0          1          2    3    4
0.15  89.662498  89.065628  90.103127  NaN  NaN
0.20        NaN        NaN        NaN  NaN  NaN
              0       1          2    3    4
0.15  89.730003  89.195  90.285004  NaN  NaN
0.20        NaN     NaN        NaN  NaN  NaN
fold =  3
              0          1          2       3    4
0.15  89.662498  89.065628  90.103127  88.875  NaN
0.20        NaN        NaN        NaN     NaN  NaN
              0       1          2          3    4
0

In [None]:
m_acc_tr['mean'] = m_acc_tr.mean(axis=1)
m_acc_tr

Unnamed: 0,0,1,2,3,4,mean
0.15,89.662498,89.065628,90.103127,88.875,82.503128,88.041876
0.2,89.287498,89.459373,89.790627,90.259377,90.515625,89.8625


In [None]:
m_acc_v['mean'] = m_acc_v.mean(axis=1)
m_acc_v

Unnamed: 0,0,1,2,3,4,mean
0.15,89.730003,89.195,90.285004,88.864998,82.764999,88.168001
0.2,89.445,89.43,90.029999,90.379997,90.690002,89.995


# Hyperparameter 4: batch normalization

In [None]:
optimizer.zero_grad()

In [None]:
# create dictionary to store accuracy measures
folds = list(range(5))
batchnorm_acc = {fold: {'train_accuracy': None, 'val_accuracy': None} for fold in folds}
batchnorm_acc

{0: {'train_accuracy': None, 'val_accuracy': None},
 1: {'train_accuracy': None, 'val_accuracy': None},
 2: {'train_accuracy': None, 'val_accuracy': None},
 3: {'train_accuracy': None, 'val_accuracy': None},
 4: {'train_accuracy': None, 'val_accuracy': None}}

In [None]:
# create a subclass of nn.Module to hold model structure

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # first batch norm
        self.batchnorm1 = nn.BatchNorm1d(hidden1_dim)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # second batch norm
        self.batchnorm2 = nn.BatchNorm1d(hidden2_dim)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)



    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.batchnorm1(out)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.batchnorm2(out)
        out = self.RELU(out)
        out = self.fc3(out)

        return out

# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)

# set established hyperparameters
learning_rate = 0.1
best_momentum = 0.25
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=best_momentum)

In [None]:
# loop over folds to calculate average training and validation accuracy

num_epochs=100

for f in range(5):

  # split train into train and validation
  v = train.iloc[partitions[f]]
  tr = train.drop(v.index)

  # initialize custom Dataset subclass
  val_dataset = CustomDataset(v)
  train_dataset = CustomDataset(tr)

  # create DataLoaders
  train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
  val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

  # start training
  for epoch in range(num_epochs):
      loss_avg=0

      for i, (features, labels) in enumerate(train_loader):

          # moving features and labels to gpu
          features=features.float().to(device)
          labels=labels.to(device)

          # make prediction
          y_pred = model(features)

          # Calculate Loss: softmax --> cross entropy loss
          loss = criterion(y_pred, labels)

          # Resets the gradients of all optimized torch
          optimizer.zero_grad()

          # Getting gradients w.r.t. parameters
          loss.backward()

          # Updating parameters
          optimizer.step()

          loss_avg+=loss.item()*(labels.size(0)/len(train_dataset)) # avg of all batches in epoch

  batchnorm_acc[f]['train_accuracy']=Get_Accuracy(train_loader)
  batchnorm_acc[f]['val_accuracy']=Get_Accuracy(val_loader)
  optimizer.zero_grad()

batchnorm_acc

{0: {'train_accuracy': tensor(91.6531), 'val_accuracy': tensor(91.5950)},
 1: {'train_accuracy': tensor(91.6594), 'val_accuracy': tensor(91.7500)},
 2: {'train_accuracy': tensor(91.9219), 'val_accuracy': tensor(92.1100)},
 3: {'train_accuracy': tensor(91.9031), 'val_accuracy': tensor(92.0450)},
 4: {'train_accuracy': tensor(91.5406), 'val_accuracy': tensor(91.7100)}}

In [None]:
train_accuracies = [entry['train_accuracy'] for entry in batchnorm_acc.values()]
val_accuracies = [entry['val_accuracy'] for entry in batchnorm_acc.values()]

mean_train_accuracy = sum(train_accuracies) / len(train_accuracies)
mean_val_accuracy = sum(val_accuracies) / len(val_accuracies)

print("Mean train_accuracy:", mean_train_accuracy)
print("Mean val_accuracy:", mean_val_accuracy)

Mean train_accuracy: tensor(91.7356)
Mean val_accuracy: tensor(91.8420)


# Hyperparameter 5 and 6: dropout and early stopping

The effects of these cannot be tested on 100 epochs, because the network is not overfitting at this point. I will run five early-stopped models with dropout and 5 without. Whichever choice has the best mean accuracy will be my final model. I can then test it on the reserved test data.

In [None]:
# early stopping function (credit to Jeff Heaton)
class EarlyStopping():
    def __init__(self, patience=20, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_model = None
        self.best_acc = None
        self.counter = 0

    def __call__(self, model, val_acc):
        # if we don't have a recorded best accuracy, set one
        if self.best_acc == None:
            self.best_acc = val_acc
            self.best_model = copy.deepcopy(model)
        # if acc improves, reset counter, save accuracy, save weights
        elif val_acc - self.best_acc > self.min_delta:
            self.best_acc = val_acc
            self.counter = 0
            self.best_model.load_state_dict(model.state_dict())
        # if acc does not improve, increment counter
        elif val_acc - self.best_acc  <= self.min_delta:
            self.counter += 1
            # if that increment exceeds patience, load the best_model weights into model
            # then return True
            if self.counter >= self.patience:
                model.load_state_dict(self.best_model.state_dict())
                return True


        # always return False unless patience is exceeded
        return False


In [None]:
# create dictionaries to store accuracy measures
folds = list(range(5))
withdropout_acc = {fold: {'train_accuracy': None, 'val_accuracy': None} for fold in folds}
nodropout_acc = {fold: {'train_accuracy': None, 'val_accuracy': None} for fold in folds}

With dropout

In [None]:
optimizer.zero_grad()

In [None]:
# definition of accuracy which allows for a different feedforward process for testing accuracy
def Get_Accuracy(data):
  model.eval()
  softmax=nn.Softmax(dim=1)
  correct = 0
  total = 0
  with torch.no_grad():
    # Iterate through test dataset
    for i, (features, labels) in enumerate(data):

        features=features.float().to(device)
        labels=labels.to(device)

        # Forward pass only to get logits/output
        outputs = softmax(model.forward_all_neurons(features)) # here you do need softmax
        # Get predictions from the maximum value
        _, predicted = torch.max(outputs.data, 1)

        # Total number of labels
        total += labels.size(0)

        # Total correct predictions
        correct += (predicted == labels).sum()

  accuracy = 100 * correct / total
  return accuracy
  model.train()

In [None]:
# WITH dropout

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # first dropout
        self.dropout1 = nn.Dropout(0.5)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # second dropout
        self.dropout2 = nn.Dropout(0.2)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)


    def forward(self, x):
        # feedforward process for training
        out = self.fc1(x)
        out = self.dropout1(out)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.dropout2(out)
        out = self.RELU(out)
        out = self.fc3(out)
        return out

    def forward_all_neurons(self, x):
        # feedforward process for prediction
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.RELU(out)
        out = self.fc3(out)
        return out

# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)

# set established hyperparameters
learning_rate = 0.05
best_momentum = 0.25
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=best_momentum)

In [None]:
# loop over folds to calculate average training and validation accuracy

num_epochs=1000

for f in range(5):
    print('fold: ', f)
    # clear old stuff for new fold
    es=EarlyStopping()
    done = False
    optimizer.zero_grad()

    # split train into train and validation
    v = train.iloc[partitions[f]]
    tr = train.drop(v.index)

    # initialize custom Dataset subclass
    val_dataset = CustomDataset(v)
    train_dataset = CustomDataset(tr)

    # create DataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=batch_size,
                                            shuffle=False)

    # start training
    epoch=0

    while epoch < num_epochs and not done:
        epoch += 1

        for i, (features, labels) in enumerate(train_loader):

            # moving features and labels to gpu
            features=features.float().to(device)
            labels=labels.to(device)

            # make prediction
            y_pred = model.forward(features)

            # Calculate Loss: softmax --> cross entropy loss
            loss = criterion(y_pred, labels)

            # Resets the gradients of all optimized torch
            optimizer.zero_grad()

            # Getting gradients w.r.t. parameters
            loss.backward()

            # Updating parameters
            optimizer.step()

        # epoch validation accuracy calculation for early stopping
        epoch_val_acc=Get_Accuracy(val_loader).item()

        # if early stopping evaluates to true, set done = true
        if es(model, epoch_val_acc):
            done = True
            print('stopped on fold ', f, 'epoch ', epoch)


    withdropout_acc[f]['train_accuracy']=Get_Accuracy(train_loader).item()
    withdropout_acc[f]['val_accuracy']=Get_Accuracy(val_loader).item()
    optimizer.zero_grad()

withdropout_acc

fold:  0
stopped on fold  0 epoch  101
fold:  1
stopped on fold  1 epoch  96
fold:  2
stopped on fold  2 epoch  41
fold:  3
stopped on fold  3 epoch  37
fold:  4
stopped on fold  4 epoch  23


{0: {'train_accuracy': 91.88749694824219, 'val_accuracy': 91.86250305175781},
 1: {'train_accuracy': 92.4906234741211, 'val_accuracy': 92.51249694824219},
 2: {'train_accuracy': 92.51875305175781, 'val_accuracy': 92.76249694824219},
 3: {'train_accuracy': 92.4625015258789, 'val_accuracy': 92.7249984741211},
 4: {'train_accuracy': 92.1031265258789, 'val_accuracy': 92.5}}

In [None]:
train_accuracies = [entry['train_accuracy'] for entry in withdropout_acc.values()]
val_accuracies = [entry['val_accuracy'] for entry in withdropout_acc.values()]

mean_train_accuracy = sum(train_accuracies) / len(train_accuracies)
mean_val_accuracy = sum(val_accuracies) / len(val_accuracies)

print("Mean train_accuracy:", mean_train_accuracy)
print("Mean val_accuracy:", mean_val_accuracy)

Mean train_accuracy: 92.29250030517578
Mean val_accuracy: 92.47249908447266


Without dropout

In [None]:
optimizer.zero_grad()

In [None]:
# WITHOUT dropout

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)



    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.RELU(out)
        out = self.fc3(out)

        return out

# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)

# set established hyperparameters
learning_rate = 0.05
best_momentum = 0.25
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=best_momentum)

In [None]:
# loop over folds to calculate average training and validation accuracy

num_epochs=1000

for f in range(5):
    print('fold: ', f)
    # clear old stuff for new fold
    es=EarlyStopping()
    done = False

    # split train into train and validation
    v = train.iloc[partitions[f]]
    tr = train.drop(v.index)

    # initialize custom Dataset subclass
    val_dataset = CustomDataset(v)
    train_dataset = CustomDataset(tr)

    # create DataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=batch_size,
                                            shuffle=False)

    # start training
    epoch=0

    while epoch < num_epochs and not done:
        epoch += 1

        for i, (features, labels) in enumerate(train_loader):

            # moving features and labels to gpu
            features=features.float().to(device)
            labels=labels.to(device)

            # make prediction
            y_pred = model(features)

            # Calculate Loss: softmax --> cross entropy loss
            loss = criterion(y_pred, labels)

            # Resets the gradients of all optimized torch
            optimizer.zero_grad()

            # Getting gradients w.r.t. parameters
            loss.backward()

            # Updating parameters
            optimizer.step()

        # epoch validation accuracy calculation for early stopping
        epoch_val_acc=Get_Accuracy(val_loader).item()

        # if early stopping evaluates to true, set done = true
        if es(model, epoch_val_acc):
            done = True
            print('stopped on fold ', f, 'epoch ', epoch)


    nodropout_acc[f]['train_accuracy']=Get_Accuracy(train_loader).item()
    nodropout_acc[f]['val_accuracy']=Get_Accuracy(val_loader).item()
    optimizer.zero_grad()

nodropout_acc

fold:  0
stopped on fold  0 epoch  32
fold:  1
stopped on fold  1 epoch  27
fold:  2
stopped on fold  2 epoch  42
fold:  3
stopped on fold  3 epoch  22
fold:  4
stopped on fold  4 epoch  44


{0: {'train_accuracy': 92.6312484741211, 'val_accuracy': 92.82499694824219},
 1: {'train_accuracy': 92.55937194824219, 'val_accuracy': 92.80000305175781},
 2: {'train_accuracy': 92.47187805175781, 'val_accuracy': 92.8375015258789},
 3: {'train_accuracy': 92.7281265258789, 'val_accuracy': 92.76249694824219},
 4: {'train_accuracy': 92.6624984741211, 'val_accuracy': 92.7874984741211}}

In [None]:
train_accuracies = [entry['train_accuracy'] for entry in nodropout_acc.values()]
val_accuracies = [entry['val_accuracy'] for entry in nodropout_acc.values()]

mean_train_accuracy = sum(train_accuracies) / len(train_accuracies)
mean_val_accuracy = sum(val_accuracies) / len(val_accuracies)

print("Mean train_accuracy:", mean_train_accuracy)
print("Mean val_accuracy:", mean_val_accuracy)

Mean train_accuracy: 92.61062469482422
Mean val_accuracy: 92.80249938964843


# Final training

In [None]:
optimizer.zero_grad()

In [None]:
# create a subclass of nn.Module to hold model structure

class Model(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(Model, self).__init__()

        # first fully connected layer
        self.fc1 = nn.Linear(input_dim, hidden1_dim)

        # second fully connected layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)

        # third fully connected layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

        # Relu activation function
        self.RELU = nn.ReLU()

        self.softmax=nn.Softmax(dim=1)



    def forward(self, x):

        # feedforward process
        out = self.fc1(x)
        out = self.RELU(out)
        out = self.fc2(out)
        out = self.RELU(out)
        out = self.fc3(out)

        return out

# initialize the model and move it to gpu

input_dim = 12
hidden1_dim = 6
hidden2_dim = 6
output_dim = 2

model = Model(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)

# established params
optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.25)
num_epochs=100

In [None]:
# start training
for epoch in range(num_epochs):

    for i, (features, labels) in enumerate(train_loader_complete):

        # moving features and labels to gpu
        features=features.float().to(device)
        labels=labels.to(device)

        # make prediction
        y_pred = model(features)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(y_pred, labels)

        # Resets the gradients of all optimized torch
        optimizer.zero_grad()

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

AttributeError: ignored

In [None]:
print('training accuracy: ', Get_Accuracy(train_loader_complete).item())
print('test accuracy: ', Get_Accuracy(test_loader).item())

training accuracy:  92.56500244140625
test accuracy:  92.8550033569336
