# Project: Facial emotion recognition
## MTH767P - Group 4

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
from pathlib import Path
from tqdm import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

from utils import *

## Data loading and pre-processing 

#### Read data

In [3]:
# define dictionary for emotions
emotion_dict = {0: 'Angry', 
                1: 'Disgust', 
                2: 'Fear',
                3: 'Happy', 
                4: 'Sad',
                5: 'Surprise',
                6: 'Neutral'}

In [4]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5), (0.5))])
dataset = EmotionsDataset(root='./dataset', fname='icml_face_data.csv', transform=transform)

#### Visualize data


In [6]:
# generate sample of images
#fig, ax = plt.subplots(5, len(emotion_dict), figsize=(8, 4.5))

#for i, em in emotion_dict.items():
    #emotion = dataset.data[dataset.targets==i][:5]
    # set title
    #ax[0, i].set_title(em)
    #for j in range(emotion.shape[0]):      
        #im = emotion[j]
        #ax[j, i].imshow(im, cmap='gray')
        #ax[j, i].axis('off')
#plt.tight_layout();
#plt.savefig(output_path/'dataset_sample.png')

#### Split dataset

In [7]:
batch_size = 128
trainloader, testloader = dataset.split(ratio=0.8, batch_size=batch_size)

---
## Convolutional neural network setup 
### Network architecture

In [34]:
# Architecture of a simple convnet
# Specify your model by providing the list of dicts, each containing the name of the layer class (case-insensitive) 
# and the parameters necessary to initialise it. The in_channels and in_features are not necessary as they are 
# calculated automatically
# If the layer contains the activation function, give an initialised object of the relevant class

layers = [dict(ltype='conv2d', out_channels=6, kernel=7, activation=nn.ReLU(True)),
          dict(ltype='maxpool2d', kernel=2, stride=2),
          dict(ltype='conv2d', out_channels=16, kernel=5),
          dict(ltype='maxpool2d', kernel=2, stride=2, activation=nn.ReLU(True)),
          dict(ltype='linear', out_features=120, activation=nn.ReLU(True)),
          dict(ltype='linear', out_features=7)
         ]
net = ConvNet(layers)

In [41]:
# Architecture with localization network (see Minaee 2021, and code in https://pytorch.org/tutorials/intermediate/spatial_transformer_tutorial.html)

atn_layers = dict(
            attention=[dict(ltype='conv2d', out_channels=8, kernel=3),
                       dict(ltype='maxpool2d', kernel=2, stride=2, activation=nn.ReLU(True)),
                       dict(ltype='conv2d', out_channels=10, kernel=3),
                       dict(ltype='maxpool2d', kernel=2, stride=2, activation=nn.ReLU(True)),
                       dict(ltype='linear', out_features=48, activation=nn.ReLU(True)),
                       dict(ltype='linear', out_features=3*2)
                      ],
            features=[dict(ltype='conv2d', out_channels=10, kernel=3, activation=nn.ReLU(True)),
                      dict(ltype='conv2d', out_channels=10, kernel=3),
                      dict(ltype='maxpool2d', kernel=2, stride=2, activation=nn.ReLU(True)),
                      dict(ltype='conv2d', out_channels=10, kernel=3, activation=nn.ReLU(True)),
                      dict(ltype='conv2d', out_channels=10, kernel=3),
                      dict(ltype='maxpool2d', kernel=2, stride=2, activation=nn.ReLU(True)),
                      dict(ltype='dropout', p=0.5),
                      dict(ltype='linear', out_features=50),
                      dict(ltype='linear', out_features=7)
                     ])
net = AttentionalNet(atn_layers)

In [42]:
# Weight initialisation
@torch.no_grad()
def init_weights(m):
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.normal_(m.weight, 0, 0.05)
        nn.init.normal_(m.bias, 0, 0.05)
    
net = net.apply(init_weights)

In [16]:
# Display the architecture in plain text format
net

AttentionalNet(
  (localization): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
    (3): Conv2d(8, 10, kernel_size=(3, 3), stride=(1, 1))
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): ReLU(inplace=True)
  )
  (fc_loc): Sequential(
    (0): Linear(in_features=1000, out_features=48, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=48, out_features=6, bias=True)
  )
  (conv): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): ReLU(inplace=True)
    (5): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (8): 

### Loss function and optimizer

In [43]:
# Learning rate calc
stdata = (dataset.data / 255 - 0.5) / 0.5
lr = 3.9/np.linalg.norm(stdata.reshape(-1, 48*48))**2
lr = 0.005

# L2 regularisation parameter
lmda = 0.001

# Re-weighting classes for imbalance
weights = 1 / np.unique(dataset.targets[dataset.train_idxs], return_counts=True)[1]

In [44]:
criterion = nn.CrossEntropyLoss(torch.tensor(weights / np.sum(weights), dtype=torch.float))
optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=lmda)  # optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
scheduler = None  # optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)  # for adaptive learning rate 

---
## Training the neural network
### Hyper parameters tuning

In [325]:
# Define number of folds to split the datasets into
# Define a grid of parameter values to check (if more then one param is tested - supply list of tuples as a grid)
# Define the objective function by using cross_validate() method that will accept iterated values of object(s) using 
# the parameter(s)

from itertools import product

# Hyper parameters
lmdas = np.append(np.linspace(0, 0.01, 2), np.linspace(0.02, 0.1, 3))  # <-- regularisation parameters
lrs = pow(10, -np.linspace(1, 3, 3))  # <-- learning rates
dps = [0.3, 0.5]  # np.linspace(0.1, 0.5, 3)  # <-- dropout rates
grid = list(product(lmdas, lrs, dps))

Kfolds = 5

def eval_gridpoint(pt):
    global atn_layers, Kfolds, dataset, criterion, scheduler, emotion_dict, batch_size
    l, lr, dp = pt
    opt = optim.Adam(net.parameters(), lr=lr, weight_decay=l)
    do_update = dict(ltype='dropout', p=dp)
    new_spec = atn_layers.copy()
    new_spec['features'][-3] = do_update
    new_net = AttentionalNet(new_spec)
    return cross_validate(Kfolds, dataset, new_net, criterion, opt, scheduler, emotion_dict, batch_size) 

errors = []  # <-- in case there is an error during execution, we will still retain all previously calculated values
cur_best = (0.01, 0.001, 0.3)
bestval = grid_search(eval_gridpoint, grid, 'Regularisation, learning rate, dropout rate', out=errors)

Testing Regularisation, learning rate, dropout rate value (0.0, 0.1, 0.3)...
Fold 1:
Epoch 1


5742it [01:51, 51.73it/s]


Finished training. Avg loss: 1.9506


100%|██████████| 5742/5742 [00:16<00:00, 338.84it/s]


Fold 2:
Epoch 1


5742it [01:50, 51.83it/s]


Finished training. Avg loss: 1.9509


100%|██████████| 5742/5742 [00:15<00:00, 359.65it/s]


Fold 3:
Epoch 1


5742it [01:49, 52.62it/s]


Finished training. Avg loss: 1.9507


100%|██████████| 5742/5742 [00:16<00:00, 351.24it/s]


Fold 4:
Epoch 1


5742it [01:49, 52.39it/s]


Finished training. Avg loss: 1.9505


100%|██████████| 5742/5742 [00:15<00:00, 361.61it/s]


Fold 5:
Epoch 1


5742it [01:51, 51.67it/s]


Finished training. Avg loss: 1.9506


100%|██████████| 5741/5741 [00:16<00:00, 352.90it/s]


Done, error=0.856282
Testing Regularisation, learning rate, dropout rate value (0.0, 0.1, 0.5)...
Fold 1:
Epoch 1


5742it [01:52, 51.04it/s]


Finished training. Avg loss: 1.9784


100%|██████████| 5742/5742 [00:17<00:00, 328.70it/s]


Fold 2:
Epoch 1


5742it [01:53, 50.55it/s]


Finished training. Avg loss: 1.9788


100%|██████████| 5742/5742 [00:16<00:00, 341.19it/s]


Fold 3:
Epoch 1


5742it [01:52, 50.81it/s]


Finished training. Avg loss: 1.9786


100%|██████████| 5742/5742 [00:17<00:00, 336.44it/s]


Fold 4:
Epoch 1


5742it [01:55, 49.64it/s]


Finished training. Avg loss: 1.9786


100%|██████████| 5742/5742 [00:16<00:00, 346.69it/s]


Fold 5:
Epoch 1


5742it [01:53, 50.58it/s]


Finished training. Avg loss: 1.9792


100%|██████████| 5741/5741 [00:16<00:00, 346.62it/s]


Done, error=0.983524
Testing Regularisation, learning rate, dropout rate value (0.0, 0.01, 0.3)...
Fold 1:
Epoch 1


5742it [01:51, 51.50it/s]


Finished training. Avg loss: 1.9530


100%|██████████| 5742/5742 [00:16<00:00, 344.92it/s]


Fold 2:
Epoch 1


5742it [01:52, 51.08it/s]


Finished training. Avg loss: 1.9542


100%|██████████| 5742/5742 [00:16<00:00, 349.44it/s]


Fold 3:
Epoch 1


5742it [01:55, 49.67it/s]


Finished training. Avg loss: 1.9533


100%|██████████| 5742/5742 [00:16<00:00, 347.43it/s]


Fold 4:
Epoch 1


5742it [01:54, 50.29it/s]


Finished training. Avg loss: 1.9535


100%|██████████| 5742/5742 [00:16<00:00, 343.73it/s]


Fold 5:
Epoch 1


5742it [01:53, 50.39it/s]


Finished training. Avg loss: 1.9539


100%|██████████| 5741/5741 [00:17<00:00, 323.28it/s]


Done, error=0.860880
Testing Regularisation, learning rate, dropout rate value (0.0, 0.01, 0.5)...
Fold 1:
Epoch 1


5742it [01:52, 50.86it/s]


Finished training. Avg loss: 1.9638


100%|██████████| 5742/5742 [00:16<00:00, 341.91it/s]


Fold 2:
Epoch 1


5742it [01:53, 50.73it/s]


Finished training. Avg loss: 1.9639


100%|██████████| 5742/5742 [00:16<00:00, 339.92it/s]


Fold 3:
Epoch 1


5742it [01:53, 50.67it/s]


Finished training. Avg loss: 1.9641


100%|██████████| 5742/5742 [00:16<00:00, 338.47it/s]


Fold 4:
Epoch 1


5742it [01:53, 50.55it/s]


Finished training. Avg loss: 1.9638


100%|██████████| 5742/5742 [00:16<00:00, 342.54it/s]


Fold 5:
Epoch 1


5742it [01:55, 49.64it/s]


Finished training. Avg loss: 1.9643


100%|██████████| 5741/5741 [00:17<00:00, 324.41it/s]


Done, error=0.860845
Testing Regularisation, learning rate, dropout rate value (0.0, 0.001, 0.3)...
Fold 1:
Epoch 1


5742it [01:53, 50.60it/s]


Finished training. Avg loss: 1.9553


100%|██████████| 5742/5742 [00:16<00:00, 348.97it/s]


Fold 2:
Epoch 1


5742it [01:51, 51.58it/s]


Finished training. Avg loss: 1.9550


100%|██████████| 5742/5742 [00:16<00:00, 347.14it/s]


Fold 3:
Epoch 1


5742it [01:51, 51.57it/s]


Finished training. Avg loss: 1.9546


100%|██████████| 5742/5742 [00:16<00:00, 345.88it/s]


Fold 4:
Epoch 1


5742it [01:51, 51.47it/s]


Finished training. Avg loss: 1.9552


100%|██████████| 5742/5742 [00:16<00:00, 348.53it/s]


Fold 5:
Epoch 1


5742it [01:54, 50.07it/s]


Finished training. Avg loss: 1.9545


100%|██████████| 5741/5741 [00:16<00:00, 341.48it/s]


Done, error=0.984813
Testing Regularisation, learning rate, dropout rate value (0.0, 0.001, 0.5)...
Fold 1:
Epoch 1


5742it [01:51, 51.34it/s]


Finished training. Avg loss: 1.9482


100%|██████████| 5742/5742 [00:16<00:00, 356.62it/s]


Fold 2:
Epoch 1


5742it [01:49, 52.28it/s]


Finished training. Avg loss: 1.9476


100%|██████████| 5742/5742 [00:16<00:00, 355.65it/s]


Fold 3:
Epoch 1


5742it [01:49, 52.25it/s]


Finished training. Avg loss: 1.9481


100%|██████████| 5742/5742 [00:16<00:00, 354.05it/s]


Fold 4:
Epoch 1


5742it [01:49, 52.44it/s]


Finished training. Avg loss: 1.9480


100%|██████████| 5742/5742 [00:16<00:00, 340.80it/s]


Fold 5:
Epoch 1


5742it [01:51, 51.39it/s]


Finished training. Avg loss: 1.9481


100%|██████████| 5741/5741 [00:16<00:00, 357.64it/s]


Done, error=0.748685
Testing Regularisation, learning rate, dropout rate value (0.01, 0.1, 0.3)...
Fold 1:
Epoch 1


5742it [01:55, 49.65it/s]


Finished training. Avg loss: 1.9501


100%|██████████| 5742/5742 [00:16<00:00, 352.86it/s]


Fold 2:
Epoch 1


5742it [01:52, 51.25it/s]


Finished training. Avg loss: 1.9501


100%|██████████| 5742/5742 [00:16<00:00, 357.57it/s]


Fold 3:
Epoch 1


5742it [01:51, 51.48it/s]


Finished training. Avg loss: 1.9503


100%|██████████| 5742/5742 [00:16<00:00, 355.46it/s]


Fold 4:
Epoch 1


5742it [01:52, 51.05it/s]


Finished training. Avg loss: 1.9504


100%|██████████| 5742/5742 [00:16<00:00, 338.99it/s]


Fold 5:
Epoch 1


5742it [01:54, 50.25it/s]


Finished training. Avg loss: 1.9506


100%|██████████| 5741/5741 [00:16<00:00, 356.76it/s]


Done, error=0.827057
Testing Regularisation, learning rate, dropout rate value (0.01, 0.1, 0.5)...
Fold 1:
Epoch 1


5742it [01:53, 50.55it/s]


Finished training. Avg loss: 1.9430


100%|██████████| 5742/5742 [00:16<00:00, 358.07it/s]


Fold 2:
Epoch 1


5742it [01:51, 51.29it/s]


Finished training. Avg loss: 1.9433


100%|██████████| 5742/5742 [00:16<00:00, 355.59it/s]


Fold 3:
Epoch 1


5742it [01:51, 51.29it/s]


Finished training. Avg loss: 1.9435


100%|██████████| 5742/5742 [00:16<00:00, 354.71it/s]


Fold 4:
Epoch 1


5742it [01:52, 50.88it/s]


Finished training. Avg loss: 1.9432


100%|██████████| 5742/5742 [00:16<00:00, 341.43it/s]


Fold 5:
Epoch 1


5742it [01:53, 50.59it/s]


Finished training. Avg loss: 1.9436


100%|██████████| 5741/5741 [00:16<00:00, 352.83it/s]


Done, error=0.748685
Testing Regularisation, learning rate, dropout rate value (0.01, 0.01, 0.3)...
Fold 1:
Epoch 1


5742it [01:55, 49.83it/s]


Finished training. Avg loss: 1.9376


100%|██████████| 5742/5742 [00:16<00:00, 352.82it/s]


Fold 2:
Epoch 1


5742it [01:55, 49.78it/s]


Finished training. Avg loss: 1.9377


100%|██████████| 5742/5742 [00:16<00:00, 342.94it/s]


Fold 3:
Epoch 1


5742it [01:54, 50.29it/s]


Finished training. Avg loss: 1.9380


100%|██████████| 5742/5742 [00:16<00:00, 345.93it/s]


Fold 4:
Epoch 1


5742it [01:55, 49.74it/s]


Finished training. Avg loss: 1.9370


100%|██████████| 5742/5742 [00:17<00:00, 330.11it/s]


Fold 5:
Epoch 1


5742it [01:55, 49.66it/s]


Finished training. Avg loss: 1.9377


100%|██████████| 5741/5741 [00:17<00:00, 324.76it/s]


Done, error=0.857292
Testing Regularisation, learning rate, dropout rate value (0.01, 0.01, 0.5)...
Fold 1:
Epoch 1


5742it [01:58, 48.65it/s]


Finished training. Avg loss: 1.9703


100%|██████████| 5742/5742 [00:16<00:00, 350.19it/s]


Fold 2:
Epoch 1


5742it [01:55, 49.77it/s]


Finished training. Avg loss: 1.9705


100%|██████████| 5742/5742 [00:16<00:00, 347.66it/s]


Fold 3:
Epoch 1


5742it [01:54, 49.99it/s]


Finished training. Avg loss: 1.9705


100%|██████████| 5742/5742 [00:16<00:00, 349.78it/s]


Fold 4:
Epoch 1


5742it [01:55, 49.62it/s]


Finished training. Avg loss: 1.9696


100%|██████████| 5742/5742 [00:17<00:00, 332.82it/s]


Fold 5:
Epoch 1


5742it [01:54, 49.97it/s]


Finished training. Avg loss: 1.9699


100%|██████████| 5741/5741 [00:16<00:00, 350.24it/s]


Done, error=0.860845
Testing Regularisation, learning rate, dropout rate value (0.01, 0.001, 0.3)...
Fold 1:
Epoch 1


5742it [01:55, 49.56it/s]


Finished training. Avg loss: 1.9294


100%|██████████| 5742/5742 [00:16<00:00, 350.01it/s]


Fold 2:
Epoch 1


5742it [01:54, 50.31it/s]


Finished training. Avg loss: 1.9296


100%|██████████| 5742/5742 [00:16<00:00, 344.55it/s]


Fold 3:
Epoch 1


5742it [01:54, 50.12it/s]


Finished training. Avg loss: 1.9296


100%|██████████| 5742/5742 [00:17<00:00, 329.66it/s]


Fold 4:
Epoch 1


5742it [01:56, 49.49it/s]


Finished training. Avg loss: 1.9297


100%|██████████| 5742/5742 [00:16<00:00, 347.74it/s]


Fold 5:
Epoch 1


5742it [01:53, 50.38it/s]


Finished training. Avg loss: 1.9297


100%|██████████| 5741/5741 [00:16<00:00, 346.93it/s]


Done, error=0.748685
Testing Regularisation, learning rate, dropout rate value (0.01, 0.001, 0.5)...
Fold 1:
Epoch 1


5742it [01:56, 49.08it/s]


Finished training. Avg loss: 1.9762


100%|██████████| 5742/5742 [00:16<00:00, 356.00it/s]


Fold 2:
Epoch 1


5742it [01:54, 50.08it/s]


Finished training. Avg loss: 1.9764


100%|██████████| 5742/5742 [00:16<00:00, 348.00it/s]


Fold 3:
Epoch 1


5742it [01:53, 50.38it/s]


Finished training. Avg loss: 1.9766


100%|██████████| 5742/5742 [00:16<00:00, 346.93it/s]


Fold 4:
Epoch 1


5742it [01:53, 50.41it/s]


Finished training. Avg loss: 1.9762


100%|██████████| 5742/5742 [00:16<00:00, 341.71it/s]


Fold 5:
Epoch 1


5742it [01:55, 49.60it/s]


Finished training. Avg loss: 1.9766


100%|██████████| 5741/5741 [00:16<00:00, 349.87it/s]


Done, error=0.984813
Testing Regularisation, learning rate, dropout rate value (0.02, 0.1, 0.3)...
Fold 1:
Epoch 1


5742it [01:54, 50.28it/s]


Finished training. Avg loss: 1.9412


100%|██████████| 5742/5742 [00:16<00:00, 342.13it/s]


Fold 2:
Epoch 1


5742it [01:53, 50.81it/s]


Finished training. Avg loss: 1.9419


100%|██████████| 5742/5742 [00:17<00:00, 335.03it/s]


Fold 3:
Epoch 1


5742it [01:54, 50.23it/s]


Finished training. Avg loss: 1.9417


100%|██████████| 5742/5742 [00:16<00:00, 340.44it/s]


Fold 4:
Epoch 1


5742it [01:59, 48.02it/s]


Finished training. Avg loss: 1.9417


100%|██████████| 5742/5742 [00:17<00:00, 330.54it/s]


Fold 5:
Epoch 1


5742it [01:54, 50.00it/s]


Finished training. Avg loss: 1.9412


100%|██████████| 5741/5741 [00:16<00:00, 345.93it/s]


Done, error=0.889547
Testing Regularisation, learning rate, dropout rate value (0.02, 0.1, 0.5)...
Fold 1:
Epoch 1


5742it [01:58, 48.36it/s]


Finished training. Avg loss: 1.9268


100%|██████████| 5742/5742 [00:16<00:00, 345.83it/s]


Fold 2:
Epoch 1


5742it [01:58, 48.63it/s]


Finished training. Avg loss: 1.9262


100%|██████████| 5742/5742 [00:17<00:00, 325.62it/s]


Fold 3:
Epoch 1


5742it [01:57, 48.68it/s]


Finished training. Avg loss: 1.9263


100%|██████████| 5742/5742 [00:16<00:00, 341.68it/s]


Fold 4:
Epoch 1


5742it [01:57, 49.01it/s]


Finished training. Avg loss: 1.9266


100%|██████████| 5742/5742 [00:17<00:00, 337.15it/s]


Fold 5:
Epoch 1


5742it [01:57, 48.98it/s]


Finished training. Avg loss: 1.9260


100%|██████████| 5741/5741 [00:17<00:00, 334.75it/s]


Done, error=0.748685
Testing Regularisation, learning rate, dropout rate value (0.02, 0.01, 0.3)...
Fold 1:
Epoch 1


5742it [01:59, 47.92it/s]


Finished training. Avg loss: 1.9422


100%|██████████| 5742/5742 [00:17<00:00, 331.61it/s]


Fold 2:
Epoch 1


5742it [01:56, 49.08it/s]


Finished training. Avg loss: 1.9427


100%|██████████| 5742/5742 [00:17<00:00, 332.79it/s]


Fold 3:
Epoch 1


5742it [01:58, 48.58it/s]


Finished training. Avg loss: 1.9424


100%|██████████| 5742/5742 [00:18<00:00, 317.29it/s]


Fold 4:
Epoch 1


5742it [01:56, 49.23it/s]


Finished training. Avg loss: 1.9422


100%|██████████| 5742/5742 [00:17<00:00, 336.15it/s]


Fold 5:
Epoch 1


5742it [01:55, 49.63it/s]


Finished training. Avg loss: 1.9424


100%|██████████| 5741/5741 [00:16<00:00, 342.82it/s]


Done, error=0.748685
Testing Regularisation, learning rate, dropout rate value (0.02, 0.01, 0.5)...
Fold 1:
Epoch 1


5742it [01:59, 48.13it/s]


Finished training. Avg loss: 1.9332


100%|██████████| 5742/5742 [00:16<00:00, 357.27it/s]


Fold 2:
Epoch 1


5742it [01:54, 50.08it/s]


Finished training. Avg loss: 1.9335


100%|██████████| 5742/5742 [00:17<00:00, 320.41it/s]


Fold 3:
Epoch 1


5742it [01:59, 48.07it/s]


Finished training. Avg loss: 1.9335


100%|██████████| 5742/5742 [00:16<00:00, 349.47it/s]


Fold 4:
Epoch 1


5742it [01:54, 50.06it/s]


Finished training. Avg loss: 1.9331


100%|██████████| 5742/5742 [00:16<00:00, 340.97it/s]


Fold 5:
Epoch 1


5742it [01:54, 50.33it/s]


Finished training. Avg loss: 1.9336


100%|██████████| 5741/5741 [00:16<00:00, 346.25it/s]


Done, error=0.854053
Testing Regularisation, learning rate, dropout rate value (0.02, 0.001, 0.3)...
Fold 1:
Epoch 1


5742it [01:55, 49.63it/s]


Finished training. Avg loss: 1.9604


100%|██████████| 5742/5742 [00:16<00:00, 349.85it/s]


Fold 2:
Epoch 1


5742it [01:55, 49.66it/s]


Finished training. Avg loss: 1.9605


100%|██████████| 5742/5742 [00:17<00:00, 337.69it/s]


Fold 3:
Epoch 1


5742it [01:55, 49.89it/s]


Finished training. Avg loss: 1.9606


100%|██████████| 5742/5742 [00:16<00:00, 356.42it/s]


Fold 4:
Epoch 1


5742it [01:53, 50.51it/s]


Finished training. Avg loss: 1.9611


100%|██████████| 5742/5742 [00:16<00:00, 352.17it/s]


Fold 5:
Epoch 1


5742it [01:53, 50.61it/s]


Finished training. Avg loss: 1.9612


100%|██████████| 5741/5741 [00:16<00:00, 351.69it/s]


Done, error=0.857292
Testing Regularisation, learning rate, dropout rate value (0.02, 0.001, 0.5)...
Fold 1:
Epoch 1


5742it [01:58, 48.35it/s]


Finished training. Avg loss: 1.9275


100%|██████████| 5742/5742 [00:16<00:00, 339.15it/s]


Fold 2:
Epoch 1


5742it [01:58, 48.45it/s]


Finished training. Avg loss: 1.9271


100%|██████████| 5742/5742 [00:17<00:00, 324.24it/s]


Fold 3:
Epoch 1


5742it [01:56, 49.09it/s]


Finished training. Avg loss: 1.9272


100%|██████████| 5742/5742 [00:16<00:00, 345.24it/s]


Fold 4:
Epoch 1


5742it [01:56, 49.48it/s]


Finished training. Avg loss: 1.9272


100%|██████████| 5742/5742 [00:16<00:00, 345.33it/s]


Fold 5:
Epoch 1


5742it [01:54, 49.97it/s]


Finished training. Avg loss: 1.9273


100%|██████████| 5741/5741 [00:16<00:00, 338.99it/s]


Done, error=0.831760
Testing Regularisation, learning rate, dropout rate value (0.06, 0.1, 0.3)...
Fold 1:
Epoch 1


5742it [01:56, 49.21it/s]


Finished training. Avg loss: 1.9580


100%|██████████| 5742/5742 [00:17<00:00, 322.23it/s]


Fold 2:
Epoch 1


5742it [01:57, 48.82it/s]


Finished training. Avg loss: 1.9583


100%|██████████| 5742/5742 [00:16<00:00, 348.63it/s]


Fold 3:
Epoch 1


5742it [01:53, 50.56it/s]


Finished training. Avg loss: 1.9583


100%|██████████| 5742/5742 [00:16<00:00, 346.87it/s]


Fold 4:
Epoch 1


5742it [01:54, 50.16it/s]


Finished training. Avg loss: 1.9577


100%|██████████| 5742/5742 [00:16<00:00, 337.86it/s]


Fold 5:
Epoch 1


5742it [01:57, 48.73it/s]


Finished training. Avg loss: 1.9583


100%|██████████| 5741/5741 [00:16<00:00, 343.54it/s]


Done, error=0.889547
Testing Regularisation, learning rate, dropout rate value (0.06, 0.1, 0.5)...
Fold 1:
Epoch 1


5742it [02:01, 47.17it/s]


Finished training. Avg loss: 1.9428


100%|██████████| 5742/5742 [00:17<00:00, 324.46it/s]


Fold 2:
Epoch 1


5742it [01:56, 49.34it/s]


Finished training. Avg loss: 1.9425


100%|██████████| 5742/5742 [00:16<00:00, 345.97it/s]


Fold 3:
Epoch 1


5742it [01:55, 49.59it/s]


Finished training. Avg loss: 1.9428


100%|██████████| 5742/5742 [00:18<00:00, 313.72it/s]


Fold 4:
Epoch 1


5742it [01:56, 49.16it/s]


Finished training. Avg loss: 1.9432


100%|██████████| 5742/5742 [00:17<00:00, 326.91it/s]


Fold 5:
Epoch 1


5742it [01:57, 48.67it/s]


Finished training. Avg loss: 1.9423


100%|██████████| 5741/5741 [00:17<00:00, 325.98it/s]


Done, error=0.827058
Testing Regularisation, learning rate, dropout rate value (0.06, 0.01, 0.3)...
Fold 1:
Epoch 1


5742it [01:59, 48.05it/s]


Finished training. Avg loss: 1.9274


100%|██████████| 5742/5742 [00:17<00:00, 333.83it/s]


Fold 2:
Epoch 1


5742it [01:56, 49.49it/s]


Finished training. Avg loss: 1.9272


100%|██████████| 5742/5742 [00:16<00:00, 338.61it/s]


Fold 3:
Epoch 1


5742it [01:56, 49.41it/s]


Finished training. Avg loss: 1.9272


100%|██████████| 5742/5742 [00:17<00:00, 319.32it/s]


Fold 4:
Epoch 1


5742it [01:57, 49.05it/s]


Finished training. Avg loss: 1.9272


100%|██████████| 5742/5742 [00:16<00:00, 344.50it/s]


Fold 5:
Epoch 1


5742it [01:59, 48.24it/s]


Finished training. Avg loss: 1.9269


100%|██████████| 5741/5741 [00:17<00:00, 322.11it/s]


Done, error=0.857292
Testing Regularisation, learning rate, dropout rate value (0.06, 0.01, 0.5)...
Fold 1:
Epoch 1


5742it [02:01, 47.45it/s]


Finished training. Avg loss: 1.9301


100%|██████████| 5742/5742 [00:18<00:00, 314.32it/s]


Fold 2:
Epoch 1


5742it [01:57, 48.89it/s]


Finished training. Avg loss: 1.9310


100%|██████████| 5742/5742 [00:16<00:00, 342.31it/s]


Fold 3:
Epoch 1


5742it [01:55, 49.69it/s]


Finished training. Avg loss: 1.9307


100%|██████████| 5742/5742 [00:16<00:00, 341.28it/s]


Fold 4:
Epoch 1


5742it [01:56, 49.16it/s]


Finished training. Avg loss: 1.9301


100%|██████████| 5742/5742 [00:16<00:00, 340.36it/s]


Fold 5:
Epoch 1


5742it [01:55, 49.84it/s]


Finished training. Avg loss: 1.9300


100%|██████████| 5741/5741 [00:16<00:00, 352.10it/s]


Done, error=0.860845
Testing Regularisation, learning rate, dropout rate value (0.06, 0.001, 0.3)...
Fold 1:
Epoch 1


5742it [01:58, 48.43it/s]


Finished training. Avg loss: 1.9646


100%|██████████| 5742/5742 [00:17<00:00, 329.64it/s]


Fold 2:
Epoch 1


5742it [01:55, 49.70it/s]


Finished training. Avg loss: 1.9649


100%|██████████| 5742/5742 [00:16<00:00, 348.89it/s]


Fold 3:
Epoch 1


5742it [01:55, 49.84it/s]


Finished training. Avg loss: 1.9648


100%|██████████| 5742/5742 [00:16<00:00, 351.58it/s]


Fold 4:
Epoch 1


5742it [01:54, 50.19it/s]


Finished training. Avg loss: 1.9646


100%|██████████| 5742/5742 [00:17<00:00, 333.26it/s]


Fold 5:
Epoch 1


5742it [01:56, 49.31it/s]


Finished training. Avg loss: 1.9649


100%|██████████| 5741/5741 [00:17<00:00, 326.04it/s]


Done, error=0.857292
Testing Regularisation, learning rate, dropout rate value (0.06, 0.001, 0.5)...
Fold 1:
Epoch 1


5742it [01:59, 48.20it/s]


Finished training. Avg loss: 1.9342


100%|██████████| 5742/5742 [00:17<00:00, 334.45it/s]


Fold 2:
Epoch 1


5742it [01:55, 49.61it/s]


Finished training. Avg loss: 1.9344


100%|██████████| 5742/5742 [00:16<00:00, 346.26it/s]


Fold 3:
Epoch 1


5742it [01:55, 49.52it/s]


Finished training. Avg loss: 1.9341


100%|██████████| 5742/5742 [00:16<00:00, 350.61it/s]


Fold 4:
Epoch 1


5742it [01:54, 50.20it/s]


Finished training. Avg loss: 1.9342


100%|██████████| 5742/5742 [00:16<00:00, 343.30it/s]


Fold 5:
Epoch 1


5742it [01:58, 48.29it/s]


Finished training. Avg loss: 1.9343


100%|██████████| 5741/5741 [00:17<00:00, 324.77it/s]


Done, error=0.860845
Testing Regularisation, learning rate, dropout rate value (0.1, 0.1, 0.3)...
Fold 1:
Epoch 1


5742it [01:59, 48.10it/s]


Finished training. Avg loss: 1.9516


100%|██████████| 5742/5742 [00:16<00:00, 349.92it/s]


Fold 2:
Epoch 1


5742it [01:54, 50.05it/s]


Finished training. Avg loss: 1.9516


100%|██████████| 5742/5742 [00:16<00:00, 351.71it/s]


Fold 3:
Epoch 1


5742it [01:54, 50.08it/s]


Finished training. Avg loss: 1.9519


100%|██████████| 5742/5742 [00:16<00:00, 345.83it/s]


Fold 4:
Epoch 1


5742it [01:57, 48.92it/s]


Finished training. Avg loss: 1.9518


100%|██████████| 5742/5742 [00:17<00:00, 319.31it/s]


Fold 5:
Epoch 1


5742it [01:59, 48.25it/s]


Finished training. Avg loss: 1.9517


100%|██████████| 5741/5741 [00:17<00:00, 322.75it/s]


Done, error=0.860845
Testing Regularisation, learning rate, dropout rate value (0.1, 0.1, 0.5)...
Fold 1:
Epoch 1


5742it [01:57, 48.88it/s]


Finished training. Avg loss: 1.9431


100%|██████████| 5742/5742 [00:17<00:00, 333.08it/s]


Fold 2:
Epoch 1


5742it [01:56, 49.21it/s]


Finished training. Avg loss: 1.9431


100%|██████████| 5742/5742 [00:16<00:00, 338.08it/s]


Fold 3:
Epoch 1


5742it [01:55, 49.86it/s]


Finished training. Avg loss: 1.9432


100%|██████████| 5742/5742 [00:16<00:00, 340.35it/s]


Fold 4:
Epoch 1


5742it [01:55, 49.66it/s]


Finished training. Avg loss: 1.9427


100%|██████████| 5742/5742 [00:17<00:00, 333.61it/s]


Fold 5:
Epoch 1


5742it [01:57, 49.08it/s]


Finished training. Avg loss: 1.9432


100%|██████████| 5741/5741 [00:16<00:00, 343.62it/s]


Done, error=0.852172
Testing Regularisation, learning rate, dropout rate value (0.1, 0.01, 0.3)...
Fold 1:
Epoch 1


5742it [01:57, 48.73it/s]


Finished training. Avg loss: 1.9421


100%|██████████| 5742/5742 [00:16<00:00, 338.34it/s]


Fold 2:
Epoch 1


5742it [01:57, 48.71it/s]


Finished training. Avg loss: 1.9421


100%|██████████| 5742/5742 [00:16<00:00, 342.08it/s]


Fold 3:
Epoch 1


5742it [01:53, 50.44it/s]


Finished training. Avg loss: 1.9422


100%|██████████| 5742/5742 [00:17<00:00, 333.28it/s]


Fold 4:
Epoch 1


5742it [01:58, 48.63it/s]


Finished training. Avg loss: 1.9416


100%|██████████| 5742/5742 [00:17<00:00, 321.95it/s]


Fold 5:
Epoch 1


5742it [01:56, 49.18it/s]


Finished training. Avg loss: 1.9420


100%|██████████| 5741/5741 [00:17<00:00, 328.06it/s]


Done, error=0.827023
Testing Regularisation, learning rate, dropout rate value (0.1, 0.01, 0.5)...
Fold 1:
Epoch 1


5742it [01:56, 49.40it/s]


Finished training. Avg loss: 1.9323


100%|██████████| 5742/5742 [00:16<00:00, 352.12it/s]


Fold 2:
Epoch 1


5742it [01:55, 49.51it/s]


Finished training. Avg loss: 1.9319


100%|██████████| 5742/5742 [00:16<00:00, 354.31it/s]


Fold 3:
Epoch 1


5742it [01:54, 49.96it/s]


Finished training. Avg loss: 1.9319


100%|██████████| 5742/5742 [00:17<00:00, 327.86it/s]


Fold 4:
Epoch 1


5742it [01:58, 48.28it/s]


Finished training. Avg loss: 1.9321


100%|██████████| 5742/5742 [00:17<00:00, 333.59it/s]


Fold 5:
Epoch 1


5742it [01:55, 49.89it/s]


Finished training. Avg loss: 1.9329


100%|██████████| 5741/5741 [00:16<00:00, 347.36it/s]


Done, error=0.860880
Testing Regularisation, learning rate, dropout rate value (0.1, 0.001, 0.3)...
Fold 1:
Epoch 1


5742it [02:00, 47.80it/s]


Finished training. Avg loss: 1.9402


100%|██████████| 5742/5742 [00:16<00:00, 347.56it/s]


Fold 2:
Epoch 1


5742it [01:56, 49.48it/s]


Finished training. Avg loss: 1.9410


100%|██████████| 5742/5742 [00:17<00:00, 337.47it/s]


Fold 3:
Epoch 1


5742it [01:58, 48.25it/s]


Finished training. Avg loss: 1.9408


100%|██████████| 5742/5742 [00:18<00:00, 317.47it/s]


Fold 4:
Epoch 1


5742it [01:56, 49.09it/s]


Finished training. Avg loss: 1.9403


100%|██████████| 5742/5742 [00:16<00:00, 347.95it/s]


Fold 5:
Epoch 1


5742it [01:54, 50.13it/s]


Finished training. Avg loss: 1.9408


100%|██████████| 5741/5741 [00:16<00:00, 353.28it/s]


Done, error=0.889547
Testing Regularisation, learning rate, dropout rate value (0.1, 0.001, 0.5)...
Fold 1:
Epoch 1


5742it [01:58, 48.61it/s]


Finished training. Avg loss: 1.9401


100%|██████████| 5742/5742 [00:16<00:00, 341.07it/s]


Fold 2:
Epoch 1


5742it [01:58, 48.32it/s]


Finished training. Avg loss: 1.9396


100%|██████████| 5742/5742 [00:18<00:00, 318.55it/s]


Fold 3:
Epoch 1


5742it [01:59, 48.03it/s]


Finished training. Avg loss: 1.9402


100%|██████████| 5742/5742 [00:17<00:00, 333.91it/s]


Fold 4:
Epoch 1


5742it [01:56, 49.10it/s]


Finished training. Avg loss: 1.9396


100%|██████████| 5742/5742 [00:16<00:00, 347.52it/s]


Fold 5:
Epoch 1


5742it [01:56, 49.29it/s]


Finished training. Avg loss: 1.9397


100%|██████████| 5741/5741 [00:16<00:00, 349.83it/s]

Done, error=0.860845





### Training

In [45]:
%%time
# Train the model by supplying the netwrok object, loss function, optimizer, learning rate adjuster (scheduler), 
# trainloader and number of times to go over the dataset.
# The scheduler is optional and can be None. It can also be a list of several schedulers

num_epochs = 30
losses = train(net, criterion, optimizer, scheduler, trainloader, num_epochs, verbose=False)

Epoch 1


225it [01:20,  2.79it/s]


Epoch 2


225it [01:20,  2.79it/s]


Epoch 3


225it [01:20,  2.79it/s]


Epoch 4


225it [01:20,  2.79it/s]


Epoch 5


225it [01:20,  2.79it/s]


Epoch 6


225it [01:20,  2.80it/s]


Epoch 7


225it [01:25,  2.62it/s]


Epoch 8


225it [02:03,  1.82it/s]


Epoch 9


225it [02:17,  1.64it/s]


Epoch 10


225it [02:33,  1.47it/s]


Epoch 11


225it [02:35,  1.45it/s]


Epoch 12


225it [02:48,  1.34it/s]


Epoch 13


225it [03:09,  1.18it/s]


Epoch 14


225it [03:15,  1.15it/s]


Epoch 15


225it [03:37,  1.03it/s]


Epoch 16


225it [03:56,  1.05s/it]


Epoch 17


225it [04:16,  1.14s/it]


Epoch 18


225it [04:00,  1.07s/it]


Epoch 19


225it [03:51,  1.03s/it]


Epoch 20


225it [03:50,  1.03s/it]


Epoch 21


225it [03:51,  1.03s/it]


Epoch 22


225it [03:49,  1.02s/it]


Epoch 23


225it [03:55,  1.05s/it]


Epoch 24


225it [03:52,  1.03s/it]


Epoch 25


225it [03:41,  1.02it/s]


Epoch 26


225it [03:45,  1.00s/it]


Epoch 27


225it [03:46,  1.01s/it]


Epoch 28


225it [04:06,  1.10s/it]


Epoch 29


225it [04:01,  1.07s/it]


Epoch 30


225it [03:37,  1.03it/s]

Finished training. Avg loss: 1.8609
CPU times: user 1h 31min 51s, sys: 3min 2s, total: 1h 34min 54s
Wall time: 1h 30min 15s





---
## Testing

In [46]:
# Test the trained network on the testing dataset
# Supply the network object, loss function, testloader and dictionary of class labels
# The function will return a DataFrame with the network average loss and accuracy per class. 
# Pass this df to save_model() function in the next cell if you think it is a good model

result = check_accuracy(net, criterion, testloader, emotion_dict)

100%|██████████| 7178/7178 [00:43<00:00, 165.41it/s]

Average loss: 1.7656	Accuracy: 2272/7178 (31.7%)
Accuracy for class Angry: 13.8%
Accuracy for class Disgust: 31.2%
Accuracy for class Fear : 7.3%
Accuracy for class Happy: 61.9%
Accuracy for class Sad  : 5.7%
Accuracy for class Surprise: 70.9%
Accuracy for class Neutral: 24.6%





---
## Save and load the model

In [None]:
# Save the current model by supplying the net object, the parameters needed to initialize it from scratch (can be None),
# the loss function, the optimizer, the number of epochs used in training, the testing results DataFrame 
# and a random batch (needed for saving in ONNX format for further visualisation)

model_name = save_model(model, criterion, optimizer, scheduler, no_epochs, result, next(iter(trainloader))[0])

In [None]:
# Provide the name of the spec file of a model (without the extension) to load
# previously saved model as well as config with parameters used

mod_name = None # Assign the name of previously saved model
net, cfg = load_model(mod_name)