# Project: Facial emotion recognition
## MTH767P - Group 4

In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported modules
# (such as the local `utils` module) are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
from pathlib import Path
from tqdm import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

from utils import *

## Data loading and pre-processing 

#### Read data

In [None]:
# Lookup table from integer class label to emotion name.
# The position of each name in the list is its label, so the order must not change.
emotion_dict = dict(enumerate([
    'Angry',
    'Disgust',
    'Fear',
    'Happy',
    'Sad',
    'Surprise',
    'Neutral',
]))

In [None]:
# Pre-processing applied to every image: convert it to a tensor, then normalise with mean 0.5 and std 0.5.
# Normalize documents `mean` and `std` as per-channel sequences; `(0.5)` is just the float 0.5
# (parentheses alone do not make a tuple), so the values are spelled as one-element tuples.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Mini-batch size; the same value is used as the number of sample rows in the visualisation cell
batch_size = 4

# Load the emotions dataset from ./data/icml_face_data.csv with the transform above attached
dataset = EmotionsDataset(root='./data', fname='icml_face_data.csv', transform=transform)

#### Visualize data


In [None]:
# Plot a grid of example images: one column per emotion, up to `batch_size` rows per column
fig, ax = plt.subplots(batch_size, len(emotion_dict), figsize=(8, 4.5))

for label, title in emotion_dict.items():
    # First `batch_size` images whose target equals this label
    faces = dataset.data[dataset.targets == label][:batch_size]
    column = ax[:, label]
    # The emotion name goes above the top panel of the column
    column[0].set_title(title)
    for row in range(faces.shape[0]):
        panel = column[row]
        panel.imshow(faces[row], cmap='gray')
        panel.axis('off')

plt.tight_layout()
# NOTE(review): `output_path` is not defined in the visible cells; presumably it is
# provided by `from utils import *` - confirm
plt.savefig(output_path / 'dataset_sample.png')

#### Split dataset

In [None]:
# Split the dataset into a training loader and a testing loader that share one batch size
# (ratio=0.8 presumably means 80% of the samples go to training - confirm in EmotionsDataset.split)
trainloader, testloader = dataset.split(
    ratio=0.8,
    batch_size=batch_size,
)

---
## Convolutional neural network setup 
### Network architecture

In [None]:
# Specification of a simple convnet.
# The model is described as a list of dicts, one per layer. Each dict holds the layer class name
# under 'ltype' (case-insensitive) plus the parameters needed to initialise that layer.
# in_channels and in_features are omitted because they are calculated automatically.
# To follow a layer with an activation function, pass an initialised activation object
# under 'activation'.

layers = [
    {'ltype': 'conv2d', 'out_channels': 6, 'kernel': 7, 'activation': nn.ReLU(True)},
    {'ltype': 'maxpool2d', 'kernel': 2, 'stride': 2},
    {'ltype': 'conv2d', 'out_channels': 16, 'kernel': 5},
    {'ltype': 'maxpool2d', 'kernel': 2, 'stride': 2, 'activation': nn.ReLU(True)},
    {'ltype': 'linear', 'out_features': 120, 'activation': nn.ReLU(True)},
    # Output layer: seven outputs, matching the seven entries of emotion_dict
    {'ltype': 'linear', 'out_features': 7},
]
net = ConvNet(layers)

In [None]:
# Architecture with a localization network (see Minaee 2021, and the code in
# https://pytorch.org/tutorials/intermediate/spatial_transformer_tutorial.html).
# The spec has two layer lists: 'attention' for the localization branch and 'features' for the
# feature-extraction / classification branch.
# Running this cell rebinds `net`, replacing any model previously stored under that name.

atn_layers = {
    'attention': [
        {'ltype': 'conv2d', 'out_channels': 8, 'kernel': 3},
        {'ltype': 'maxpool2d', 'kernel': 2, 'stride': 2, 'activation': nn.ReLU(True)},
        {'ltype': 'conv2d', 'out_channels': 10, 'kernel': 3},
        {'ltype': 'maxpool2d', 'kernel': 2, 'stride': 2, 'activation': nn.ReLU(True)},
        {'ltype': 'linear', 'out_features': 48, 'activation': nn.ReLU(True)},
        # 3*2 = 6 outputs; presumably the entries of an affine transform matrix, as in the
        # linked spatial transformer tutorial - confirm in AttentionalNet
        {'ltype': 'linear', 'out_features': 3 * 2},
    ],
    'features': [
        {'ltype': 'conv2d', 'out_channels': 10, 'kernel': 3, 'activation': nn.ReLU(True)},
        {'ltype': 'conv2d', 'out_channels': 10, 'kernel': 3},
        {'ltype': 'maxpool2d', 'kernel': 2, 'stride': 2, 'activation': nn.ReLU(True)},
        {'ltype': 'conv2d', 'out_channels': 10, 'kernel': 3, 'activation': nn.ReLU(True)},
        {'ltype': 'conv2d', 'out_channels': 10, 'kernel': 3},
        {'ltype': 'maxpool2d', 'kernel': 2, 'stride': 2, 'activation': nn.ReLU(True)},
        {'ltype': 'dropout2d'},
        {'ltype': 'linear', 'out_features': 50},
        # Output layer: seven outputs, matching the seven entries of emotion_dict
        {'ltype': 'linear', 'out_features': 7},
    ],
}
net = AttentionalNet(atn_layers)

In [None]:
# Display the architecture of the current `net` in plain text format
# (as the last expression of the cell, the notebook renders the object's representation)
net

### Loss function and optimizer

In [None]:
# Loss function for the multi-class emotion classification task
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all parameters of the current `net`
optimizer = optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Adaptive learning rate: exponential decay of this optimizer's learning rate with gamma=0.9
scheduler = optim.lr_scheduler.ExponentialLR(
    optimizer,
    gamma=0.9,
)

---
## Training the neural network
### Training

In [None]:
%%time
# Train the model by supplying the network object, loss function, optimizer, learning rate adjuster
# (scheduler), trainloader and the number of times to go over the dataset.
# The scheduler is optional and can be None. It can also be a list of several schedulers.

num_epochs = 30
train(
    net,
    criterion,
    optimizer,
    scheduler,
    trainloader,
    num_epochs,
)

### Cross-validation

In [None]:
# Grid search over the SGD momentum using K-fold cross-validation:
#   1. Define the number of folds to split the dataset into.
#   2. Define a grid of parameter values to check (if more than one parameter is tested,
#      supply a list of tuples as the grid).
#   3. Define the objective function, which calls cross_validate() with object(s) built from
#      the parameter value(s) currently being tried.

Kfolds = 8
momentum_grid = [0.5, 0.7, 0.9]

# net.parameters() returns a one-shot generator. If that generator itself is frozen into the
# partial, the first optimizer built from it exhausts it and every later grid value fails with
# "optimizer got an empty parameter list". Materialise the parameters once so that each call
# to optim_grid() sees the full parameter list.
optim_grid = partial(optim.SGD, list(net.parameters()), lr=0.001)


def objective(m):
    """Cross-validate `net` using a newly built SGD optimizer with momentum `m`."""
    # NOTE(review): `scheduler` was constructed around the optimizer from the earlier
    # "Loss function and optimizer" cell, not around the optimizer created here, so it
    # presumably does not adjust this optimizer's learning rate - confirm in cross_validate()
    return cross_validate(Kfolds, dataset, net, criterion, optim_grid(momentum=m), scheduler, emotion_dict, batch_size)


bestval = grid_search(objective, momentum_grid, 'momentum')

---
## Testing

In [None]:
# Evaluate the trained network on the testing dataset.
# Arguments: the network object, the loss function, the testloader and the dictionary of class labels.
# The returned DataFrame holds the network's average loss and its accuracy per class;
# pass it to save_model() if the model is worth keeping.

result = check_accuracy_(
    net,
    criterion,
    testloader,
    emotion_dict,
)

---
## Save and load the model

In [None]:
# Save the current model. Arguments, in order: the net object, the parameters needed to initialise it
# from scratch (can be None), the loss function, the optimizer, the scheduler, the number of epochs
# used in training, the testing results DataFrame and a random batch of inputs (needed for saving
# in ONNX format for further visualisation).
# NOTE(review): `layers` is the spec of the simple ConvNet; if `net` is currently the AttentionalNet,
# `atn_layers` is presumably the matching spec - confirm before saving

model_name = save_model(
    net,
    layers,
    criterion,
    optimizer,
    scheduler,
    num_epochs,
    result,
    next(iter(trainloader))[0],  # inputs of the first batch drawn from the trainloader
)

In [None]:
# Provide the name of the spec file of a model (without the extension) to load
# a previously saved model together with the config holding the parameters used.
# Running this cell rebinds `net` to the loaded model.

mod_name = None # Placeholder: replace None with the name of a previously saved model before running
net, cfg = load_model(mod_name)