In [38]:
import glob
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader , random_split
from torchsummary import summary
import matplotlib.pyplot as plt
from random import randint
from tqdm import tqdm
from tqdm.notebook import trange, tqdm
import tensorflow as tf
import keras
import torch.nn.functional as F
from sklearn.metrics import f1_score, accuracy_score

In [3]:
print(tf.__version__)

2.18.0


In [3]:
#Data Loader for the FER training data conforming to PyTorch format

class FerTrain(Dataset):
    def __init__(self): # add additional parameters needed to load the dataset e.g dataset path
        self.train_images  = np.load('train_images.npy')
        self.train_labels = np.load('train_labels.npy')
        len(self.train_images)


    def __len__(self):
        return len(self.train_images)

    def __getitem__(self, idx):
        # your code here
        img = self.train_images[idx,:]
        #Upscale each image to 256x256
        img = cv2.resize(img, (256, 256), interpolation = cv2.INTER_LINEAR)
        img= img.reshape(256,256,1)
        img=img.astype(np.float32)
        img=img.T
        img_tensor = torch.from_numpy(img)

        label= torch.from_numpy(self.train_labels[idx]).to(torch.float32)

        return img_tensor, label

In [4]:
#Data Loader for the FER testing data conforming to PyTorch format

class FerTest(Dataset):
  def __init__(self): # add additional parameters needed to load the dataset e.g dataset path
        self.test_images  = np.load('test_images.npy')
        self.test_labels = np.load('test_labels.npy')


  def __len__(self):

        return self.test_images.shape[0]

  def __getitem__(self, idx):
      # your code here
      img = self.test_images[idx]
      img = cv2.resize(img, (256, 256), interpolation = cv2.INTER_LANCZOS4)
      img= img.reshape(256,256,1)
      img=img.astype(np.float32)
      img=img.T
      img_tensor = torch.from_numpy(img)

      label= torch.from_numpy(self.test_labels[idx]).to(torch.float32)

      return img_tensor, label

In [5]:
fer = FerTrain()
fer.__getitem__(6708)


(tensor([[[0.2784, 0.2784, 0.2784,  ..., 0.7137, 0.7137, 0.7137],
          [0.2784, 0.2784, 0.2784,  ..., 0.7137, 0.7137, 0.7137],
          [0.2784, 0.2784, 0.2784,  ..., 0.7137, 0.7137, 0.7137],
          ...,
          [0.0627, 0.0627, 0.0627,  ..., 0.5098, 0.5098, 0.5098],
          [0.0627, 0.0627, 0.0627,  ..., 0.5098, 0.5098, 0.5098],
          [0.0627, 0.0627, 0.0627,  ..., 0.5098, 0.5098, 0.5098]]]),
 tensor([0., 1., 0.]))

In [6]:
class EmotionDetectionNet(nn.Module):
    def __init__(self):
        super(EmotionDetectionNet, self).__init__()

        # Convolutional Layers
        self.conv = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=3, stride=1, padding=1),  # 255x255 -> 255x255
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, kernel_size=3, stride=2, padding=1),  # 255x255 -> 128x128
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, kernel_size=3, stride=2, padding=1),  # 128x128 -> 64x64
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),  # 64x64 -> 32x32
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),  # 32x32 -> 16x16
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),  # 16x16 -> 8x8
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True)
        )

        # Fully Connected Layers
        self.fc = nn.Sequential(
            nn.Linear(32 * 8 * 8, 1024),  # Flattened feature size -> 1024
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 3),  # 3 classes for classification
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        # Pass through convolutional layers
        x = self.conv(x)
        # Flatten the tensor for fully connected layers
        x = torch.flatten(x, start_dim=1)
        # Pass through fully connected layers
        x = self.fc(x)
        return x

In [7]:
device=None
if torch.cuda.is_available():
  device = 'cuda:0'
else:
  device = 'cpu'
print(device)
# Initialize model
model = EmotionDetectionNet()
# Display the model summary
# summary(model, (1, 48, 48))
net =  EmotionDetectionNet()
model.to(device)
# net.to(device)
summary(model,(1,256,256))


cuda:0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 256, 256, 256]           2,560
       BatchNorm2d-2        [-1, 256, 256, 256]             512
              ReLU-3        [-1, 256, 256, 256]               0
            Conv2d-4        [-1, 128, 128, 128]         295,040
       BatchNorm2d-5        [-1, 128, 128, 128]             256
              ReLU-6        [-1, 128, 128, 128]               0
            Conv2d-7           [-1, 64, 64, 64]          73,792
       BatchNorm2d-8           [-1, 64, 64, 64]             128
              ReLU-9           [-1, 64, 64, 64]               0
           Conv2d-10           [-1, 32, 32, 32]          18,464
      BatchNorm2d-11           [-1, 32, 32, 32]              64
             ReLU-12           [-1, 32, 32, 32]               0
           Conv2d-13           [-1, 32, 16, 16]           9,248
      BatchNorm2d-14           [

In [21]:
bath_size = 10
print(len(fer))
split_size_train = int(len(fer)*0.75) #create a 75 25 train validate split
split_size_val = len(fer) - split_size_train
train,valid = random_split(fer, lengths=[split_size_train, split_size_val])
print(len(train),len(valid))
train_dataloader = DataLoader(train, batch_size=bath_size, shuffle=True)
valid_dataloader = DataLoader(valid, batch_size=bath_size, shuffle=True)


16175
12131 4044


In [22]:
learning_rate = 1e-4
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
def calculate_accuracy(predictions, labels):
    predicted = torch.argmax(predictions, dim=1)
    orig = torch.argmax(labels, dim=1)
    correct = (predicted == orig).sum().item()
    return correct

In [25]:
#training loop, reporting accuracy at each itteration

last_acc= 100
print(device)
for param in model.parameters():
    param.requires_grad = True
for epoch in tqdm(range(50)):
    # monitor training loss
    train_loss = 0.0
    model.train()
    train_accuracy = 0.0
    #Training

    for data,label in tqdm(train_dataloader):

        images = data
        images = images.to(device)
        optimizer.zero_grad()
        outputs = model(images)

        label=label.to(device)

        train_accuracy += calculate_accuracy(outputs, label)
        loss = loss_function(outputs, label)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()


    valid_loss = 0.0
    val_accuracy = 0.0
    model.eval()
    for data, label in valid_dataloader:

        data = data.to(device)
        target = model(data)
        label=label.to(device)
        val_accuracy += calculate_accuracy(target, label)
        loss = loss_function(target, label)
        valid_loss += loss.item()


    if val_accuracy/len(valid) < last_acc:
        last_acc = val_accuracy
        torch.save(model.state_dict(),'weights\\FER_3_256x256_2.pt')

    print("EPOCH ", epoch+1,"   Training acc ",train_accuracy/len(train), "validation acc = " ,val_accuracy/len(valid))

cuda:0


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  1    Training acc  0.5429890363531449 validation acc =  0.5905044510385756


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  2    Training acc  0.6146236913692193 validation acc =  0.6293273986152325


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  3    Training acc  0.6541093067348116 validation acc =  0.6394658753709199


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  4    Training acc  0.6752946995301294 validation acc =  0.6580118694362018


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  5    Training acc  0.6963152254554448 validation acc =  0.6644411473788329


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  6    Training acc  0.7175830516857638 validation acc =  0.6585064292779427


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  7    Training acc  0.7349765064710246 validation acc =  0.6706231454005934


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  8    Training acc  0.7513807600362707 validation acc =  0.6817507418397626


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  9    Training acc  0.7646525430714698 validation acc =  0.6812561819980217


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  10    Training acc  0.7747094221416206 validation acc =  0.6772997032640949


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  11    Training acc  0.7875690380018135 validation acc =  0.6958456973293768


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  12    Training acc  0.799934053251999 validation acc =  0.6866963402571711


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  13    Training acc  0.8119693347621796 validation acc =  0.6852126607319485


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  14    Training acc  0.8189761767372846 validation acc =  0.6975766567754699


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  15    Training acc  0.8339790619075097 validation acc =  0.6866963402571711


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  16    Training acc  0.8426345725826395 validation acc =  0.6884272997032641


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  17    Training acc  0.8477454455527161 validation acc =  0.6842235410484668


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  18    Training acc  0.8510427829527656 validation acc =  0.6904055390702275


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  19    Training acc  0.8559887890528398 validation acc =  0.6931256181998022


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  20    Training acc  0.8617591295029264 validation acc =  0.6938674579624134


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  21    Training acc  0.8737119775781057 validation acc =  0.6906528189910979


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  22    Training acc  0.8712389745280685 validation acc =  0.7032640949554896


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  23    Training acc  0.8824499216882368 validation acc =  0.6933728981206726


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  24    Training acc  0.8861594262632924 validation acc =  0.6911473788328387


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  25    Training acc  0.8830269557332454 validation acc =  0.6745796241345203


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  26    Training acc  0.8890445964883357 validation acc =  0.690158259149357


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  27    Training acc  0.8977001071634655 validation acc =  0.6941147378832839


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  28    Training acc  0.8941554694584123 validation acc =  0.6881800197823936


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  29    Training acc  0.8968757728134531 validation acc =  0.6913946587537092


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  30    Training acc  0.9020690792185311 validation acc =  0.6951038575667656


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  31    Training acc  0.902398812958536 validation acc =  0.6874381800197824


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  32    Training acc  0.902398812958536 validation acc =  0.6869436201780416


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  33    Training acc  0.9105597230236584 validation acc =  0.6812561819980217


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  34    Training acc  0.9106421564586596 validation acc =  0.6985657764589516


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  35    Training acc  0.9128678592036931 validation acc =  0.6869436201780416


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  36    Training acc  0.9090759211936361 validation acc =  0.6862017804154302


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  37    Training acc  0.9134448932487017 validation acc =  0.6881800197823936


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  38    Training acc  0.9146813947737202 validation acc =  0.68026706231454


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  39    Training acc  0.9216058033138241 validation acc =  0.6928783382789317


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  40    Training acc  0.9200395680488006 validation acc =  0.6894164193867458


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  41    Training acc  0.922265270793834 validation acc =  0.6869436201780416


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  42    Training acc  0.9186381996537796 validation acc =  0.6941147378832839


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  43    Training acc  0.9232544720138488 validation acc =  0.6859545004945599


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  44    Training acc  0.9245734069738686 validation acc =  0.6980712166172107


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  45    Training acc  0.9262220756738934 validation acc =  0.69188921859545


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  46    Training acc  0.9279531778089193 validation acc =  0.6916419386745796


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  47    Training acc  0.9276234440689144 validation acc =  0.6911473788328387


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  48    Training acc  0.929931580248949 validation acc =  0.6913946587537092


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  49    Training acc  0.9311680817739675 validation acc =  0.6832344213649851


  0%|          | 0/1214 [00:00<?, ?it/s]

EPOCH  50    Training acc  0.93133294864397 validation acc =  0.6899109792284867


## Testing on the FER Dataset

In [12]:

model_test = EmotionDetectionNet()
model_test.load_state_dict(torch.load('weights\\FER_3_256x256_2.pt', weights_only=True))
model_test.to(device)
fer_test = FerTest()
test_dataloader = DataLoader(fer_test, batch_size=25, shuffle=False)
test_accuracy = 0.0
print(len(fer_test))
for data, label in test_dataloader:
  data = data.to(device)
  target = model_test(data)
  label=label.to(device)
  test_accuracy += calculate_accuracy(target, label)

print(f'Test Accuracy: {test_accuracy/len(fer_test)}')

3965
Test Accuracy: 0.6968474148802017


## Fine-Tuning on the MLI-DER Dataset

In [24]:
class DrivingFineTune(Dataset):
    def __init__(self, samples=100): # add additional parameters needed to load the dataset e.g dataset path
        self.train_images  = np.load('train_images_fine_tune.npy')
        random_samples = np.random.randint(self.train_images.shape[0], size=samples)
        self.train_images = self.train_images[random_samples, :]
        self.train_labels = np.load('train_labels_fine_tune.npy')
        self.train_labels = self.train_labels[random_samples,:]
        len(self.train_images.shape)

    def __len__(self):
        return len(self.train_images)

    def __getitem__(self, idx):
        img = self.train_images[idx]
        # img = cv2.resize(img, (256, 256), interpolation = cv2.INTER_LINEAR)
        # img= img.reshape(256,256,1)
        img=img.astype(np.float32)
        img=img.T
        img_tensor = torch.from_numpy(img)

        label= torch.from_numpy(self.train_labels[idx]).to(torch.float32)

        return img_tensor, label

In [25]:
driving = DrivingFineTune()
driving.__getitem__(50)

(tensor([[[0.5312, 0.5308, 0.5167,  ..., 0.1738, 0.1614, 0.1778],
          [0.5279, 0.5113, 0.5036,  ..., 0.1775, 0.1773, 0.1659],
          [0.5090, 0.4994, 0.4862,  ..., 0.1567, 0.1636, 0.1933],
          ...,
          [0.2518, 0.2503, 0.2374,  ..., 0.2187, 0.2135, 0.2112],
          [0.2479, 0.2398, 0.2325,  ..., 0.1977, 0.2179, 0.2253],
          [0.2312, 0.2285, 0.2193,  ..., 0.2023, 0.1969, 0.1893]]]),
 tensor([1., 0., 0.]))

In [26]:
t = DrivingFineTune()
print(t.__getitem__(0)[1])
print(t.__getitem__(30)[1])
print(t.__getitem__(20)[1])


tensor([0., 0., 1.])
tensor([0., 0., 1.])
tensor([1., 0., 0.])


In [27]:
model_fine_tune = EmotionDetectionNet()

for samples in range(100,801, 100):
    model_fine_tune.load_state_dict(torch.load('weights\\FER_3_256x256.pt', weights_only=True))
    learning_rate = 1e-4
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    model_fine_tune.to(device)
    fer_fine_tune = DrivingFineTune(samples=samples)
    fine_tune_dataloader = DataLoader(fer_fine_tune, batch_size=10, shuffle=True)
    for param in model_fine_tune.parameters():
        param.requires_grad = True


    for epoch in tqdm(range(30)):
        train_loss = 0.0
        model_fine_tune.train()
        train_accuracy = 0.0
        #Training

        for data,label in fine_tune_dataloader:

            images = data
            images = images.to(device)
            optimizer.zero_grad()
            outputs = model_fine_tune(images)

            label=label.to(device)

            train_accuracy += calculate_accuracy(outputs, label)
            loss = loss_function(outputs, label)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
    print(f"Samples: {samples} EPOCH: ", epoch+1,"   Training acc ",train_accuracy/len(fer_fine_tune))
    torch.save(model.state_dict(),f'weights\\Fine Tune\\base_{samples}.pt')
    

  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 100 EPOCH:  30    Training acc  0.27


  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 200 EPOCH:  30    Training acc  0.27


  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 300 EPOCH:  30    Training acc  0.2966666666666667


  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 400 EPOCH:  30    Training acc  0.2475


  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 500 EPOCH:  30    Training acc  0.332


  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 600 EPOCH:  30    Training acc  0.31166666666666665


  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 700 EPOCH:  30    Training acc  0.30428571428571427


  0%|          | 0/30 [00:00<?, ?it/s]

Samples: 800 EPOCH:  30    Training acc  0.36125


## Testing on the MLI-DER Dataset after Finetuning

In [28]:
class FerDrivingTest(Dataset):
  def __init__(self): # add additional parameters needed to load the dataset e.g dataset path
        self.test_images  = np.load('driving_data_test_images.npy')
        self.test_labels = np.load('driving_data_test_labels.npy')
        print(self.test_images.shape)

  def __len__(self):
        return self.test_images.shape[0]

  def __getitem__(self, idx):
      # your code here
      img = self.test_images[idx]
    #   img = cv2.resize(img, (256, 256), interpolation = cv2.INTER_LANCZOS4)
    #   img= img.reshape(256,256,1)
      img=img.astype(np.float32)
      img=img.T
      img_tensor = torch.from_numpy(img)
      label= torch.from_numpy(self.test_labels[idx]).to(torch.float32)

      return img_tensor, label

In [None]:
model_test = EmotionDetectionNet()
for models in glob.glob('weights\\Fine Tune\\*.pt'):
    num_samples = models.split('_')[-1][:3]
    model_test.load_state_dict(torch.load(models, weights_only=True))
    model_test.to(device)
    fer_test = FerDrivingTest()
    test_dataloader = DataLoader(fer_test, batch_size=1, shuffle=False)
    preds = []
    labels_arg_max = []
    
    for data, label in test_dataloader:
        data = data.to(device)
        target = model_test(data)
        label=label.to(device)
        label_max = torch.argmax(label)
        pred = torch.argmax(target)
        preds.append(label_max.item())
        labels_arg_max.append(pred.item())


    print(f'Samples: {num_samples} Test Accuracy: {accuracy_score(preds, labels_arg_max)} F1 score: {f1_score(preds, labels_arg_max, average='weighted')}')

(938, 256, 256, 1)
Samples: 100 Test Accuracy: 0.3336886993603412 F1 score: 0.3073870184263428
(938, 256, 256, 1)
Samples: 200 Test Accuracy: 0.32196162046908317 F1 score: 0.30747595307920744
(938, 256, 256, 1)
Samples: 300 Test Accuracy: 0.3304904051172708 F1 score: 0.31018849156334655
(938, 256, 256, 1)
Samples: 400 Test Accuracy: 0.326226012793177 F1 score: 0.3078768715070943
(938, 256, 256, 1)
Samples: 500 Test Accuracy: 0.31769722814498935 F1 score: 0.2986123947881325
(938, 256, 256, 1)
Samples: 600 Test Accuracy: 0.31023454157782515 F1 score: 0.2882829462571556
(938, 256, 256, 1)
Samples: 700 Test Accuracy: 0.31236673773987206 F1 score: 0.29167150494198946
(938, 256, 256, 1)
Samples: 800 Test Accuracy: 0.31769722814498935 F1 score: 0.29788871100005415
