Import libraries

In [1]:
import cv2
import opencv_jupyter_ui as jcv2
from feat import Detector
from IPython.display import Image
from collections import Counter
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import torch
import torchvision.models as models
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Load Dataloaders

In [3]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms



# Define image transforms (resizing, normalization, etc.)
transform = transforms.Compose([ 
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),  # Resize
    transforms.ToTensor(),          
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# FER
data_path_fer = r'C:\Users\Usuario\Uppsala\IIS\Project\data\FER2013\train' 
test_data_path_fer = r'C:\Users\Usuario\Uppsala\IIS\Project\data\FER2013\test'
# RAF
train_data_path_raf = r'C:\Users\Usuario\Uppsala\IIS\Project\data\RAF-DB\train'  
test_data_path_raf = r'C:\Users\Usuario\Uppsala\IIS\Project\data\RAF-DB\test' 



def load_dataset(data_path):
    dataset = datasets.ImageFolder(root=data_path, transform=transform)
    return dataset

# Load the datasets
full_dataset = load_dataset(data_path_fer)
test_dataset = load_dataset(test_data_path_fer)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# split the dataset
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])


# DataLoader for batching and shuffling
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


print(f"Train Classes: {full_dataset.classes}")  
print(f"Test Classes: {test_dataset.classes}") 

Train Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
Test Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [4]:
# Load the datasets RAF
full_dataset_raf = load_dataset(data_path_fer)
test_dataset_raf = load_dataset(test_data_path_fer)

train_size = int(0.8 * len(full_dataset_raf))
val_size = len(full_dataset_raf) - train_size

# split the dataset
train_dataset_raf, val_dataset_raf = random_split(full_dataset_raf, [train_size, val_size])


#  DataLoader for batching and shuffling
train_loader_raf = DataLoader(train_dataset_raf, batch_size=32, shuffle=True)
val_loader_raf = DataLoader(val_dataset_raf, batch_size=32, shuffle=False)
test_loader_raf = DataLoader(test_dataset_raf, batch_size=32, shuffle=False)


print(f"Train Classes: {full_dataset_raf.classes}")  
print(f"Test Classes: {test_dataset_raf.classes}") 

Train Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
Test Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


### AffectNet

In [3]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Define data paths
data_dir = r"C:\Users\Usuario\Uppsala\IIS\Project\data\AffectNet"  
train_dir = f"{data_dir}/train"
val_dir = f"{data_dir}/val"
test_dir = f"{data_dir}/test"

data_transforms = {
    "train": transforms.Compose([
        transforms.Resize((224, 224)),  # Resizing 
        transforms.RandomHorizontalFlip(), 
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
}

# Load datasets using ImageFolder
train_dataset = datasets.ImageFolder(train_dir, transform=data_transforms["train"])
val_dataset = datasets.ImageFolder(val_dir, transform=data_transforms["val"])
test_dataset = datasets.ImageFolder(test_dir, transform=data_transforms["val"])

# Create DataLoaders
batch_size = 32  
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4)

# Check class names and dataset sizes
print(f"Classes: {train_dataset.classes}")
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")


Classes: ['angry', 'contempt', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
Number of training samples: 37553
Number of validation samples: 800
Number of test samples: 3200


### Pre-trained models:


#### Deepface

In [5]:
from deepface import DeepFace
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# Some detector backends to test
detector_backends = ['opencv', 'mtcnn', 'centerface', 'mediapipe']

class_labels = full_dataset.classes  

def evaluate_detector_backend(test_loader, detector_backend):
    """
    Evaluate the performance of a specific detector backend.
    
    """
    true_labels = []
    predicted_labels = []
    
    print(f"Evaluating detector backend: {detector_backend}")
    for images, labels in tqdm(test_loader, desc=f"Processing with {detector_backend}"):
        for i in range(images.size(0)): 
            # Convert tensor to a NumPy array 
            img = images[i].cpu().numpy().transpose(1, 2, 0)  # Convert to HWC format
            img = ((img * [0.229, 0.224, 0.225]) + [0.485, 0.456, 0.406]) * 255  # De-normalize
            img = img.astype('uint8') 

            try:
                # Predict emotion using the specified backend
                analysis = DeepFace.analyze(img_path=img, actions=['emotion'], detector_backend=detector_backend, enforce_detection=False)
                predicted_emotion = analysis[0]['dominant_emotion']

                true_labels.append(class_labels[labels[i].item()]) 
                predicted_labels.append(predicted_emotion.lower())
            except Exception as e:
                print(f"Error: {e}")
                continue

    # accuracy
    accuracy = accuracy_score(true_labels, predicted_labels) * 100
    print(f"Accuracy with {detector_backend}: {accuracy:.2f}%")
    return accuracy

# hyperparameter search
results = {}
for backend in detector_backends:
    accuracy = evaluate_detector_backend(test_loader, detector_backend=backend)
    results[backend] = accuracy

# overall results
print("\nFinal Results:")
for backend, acc in results.items():
    print(f"{backend}: {acc:.2f}%")


Evaluating detector backend: opencv


Processing with opencv: 100%|██████████| 225/225 [12:12<00:00,  3.25s/it]


Accuracy with opencv: 50.28%
Evaluating detector backend: mtcnn


Processing with mtcnn: 100%|██████████| 225/225 [38:57<00:00, 10.39s/it]


Accuracy with mtcnn: 52.97%
Evaluating detector backend: centerface


Processing with centerface: 100%|██████████| 225/225 [28:31<00:00,  7.61s/it]


Accuracy with centerface: 55.73%
Evaluating detector backend: mediapipe


Processing with mediapipe: 100%|██████████| 225/225 [05:08<00:00,  1.37s/it]

Accuracy with mediapipe: 45.81%

Final Results:
opencv: 50.28%
mtcnn: 52.97%
centerface: 55.73%
mediapipe: 45.81%





#### ResNet 18

In [4]:
# valuate the model on the test set
def evaluate_model(model, loader):
    print('------- Evaluating -------')
    model.eval()  
    correct = 0
    total = 0

    with torch.no_grad(): 
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(accuracy)
    return accuracy

In [5]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5):
    print('------------ Training ----------------')
    best_val_accuracy = 0.0 
    best_model_path = "best_resnet18.pth"  # save the best model

    for epoch in range(num_epochs):
        model.train()  
        running_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)  # Calculate loss

            loss.backward()
            optimizer.step()

            # Track the accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            running_loss += loss.item()
        
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")

         # Validation phase
        model.eval() 
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()

                # Track validation accuracy
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_accuracy = 100 * val_correct / val_total
        print(f"Validation Loss: {val_loss / len(val_loader):.4f}, Validation Accuracy: {val_accuracy:.2f}%")
        print("-" * 50)

        # Save the model if it has the best validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), best_model_path)
            print(f"New best model saved with validation accuracy: {best_val_accuracy:.2f}%")

    print(f"Training complete. Best validation accuracy: {best_val_accuracy:.2f}%")
    return model





In [10]:
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image
from torchvision.models import resnet50, ResNet18_Weights
import torch.nn as nn


# Load pretrained ResNet18
resnet = models.resnet18(weights=ResNet18_Weights.DEFAULT)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Freeze the convolutional layers
for param in resnet.parameters():
    param.requires_grad = False
    
# Replace the final fully connected layer
resnet.fc = nn.Linear(resnet.fc.in_features, len(full_dataset.classes))  # Align with number of emotion classes
resnet = resnet.to(device)

# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet.parameters(), lr=0.001)

train_model(resnet, train_loader, val_loader, criterion, optimizer, num_epochs=30)

# Evaluate on the test set after training
test_accuracy = evaluate_model(resnet, test_loader)
print(f"Test Accuracy: {test_accuracy:.2f}%")

# Save the model's state dictionary
torch.save(resnet.state_dict(), "emotion_recognition_resnet18.pth")
print("Model saved successfully.")

------------ Training ----------------
Epoch [1/30], Loss: 1.6279, Accuracy: 36.05%
Validation Loss: 1.5121, Validation Accuracy: 41.27%
--------------------------------------------------
New best model saved with validation accuracy: 41.27%
Epoch [2/30], Loss: 1.5156, Accuracy: 41.50%
Validation Loss: 1.5067, Validation Accuracy: 40.98%
--------------------------------------------------
Epoch [3/30], Loss: 1.4878, Accuracy: 43.22%
Validation Loss: 1.4734, Validation Accuracy: 43.10%
--------------------------------------------------
New best model saved with validation accuracy: 43.10%
Epoch [4/30], Loss: 1.4787, Accuracy: 43.34%
Validation Loss: 1.5137, Validation Accuracy: 41.17%
--------------------------------------------------
Epoch [5/30], Loss: 1.4682, Accuracy: 43.79%
Validation Loss: 1.4963, Validation Accuracy: 43.23%
--------------------------------------------------
New best model saved with validation accuracy: 43.23%
Epoch [6/30], Loss: 1.4605, Accuracy: 43.99%
Validatio

#### ResNet 34

In [6]:

# Load pretrained ResNet34
resnet34 = models.resnet34(weights='DEFAULT')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Freeze the convolutional layers
for param in resnet34.parameters():
    param.requires_grad = False
    
# Replace the final fully connected layer
resnet34.fc = nn.Linear(resnet34.fc.in_features, len(full_dataset.classes))  # Align with number of emotion classes
resnet34 = resnet34.to(device)

# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet34.parameters(), lr=0.001)

train_model(resnet34, train_loader, val_loader, criterion, optimizer, num_epochs=30)

# Evaluate on the test set after training
test_accuracy_34 = evaluate_model(resnet34, test_loader)
print(f"Test Accuracy: {test_accuracy_34:.2f}%")

# Save the model's state dictionary
torch.save(resnet34.state_dict(), "emotion_recognition_resnet34.pth")
print("Model saved successfully.")

------------ Training ----------------
Epoch [1/30], Loss: 1.6196, Accuracy: 36.45%
Validation Loss: 1.5706, Validation Accuracy: 39.66%
--------------------------------------------------
New best model saved with validation accuracy: 39.66%
Epoch [2/30], Loss: 1.5176, Accuracy: 41.36%
Validation Loss: 1.5317, Validation Accuracy: 41.27%
--------------------------------------------------
New best model saved with validation accuracy: 41.27%
Epoch [3/30], Loss: 1.4897, Accuracy: 42.87%
Validation Loss: 1.5142, Validation Accuracy: 41.61%
--------------------------------------------------
New best model saved with validation accuracy: 41.61%
Epoch [4/30], Loss: 1.4704, Accuracy: 43.24%
Validation Loss: 1.4876, Validation Accuracy: 43.52%
--------------------------------------------------
New best model saved with validation accuracy: 43.52%
Epoch [5/30], Loss: 1.4677, Accuracy: 43.25%
Validation Loss: 1.4938, Validation Accuracy: 42.08%
--------------------------------------------------


#### MobileNetV2

In [9]:
from torchvision import datasets, transforms, models

# Load pretrained MobileNetV2 model
mobilenet_v2 = models.mobilenet_v2(pretrained=True)

# Freeze all convolutional layers 
for param in mobilenet_v2.parameters():
    param.requires_grad = False  # Freeze the convolutional layers

# Modify the final fully connected layer (classifier) to match emotion classes (FER2013)
mobilenet_v2.classifier[1] = nn.Linear(mobilenet_v2.classifier[1].in_features, 7)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mobilenet_v2 = mobilenet_v2.to(device)

# loss function and optimizer 
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mobilenet_v2.classifier[1].parameters(), lr=0.001)

train_model(mobilenet_v2, train_loader, val_loader, criterion, optimizer, num_epochs=30)

mobilenet_v2_accuracy = evaluate_model(mobilenet_v2, test_loader)

# Save the model's state dictionary
torch.save(mobilenet_v2.state_dict(), "emotion_recognition_mobilenet_v2.pth")
print("Model saved successfully.")

------------ Training ----------------
Epoch [1/30], Loss: 1.6198, Accuracy: 36.64%
Validation Loss: 1.5435, Validation Accuracy: 42.02%
--------------------------------------------------
New best model saved with validation accuracy: 42.02%
Epoch [2/30], Loss: 1.5463, Accuracy: 40.77%
Validation Loss: 1.5446, Validation Accuracy: 40.46%
--------------------------------------------------
Epoch [3/30], Loss: 1.5307, Accuracy: 41.32%
Validation Loss: 1.5140, Validation Accuracy: 41.94%
--------------------------------------------------
Epoch [4/30], Loss: 1.5309, Accuracy: 40.78%
Validation Loss: 1.5130, Validation Accuracy: 41.68%
--------------------------------------------------
Epoch [5/30], Loss: 1.5331, Accuracy: 41.35%
Validation Loss: 1.4974, Validation Accuracy: 42.49%
--------------------------------------------------
New best model saved with validation accuracy: 42.49%
Epoch [6/30], Loss: 1.5190, Accuracy: 41.79%
Validation Loss: 1.4884, Validation Accuracy: 42.51%
----------

#### EfficientNetB0

In [7]:
# Load the pretrained EfficientNetB0 model
efficientnet_b0 = models.efficientnet_b0(weights='DEFAULT')

# Freeze all convolutional layers
for param in efficientnet_b0.parameters():
    param.requires_grad = False  # Freeze the convolutional layers

# Modify the final fully connected layer to match emotion classes (FER2013)
efficientnet_b0.classifier[1] = nn.Linear(efficientnet_b0.classifier[1].in_features, 7)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
efficientnet_b0 = efficientnet_b0.to(device)

# loss function and optimizer (only train the final FC layer)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(efficientnet_b0.classifier[1].parameters(), lr=0.001)

train_model(efficientnet_b0, train_loader, val_loader, criterion, optimizer, num_epochs=30)
efficientnet_b0_accuracy = evaluate_model(efficientnet_b0, test_loader)

# Save the model's state dictionary
torch.save(efficientnet_b0.state_dict(), "emotion_recognition_efficientnet_b0.pth")
print("Model saved successfully.")

------------ Training ----------------
Epoch [1/30], Loss: 1.5990, Accuracy: 37.37%
Validation Loss: 1.5118, Validation Accuracy: 40.98%
--------------------------------------------------
New best model saved with validation accuracy: 40.98%
Epoch [2/30], Loss: 1.5080, Accuracy: 41.88%
Validation Loss: 1.5011, Validation Accuracy: 42.96%
--------------------------------------------------
New best model saved with validation accuracy: 42.96%
Epoch [3/30], Loss: 1.4816, Accuracy: 42.88%
Validation Loss: 1.4816, Validation Accuracy: 43.16%
--------------------------------------------------
New best model saved with validation accuracy: 43.16%
Epoch [4/30], Loss: 1.4746, Accuracy: 43.36%
Validation Loss: 1.4620, Validation Accuracy: 43.78%
--------------------------------------------------
New best model saved with validation accuracy: 43.78%
Epoch [5/30], Loss: 1.4778, Accuracy: 43.45%
Validation Loss: 1.4650, Validation Accuracy: 43.94%
--------------------------------------------------


### Testing models on RAF-DB

In [12]:
import torch
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
from torchvision.models import resnet18, resnet34, mobilenet_v2, efficientnet_b0
from torchvision.models import ResNet18_Weights, ResNet34_Weights

def evaluate_model(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

def load_model(model_arch,model_path, num_classes, device):
    if model_arch == "resnet18":
        model = resnet18(weights=ResNet18_Weights.DEFAULT)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif model_arch == "resnet34":
        model = resnet34(weights=ResNet34_Weights.DEFAULT)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif model_arch == "mobilenet_v2":
        model = mobilenet_v2(weights='DEFAULT')
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    elif model_arch == "efficientnet_b0":
        model = efficientnet_b0(weights='DEFAULT')
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    else:
        raise ValueError(f"Unsupported model architecture: {model_arch}")

    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    return model

# Paths
model_paths = {
    "resnet18": r"C:\Users\Usuario\Uppsala\IIS\Project\User-perception sub-system\emotion_recognition_resnet18.pth",
    "resnet34": r"C:\Users\Usuario\Uppsala\IIS\Project\User-perception sub-system\emotion_recognition_resnet34.pth",
    "mobilenet_v2": r"C:\Users\Usuario\Uppsala\IIS\Project\User-perception sub-system\emotion_recognition_mobilenet_v2.pth",
    "efficientnet_b0": r"C:\Users\Usuario\Uppsala\IIS\Project\User-perception sub-system\emotion_recognition_efficientnet_b0.pth",
}

# Evaluate models
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = len(full_dataset_raf.classes) 

for model_arch, model_path in model_paths.items():
    print(f"Loading {model_arch}...")
    model = load_model(model_arch, model_path, num_classes, device)
    print(f"Model {model_arch} loaded successfully!")


Loading resnet18...


  model.load_state_dict(torch.load(model_path, map_location=device))


Model resnet18 loaded successfully!
Loading resnet34...
Model resnet34 loaded successfully!
Loading mobilenet_v2...
Model mobilenet_v2 loaded successfully!
Loading efficientnet_b0...
Model efficientnet_b0 loaded successfully!


In [13]:
for model_arch, model_path in model_paths.items():
    print(f"Evaluating {model_arch}...")
    model = load_model(model_arch, model_path, num_classes, device)
    accuracy = evaluate_model(model, test_loader_raf, device)
    print(f"Accuracy of {model_arch} on the test dataset: {accuracy * 100:.2f}%")

Evaluating resnet18...


  model.load_state_dict(torch.load(model_path, map_location=device))


Accuracy of resnet18 on the test dataset: 40.94%
Evaluating resnet34...
Accuracy of resnet34 on the test dataset: 43.08%
Evaluating mobilenet_v2...
Accuracy of mobilenet_v2 on the test dataset: 42.91%
Evaluating efficientnet_b0...
Accuracy of efficientnet_b0 on the test dataset: 44.44%
