# RAF-DB Dataset

In [None]:
# Workaround for cv2's incompatibility with NumPy >= 2.0: pin NumPy below 2.0 if needed
#%pip install "numpy<2.0"

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import kagglehub
import matplotlib.pyplot as plt
import kagglehub

In [None]:
# Download the RAF-DB dataset through kagglehub. The returned value is kept in
# `path_raf` and used as the dataset root by the path-building cell below.
path_raf = kagglehub.dataset_download("shuvoalok/raf-db-dataset")
print("RAF‑DB dataset path:", path_raf)

RAF‑DB dataset path: /kaggle/input/raf-db-dataset


In [None]:
# Every path below is derived from `path_raf` (the value returned by kagglehub).
# An earlier version used a separate hard-coded variable instead:
# dataset_path_raf = '/root/.cache/kagglehub/datasets/shuvoalok/raf-db-dataset/versions/2/'

# All former uses of `dataset_path_raf` now use `path_raf`.

# Label CSVs for the two splits (read later via the 'image' and 'label' columns)
train_labels_csv_raf = os.path.join(path_raf, 'train_labels.csv')
test_labels_csv_raf = os.path.join(path_raf, 'test_labels.csv')

# Root folder of the image files
images_set_raf = os.path.join(path_raf, 'DATASET')

# Per-split image folders; the loaders append '<label>/<filename>' to these
images_train_set_raf = os.path.join(images_set_raf, 'train')
images_test_set_raf = os.path.join(images_set_raf, 'test')

In [None]:
# RAF-DB label codes
# 1 = Surprise
# 2 = Fear
# 3 = Disgust
# 4 = Happy
# 5 = Sad
# 6 = Angry
# 7 = Neutral

# Size passed to cv2.resize for every image (100x100)
target_size = (100, 100)

# Translate RAF label codes (keys) to FER-style codes (values).
# Both sides are strings; the loaders look labels up via str(row['label']).
# FER codes used here: 0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral
raf_to_fer = {
  '1': '5',   # Surprise
  '2': '2',   # Fear
  '3': '1',   # Disgust
  '4': '3',   # Happy
  '5': '4',   # Sad
  '6': '0',   # Angry
  '7': '6'    # Neutral
}

In [None]:
# Load the RAF-DB training split: read the label CSV, then load, resize and
# rescale every listed image.
train_labels_df_raf = pd.read_csv(train_labels_csv_raf)

raf_train_images = []
raf_train_labels = []

# Walk the two needed columns directly instead of materialising a Series per
# row with iterrows().
for img_filename, raw_label in zip(train_labels_df_raf['image'], train_labels_df_raf['label']):
  original_label = str(raw_label)

  # Skip (and report) any label code that has no FER equivalent
  if original_label not in raf_to_fer:
    print(f"Cannot find RAF label: {original_label}")
    continue
  mapped_label = raf_to_fer[original_label]

  # Images are stored under <train dir>/<RAF label>/<filename>
  img_path = os.path.join(images_train_set_raf, original_label, img_filename)
  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with the offending path rather than inside resize.
    raise FileNotFoundError(f"Could not read RAF training image: {img_path}")

  # Resize to the common target size and scale pixel values by 1/255
  img = cv2.resize(img, target_size)
  img = img.astype('float32') / 255.0

  raf_train_images.append(img)
  raf_train_labels.append(int(mapped_label))  # training labels are stored as ints

X_raf_train = np.array(raf_train_images)
y_raf_train = np.array(raf_train_labels)
print("RAF‑DB training images shape:", X_raf_train.shape)

RAF‑DB training images shape: (12271, 100, 100, 3)


In [None]:
# Load the RAF-DB test split the same way as the training split, then merge
# both splits into a single (X_raf, y_raf) pair.
raf_test_images = []
raf_test_labels = []

test_labels_df_raf = pd.read_csv(test_labels_csv_raf)
# Walk the two needed columns directly instead of materialising a Series per
# row with iterrows().
for img_filename, raw_label in zip(test_labels_df_raf['image'], test_labels_df_raf['label']):
  original_label = str(raw_label)
  # Skip (and report) any label code that has no FER equivalent
  if original_label not in raf_to_fer:
    print(f"CAnnot find RAF label: {original_label}")
    continue
  mapped_label = raf_to_fer[original_label]

  # Images are stored under <test dir>/<RAF label>/<filename>
  img_path = os.path.join(images_test_set_raf, original_label, img_filename)
  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with the offending path rather than inside resize.
    raise FileNotFoundError(f"Could not read RAF test image: {img_path}")

  # Resize to the common target size and scale pixel values by 1/255
  img = cv2.resize(img, target_size)
  img = img.astype('float32') / 255.0

  raf_test_images.append(img)
  # NOTE(review): the label is appended as a *string* here, while the training
  # loader appends int(mapped_label). Concatenating the two label arrays below
  # therefore produces string labels, and later cells compare labels against
  # strings such as '3'. Convert to int only together with those cells.
  raf_test_labels.append(mapped_label)

X_raf_test = np.array(raf_test_images)
y_raf_test = np.array(raf_test_labels)
print("RAF‑DB test images shape:", X_raf_test.shape)

# Combine training and test sets of RAF
X_raf = np.concatenate((X_raf_train, X_raf_test), axis=0)
y_raf = np.concatenate((y_raf_train, y_raf_test), axis=0)
print("Combined RAF‑DB images shape:", X_raf.shape)

RAF‑DB test images shape: (3068, 100, 100, 3)
Combined RAF‑DB images shape: (15339, 100, 100, 3)


In [None]:
# Re-split the combined RAF-DB data: 20% held out for testing, shuffled with a
# fixed seed (42) and stratified on the labels so both parts share the same
# class proportions.
X_final_train, X_final_test, y_final_train, y_final_test = train_test_split(
  X_raf, y_raf, test_size=0.2, random_state=42, shuffle=True, stratify=y_raf
)

# Report the shapes of the resulting image and label arrays
print("\nFinal Training set shape:", X_final_train.shape, y_final_train.shape)
print("Final Test set shape:", X_final_test.shape, y_final_test.shape)


Final Training set shape: (12271, 100, 100, 3) (12271,)
Final Test set shape: (3068, 100, 100, 3) (3068,)


-------

### Implementation

In [None]:
import torch
from torch import nn, optim
import torch.nn.functional as F

In [None]:
# Inspect one training label; the recorded output shows a NumPy string, not an int.
y_final_train[0]

np.str_('5')

In [None]:
# Emotion name -> FER-2013 label code. The codes are kept as strings.
fer_mapping = dict(
  angry='0',
  disgust='1',
  fear='2',
  happy='3',
  sad='4',
  surprise='5',
  neutral='6',
)

def to_fer_labels(labels):
    """Rewrite emotion names in `labels` to their FER code strings, in place.

    Entries that are not keys of `fer_mapping` (for example labels that are
    already digit strings) are left untouched. Nothing is returned; the
    sequence passed in is modified directly.
    """
    count = len(labels)
    for pos in range(count):
        current = labels[pos]
        if current in fer_mapping:
            labels[pos] = fer_mapping[current]

# Convert any emotion-name labels in the training labels in place.
# NOTE(review): only the training labels are passed; y_final_test is not converted here.
to_fer_labels(y_final_train)

In [None]:
# Check the first few training labels; the recorded output shows digit strings.
y_final_train[:5]

array(['5', '2', '6', '4', '6'], dtype='<U21')

In [None]:
# before implementation: need to split dataset again for happy-sad and all the other emotions
# RAF-DB is currently using FER-2013 label system

def split_datasets(images, labels):
    """Split samples into the P-CNN (happy/sad) and S-CNN (secondary) subsets.

    Labels use FER codes and may be given as digit strings ('3') or as
    integers (3); each label is normalised with ``str()`` before matching, so
    an integer label array no longer produces silently empty results.

    Args:
        images: iterable of images, aligned with `labels`.
        labels: iterable of FER label codes.

    Returns:
        ``((pcnn_images, pcnn_labels), (scnn_images, scnn_labels))`` where
        * pcnn_labels are ints: 0 = Happy ('3'), 1 = Sad ('4');
        * scnn_labels are digit *strings*: '0' = Surprise ('5'),
          '1' = Neutral ('6'), '2' = Angry ('0'), '3' = Fear ('2').
        Samples with any other label (e.g. '1', Disgust) appear in neither
        subset.
    """
    # FER code -> class index for each network, built once instead of per sample
    primary_classes = {'3': 0, '4': 1}
    secondary_classes = {'5': '0', '6': '1', '0': '2', '2': '3'}

    pcnn_images, pcnn_labels = [], []
    scnn_images, scnn_labels = [], []

    for img, lbl in zip(images, labels):
        code = str(lbl)  # accepts str, np.str_, int and NumPy integer labels
        if code in primary_classes:
            pcnn_images.append(img)
            pcnn_labels.append(primary_classes[code])
        elif code in secondary_classes:
            scnn_images.append(img)
            scnn_labels.append(secondary_classes[code])

    return (pcnn_images, pcnn_labels), (scnn_images, scnn_labels)

# Split RAF-DB and keep only the first (P-CNN, happy/sad) subset of each split;
# the second (S-CNN) subset is discarded here via `_`.
(pcnn_train_imgs, pcnn_train_labels), _ = split_datasets(X_final_train, y_final_train)
(pcnn_test_imgs, pcnn_test_labels), _ = split_datasets(X_final_test, y_final_test)

In [None]:
## implementation of networks

### P-CNN

In [None]:
# P-CNN

class PCNN(nn.Module):
    """Primary CNN: three conv+pool stages, then three linear layers giving 2 logits.

    The first linear layer expects 128 * 12 * 12 flattened features, which is
    what three 2x2 poolings leave for a 3-channel 100x100 input
    (100 -> 50 -> 25 -> 12).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (3x3 kernels, padding keeps spatial size)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        # One pooling module shared by all stages; halves height and width
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head
        self.fc1 = nn.Linear(in_features=128 * 12 * 12, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=1024)
        self.fc3 = nn.Linear(in_features=1024, out_features=2)  # happy or sad

        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return raw class logits of shape (batch, 2) for a (batch, 3, H, W) input."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension
        x = x.view(x.shape[0], -1)
        for hidden in (self.fc1, self.fc2):
            x = self.dropout(F.relu(hidden(x)))
        logits = self.fc3(x)
        return logits


### S-CNN


In [None]:
class SCNN(nn.Module):
    """Secondary CNN: five conv+pool stages, then three linear layers giving 4 logits.

    The first linear layer expects 512 * 3 * 3 flattened features, i.e. a
    3-channel 100x100 input after five 2x2 poolings.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; channel count doubles at every stage
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        # One pooling module shared by all stages; halves height and width
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head
        self.fc1 = nn.Linear(in_features=512 * 3 * 3, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=1024)
        self.fc3 = nn.Linear(in_features=1024, out_features=4)

        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return raw class logits of shape (batch, 4) for a (batch, 3, H, W) input."""
        # Spatial size for a 100x100 input: 100 -> 50 -> 25 -> 12 -> 6 -> 3
        stages = (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5)
        for conv in stages:
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension
        x = x.view(x.shape[0], -1)
        for hidden in (self.fc1, self.fc2):
            x = self.dropout(F.relu(hidden(x)))
        return self.fc3(x)


In [None]:
class EmotionDataset(torch.utils.data.Dataset):
    """Dataset pairing channel-last images with their class labels.

    `images` and `labels` are stored as given and indexed in parallel; the
    length of the dataset is the length of `images`.
    """

    def __init__(self, images, labels):
        self.images, self.labels = images, labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return (image, label) tensors for sample `idx`.

        The image is reordered from HWC to CHW and cast to float; the label is
        cast to a long (int64) tensor.
        """
        image_hwc = torch.tensor(self.images[idx])
        image_chw = image_hwc.permute(2, 0, 1).float()  # HWC → CHW
        target = torch.tensor(self.labels[idx]).long()
        return image_chw, target

In [None]:
def train_model(model, dataloader, criterion, optimizer, device, epochs=10):
    """Train `model` for `epochs` passes over `dataloader` and log each epoch.

    Args:
        model: module to train; moved to `device` and put in training mode.
        dataloader: iterable of (inputs, targets) batches exposing `.dataset`.
        criterion: loss callable taking (outputs, targets).
        optimizer: optimizer over the model's parameters.
        device: device the model and every batch are moved to.
        epochs: number of passes over the data.

    Returns:
        List with one training accuracy (correct / dataset size) per epoch.
    """
    model.to(device)
    model.train()
    accuracy_per_epoch = []

    for epoch_idx in range(epochs):
        loss_sum = 0
        n_correct = 0

        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            # Standard optimisation step
            optimizer.zero_grad()
            logits = model(inputs)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size so the epoch figure is a per-sample mean
            loss_sum += batch_loss.item() * inputs.size(0)
            predicted = logits.argmax(1)
            n_correct += (predicted == targets).sum().item()

        epoch_acc = n_correct / len(dataloader.dataset)
        accuracy_per_epoch.append(epoch_acc)
        mean_loss = loss_sum / len(dataloader.dataset)
        print(f"Epoch {epoch_idx+1}, Loss: {mean_loss:.4f}, Accuracy: {epoch_acc:.4f}")
    return accuracy_per_epoch

In [None]:
from torch.utils.data import DataLoader

# Wrap the happy/sad subsets in datasets
pcnn_train_ds = EmotionDataset(pcnn_train_imgs, pcnn_train_labels)
pcnn_test_ds = EmotionDataset(pcnn_test_imgs, pcnn_test_labels)

# Batches of 64; only the training loader shuffles
pcnn_train_loader = DataLoader(pcnn_train_ds, batch_size=64, shuffle=True)
pcnn_test_loader = DataLoader(pcnn_test_ds, batch_size=64, shuffle=False)

# NOTE(review): no random seed is set in the visible cells, so weight
# initialisation and shuffling (and hence the logged numbers) may differ per run.
model = PCNN()
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train for 20 epochs; the returned history holds the *training* accuracy per epoch
pcnn_acc_hist = train_model(model, pcnn_train_loader, criterion, optimizer, device, epochs=20)
print(f"final P-CNN train accuracy: {pcnn_acc_hist[-1]:.4f}")


Epoch 1, Loss: 0.5151, Accuracy: 0.7535
Epoch 2, Loss: 0.3384, Accuracy: 0.8558
Epoch 3, Loss: 0.2791, Accuracy: 0.8812
Epoch 4, Loss: 0.2404, Accuracy: 0.8974
Epoch 5, Loss: 0.2002, Accuracy: 0.9201
Epoch 6, Loss: 0.1750, Accuracy: 0.9303
Epoch 7, Loss: 0.1503, Accuracy: 0.9403
Epoch 8, Loss: 0.1123, Accuracy: 0.9548
Epoch 9, Loss: 0.0927, Accuracy: 0.9618
Epoch 10, Loss: 0.0868, Accuracy: 0.9661
Epoch 11, Loss: 0.0645, Accuracy: 0.9770
Epoch 12, Loss: 0.0497, Accuracy: 0.9819
Epoch 13, Loss: 0.0491, Accuracy: 0.9834
Epoch 14, Loss: 0.0300, Accuracy: 0.9884
Epoch 15, Loss: 0.0164, Accuracy: 0.9950
Epoch 16, Loss: 0.0397, Accuracy: 0.9854
Epoch 17, Loss: 0.0320, Accuracy: 0.9890
Epoch 18, Loss: 0.0208, Accuracy: 0.9936
Epoch 19, Loss: 0.0202, Accuracy: 0.9936
Epoch 20, Loss: 0.0296, Accuracy: 0.9900
final P-CNN train accuracy: 0.9900


In [None]:
# Keep a reference to the trained P-CNN under a descriptive name for the hybrid pipeline.
pcnn_model = model

In [None]:
# Display the P-CNN layer summary (module repr).
model

PCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=18432, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=1024, bias=True)
  (fc3): Linear(in_features=1024, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [None]:
# Split again, this time keeping only the second (S-CNN) subset of each split.
_, (scnn_train_imgs, scnn_train_labels) = split_datasets(X_final_train, y_final_train)
_, (scnn_test_imgs, scnn_test_labels) = split_datasets(X_final_test, y_final_test)

# split_datasets returns the S-CNN labels as digit strings; convert them to ints
# so EmotionDataset can turn them into long tensors.
scnn_train_labels = list(map(int, scnn_train_labels))
scnn_test_labels = list(map(int, scnn_test_labels))


scnn_train_ds = EmotionDataset(scnn_train_imgs, scnn_train_labels)
scnn_test_ds = EmotionDataset(scnn_test_imgs, scnn_test_labels)

# Batches of 64; only the training loader shuffles
scnn_train_loader = DataLoader(scnn_train_ds, batch_size=64, shuffle=True)
scnn_test_loader = DataLoader(scnn_test_ds, batch_size=64, shuffle=False)


In [None]:
# Confirm the S-CNN training labels are now plain ints.
scnn_train_labels[0]

0

In [None]:
# Look at the first ten S-CNN test labels (class indices 0-3).
scnn_test_labels[:10]

[0, 2, 1, 1, 3, 0, 1, 3, 0, 3]

In [None]:
# Train the secondary network with the same loss/optimizer setup as the P-CNN.
scnn_model = SCNN()
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(scnn_model.parameters(), lr=1e-3)

# Train for 20 epochs; the returned history holds the *training* accuracy per epoch
scnn_acc_hist = train_model(scnn_model, scnn_train_loader, criterion, optimizer, device, epochs=20)
print(f"final S-CNN train accuracy: {scnn_acc_hist[-1]:.4f}")

# Round-trip the weights through disk: save the state dict, rebuild a fresh
# SCNN and load the saved weights into it. `scnn_model` now refers to the
# reloaded instance.
torch.save(scnn_model.state_dict(), "scnn_model.pth")
scnn_model = SCNN()
scnn_model.load_state_dict(torch.load("scnn_model.pth"))
# eval() is not called here; HybridEmotionRecognizer.__init__ calls .eval() on
# the models it receives.
#model.eval()

Epoch 1, Loss: 1.1656, Accuracy: 0.5244
Epoch 2, Loss: 1.0424, Accuracy: 0.5672
Epoch 3, Loss: 0.8836, Accuracy: 0.6553
Epoch 4, Loss: 0.7471, Accuracy: 0.7289
Epoch 5, Loss: 0.6054, Accuracy: 0.7785
Epoch 6, Loss: 0.4949, Accuracy: 0.8267
Epoch 7, Loss: 0.4256, Accuracy: 0.8534
Epoch 8, Loss: 0.3671, Accuracy: 0.8737
Epoch 9, Loss: 0.3043, Accuracy: 0.8987
Epoch 10, Loss: 0.2765, Accuracy: 0.9067
Epoch 11, Loss: 0.2081, Accuracy: 0.9297
Epoch 12, Loss: 0.1671, Accuracy: 0.9440
Epoch 13, Loss: 0.1510, Accuracy: 0.9477
Epoch 14, Loss: 0.1135, Accuracy: 0.9609
Epoch 15, Loss: 0.1208, Accuracy: 0.9584
Epoch 16, Loss: 0.0890, Accuracy: 0.9679
Epoch 17, Loss: 0.0755, Accuracy: 0.9727
Epoch 18, Loss: 0.0595, Accuracy: 0.9773
Epoch 19, Loss: 0.0391, Accuracy: 0.9888
Epoch 20, Loss: 0.0503, Accuracy: 0.9835
final S-CNN train accuracy: 0.9835


<All keys matched successfully>

In [None]:
# after training both: connect them

class HybridEmotionRecognizer:
    """Combine a trained P-CNN and S-CNN into a two-level emotion predictor.

    The P-CNN decides the primary emotion (0 = happy, 1 = sad). The S-CNN
    proposes a secondary class, which is only accepted if it belongs to the
    group tied to the primary emotion; otherwise a per-group default is used.

    Args:
        pcnn: primary model; moved to `device` and switched to eval mode.
        scnn: secondary model; moved to `device` and switched to eval mode.
        device: device used for both models and for every input tensor.
        verbose: when True (default, the previous behaviour) `predict` prints
            the raw primary and secondary class indices. Pass False to silence
            this, e.g. when predicting over a whole dataset.
    """

    def __init__(self, pcnn, scnn, device, verbose=True):
        self.pcnn = pcnn.to(device).eval()
        self.scnn = scnn.to(device).eval()
        self.device = device
        self.verbose = verbose

        # S-CNN class index -> emotion name.
        # Indices 0-1 are accepted when the primary emotion is happy,
        # indices 2-3 when it is sad.
        self.sub_emotion_map = {
            0: 'surprise',
            1: 'neutral',
            2: 'angry',
            3: 'fear'
        }

    def predict(self, image_tensor):
        """Predict the primary and secondary emotion for one image.

        Args:
            image_tensor: input batch holding a single image; `.item()` is
                called on the argmax results, so a batch size other than 1
                raises an error.

        Returns:
            dict with "primary" ("happy" or "sad") and "secondary"
            ("surprise", "neutral", "angry" or "fear").
        """
        with torch.no_grad():
            image_tensor = image_tensor.to(self.device)

            # P-CNN
            primary_logits = self.pcnn(image_tensor)
            primary_class = torch.argmax(primary_logits, dim=1).item()  # 0 = happy, 1 = sad

            # S-CNN
            sub_logits = self.scnn(image_tensor)
            sub_class = torch.argmax(sub_logits, dim=1).item()

            if self.verbose:
                print("primary_class:", primary_class)
                print("the subclass:", sub_class)

            # Accept the S-CNN class only if it is in the primary emotion's group
            if primary_class == 0:  # Happy
                label = self.sub_emotion_map[sub_class if sub_class in [0, 1] else 1]  # default neutral
            else:  # Sad
                label = self.sub_emotion_map[sub_class if sub_class in [2, 3] else 2]  # default anger

        return {
            "primary": "happy" if primary_class == 0 else "sad",
            "secondary": label
        }


In [None]:
# Check the layout of one test image; the recorded output is (100, 100, 3).
X_final_test[0].shape

(100, 100, 3)

In [None]:
# Convert one test image to a tensor to inspect its values.
# NOTE(review): this prints the full tensor; consider showing only a small slice.
torch.tensor( X_final_test[0])

tensor([[[0.0588, 0.1569, 0.3137],
         [0.0745, 0.1765, 0.3333],
         [0.0980, 0.2078, 0.3686],
         ...,
         [0.5059, 0.6353, 0.7373],
         [0.4980, 0.6275, 0.7294],
         [0.4941, 0.6235, 0.7255]],

        [[0.0667, 0.1765, 0.3373],
         [0.0980, 0.2157, 0.3765],
         [0.1490, 0.2745, 0.4431],
         ...,
         [0.5020, 0.6314, 0.7333],
         [0.5020, 0.6314, 0.7333],
         [0.5020, 0.6314, 0.7333]],

        [[0.0824, 0.2196, 0.3882],
         [0.1255, 0.2745, 0.4392],
         [0.1922, 0.3451, 0.5176],
         ...,
         [0.4980, 0.6275, 0.7294],
         [0.4980, 0.6275, 0.7294],
         [0.4980, 0.6275, 0.7294]],

        ...,

        [[0.1725, 0.3294, 0.5216],
         [0.1765, 0.3333, 0.5255],
         [0.1843, 0.3451, 0.5373],
         ...,
         [0.0549, 0.1765, 0.2745],
         [0.0431, 0.1647, 0.2627],
         [0.0510, 0.1725, 0.2706]],

        [[0.1569, 0.3137, 0.5059],
         [0.1608, 0.3176, 0.5098],
         [0.

In [None]:
# Verify the model input shape: reorder to channel-first and add a batch dimension.
torch.tensor(X_final_test[0]).permute(2, 0, 1).unsqueeze(0).float().shape

torch.Size([1, 3, 100, 100])

In [None]:
# Smoke-test the hybrid pipeline on the first two preprocessed test images.
# The recognizer does not depend on the image, so it is built once before the
# loop instead of being re-created for every sample.
pipeline = HybridEmotionRecognizer(pcnn_model, scnn_model, device=device)

for sample_img in X_final_test[:2]:
    image = torch.tensor(sample_img).permute(2, 0, 1).unsqueeze(0).float()  # → (1, 3, 100, 100)

    result = pipeline.predict(image)
    #print("Primary Emotion:", result["primary"])
    #print("Secondary Emotion:", result["secondary"])


primary_class: 0
the subclass: 1
primary_class: 0
the subclass: 1


In [None]:
# List the distinct label values in the test split; the recorded output shows seven digit strings.
set(y_final_test)

{np.str_('0'),
 np.str_('1'),
 np.str_('2'),
 np.str_('3'),
 np.str_('4'),
 np.str_('5'),
 np.str_('6')}

In [None]:
# Ground-truth label -> S-CNN class index (0-3), used by evaluate_hybrid.
# Two spellings are accepted for each class: the FER digit string and the
# emotion name. All keys are strings; labels missing from this table are
# skipped by evaluate_hybrid.
valid_labels = {
    '5': 0,  # Surprise
    '6': 1,  # Neutral
    '0': 2,  # Angry
    '2': 3,   # Fear
    'surprise': 0,  # Surprise
    'neutral': 1,  # Neutral
    'angry': 2,  # Angry
    'fear': 3   # Fear
}


def get_primary_label(fer_label):
    """Map a FER label code to the P-CNN class index.

    Args:
        fer_label: FER code as a number (3, 4) or as a numeric string
            ('3', '4'), which is how labels are stored in this notebook.

    Returns:
        0 for Happy (code 3), 1 for Sad (code 4), None for anything else,
        including strings that are not integers.
    """
    if isinstance(fer_label, str):
        # Labels in this notebook are digit strings; compare them numerically
        try:
            fer_label = int(fer_label)
        except ValueError:
            return None

    if fer_label == 3:
        return 0  # Happy
    elif fer_label == 4:
        return 1  # Sad
    else:
        return None

def evaluate_hybrid(hybrid_model, images, labels):
    """Print the hybrid model's accuracy on samples that have an S-CNN label.

    Samples whose label is not a key of `valid_labels` are skipped. A
    prediction counts as correct only when both the primary emotion and the
    secondary class match the expected values. Nothing is returned; the
    accuracy and the number of evaluated samples are printed.

    Args:
        hybrid_model: object whose `predict(tensor)` returns a dict with the
            keys "primary" and "secondary".
        images: iterable of channel-last images, aligned with `labels`.
        labels: iterable of ground-truth labels.
    """
    n_correct = 0
    n_evaluated = 0

    for img, label in zip(images, labels):
        if label not in valid_labels:
            continue  # skip non-SCNN labels

        # Channel-first layout plus a leading batch dimension of 1
        batch = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).float()
        prediction = hybrid_model.predict(batch)

        # Expected classes: Surprise/Neutral belong to Happy (0), the rest to Sad (1)
        target_sub = valid_labels[label]
        target_primary = 0 if target_sub in [0, 1] else 1

        # Translate the predicted names back to class indices
        secondary_index = {
            "surprise": 0,
            "neutral": 1,
            "angry": 2,
            "fear": 3
        }
        predicted_sub = secondary_index[prediction["secondary"]]
        predicted_primary = 0 if prediction["primary"] == "happy" else 1

        both_match = predicted_primary == target_primary and predicted_sub == target_sub
        if both_match:
            n_correct += 1
        n_evaluated += 1

    if n_evaluated > 0:
        acc = n_correct / n_evaluated
    else:
        acc = 0
    print(f"Hybrid Test Accuracy (Secondary emotion): {acc:.4f} on {n_evaluated} samples")


In [None]:
# Build the hybrid recognizer from the two trained networks and evaluate it on the
# held-out split. Only samples whose label is in `valid_labels` are scored.
pipeline = HybridEmotionRecognizer(pcnn_model, scnn_model, device)
evaluate_hybrid(pipeline, X_final_test, y_final_test)

primary_class: 1
the subclass: 0
primary_class: 1
the subclass: 2
primary_class: 1
the subclass: 1
primary_class: 1
the subclass: 0
primary_class: 1
the subclass: 1
primary_class: 1
the subclass: 0
primary_class: 1
the subclass: 1
primary_class: 0
the subclass: 1
primary_class: 1
the subclass: 0
primary_class: 0
the subclass: 3
primary_class: 1
the subclass: 0
primary_class: 0
the subclass: 2
primary_class: 0
the subclass: 1
primary_class: 1
the subclass: 1
primary_class: 1
the subclass: 1
primary_class: 0
the subclass: 1
primary_class: 0
the subclass: 1
primary_class: 1
the subclass: 1
primary_class: 1
the subclass: 1
primary_class: 0
the subclass: 0
primary_class: 1
the subclass: 1
primary_class: 1
the subclass: 1
primary_class: 1
the subclass: 1
primary_class: 0
the subclass: 2
primary_class: 1
the subclass: 0
primary_class: 0
the subclass: 0
primary_class: 0
the subclass: 3
primary_class: 1
the subclass: 0
primary_class: 0
the subclass: 1
primary_class: 1
the subclass: 1
primary_cl