# RAF-DB Dataset

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import kagglehub
import matplotlib.pyplot as plt
import kagglehub

In [2]:
# Fetch both source datasets from Kaggle via kagglehub. Each call returns the
# local directory the dataset was extracted to, which is printed for reference.
raf_handle = "shuvoalok/raf-db-dataset"
fer_handle = "msambare/fer2013"

path_raf = kagglehub.dataset_download(raf_handle)
print("RAF‑DB dataset path:", path_raf)

path_fer = kagglehub.dataset_download(fer_handle)
print("FER‑2013 dataset path:", path_fer)

Downloading from https://www.kaggle.com/api/v1/datasets/download/shuvoalok/raf-db-dataset?dataset_version_number=2...


100%|██████████| 37.7M/37.7M [00:02<00:00, 17.8MB/s]

Extracting files...





RAF‑DB dataset path: /root/.cache/kagglehub/datasets/shuvoalok/raf-db-dataset/versions/2
Downloading from https://www.kaggle.com/api/v1/datasets/download/msambare/fer2013?dataset_version_number=1...


100%|██████████| 60.3M/60.3M [00:02<00:00, 22.0MB/s]

Extracting files...





FER‑2013 dataset path: /root/.cache/kagglehub/datasets/msambare/fer2013/versions/1


In [3]:
# Root of the RAF-DB download. Reuse the directory returned by kagglehub
# (path_raf) instead of a hard-coded absolute cache path, so the notebook keeps
# working on another machine, user account, or dataset version.
dataset_path_raf = path_raf

# CSV files listing image file names and their RAF labels
train_labels_csv_raf = os.path.join(dataset_path_raf, 'train_labels.csv')
test_labels_csv_raf = os.path.join(dataset_path_raf, 'test_labels.csv')

# Image folders: DATASET/<split>/<RAF label>/<image file>
images_set_raf = os.path.join(dataset_path_raf, 'DATASET')

images_train_set_raf = os.path.join(images_set_raf, 'train')
images_test_set_raf = os.path.join(images_set_raf, 'test')

In [14]:
# Side length (in pixels) shared by both datasets after resizing: 100x100
target_size = (100, 100)

# RAF label code -> emotion name
_raf_emotions = {
  '1': 'Surprise',
  '2': 'Fear',
  '3': 'Disgust',
  '4': 'Happy',
  '5': 'Sad',
  '6': 'Angry',
  '7': 'Neutral',
}

# Emotion name -> FER label code
_fer_codes = {
  'Angry': '0',
  'Disgust': '1',
  'Fear': '2',
  'Happy': '3',
  'Sad': '4',
  'Surprise': '5',
  'Neutral': '6',
}

# Translate RAF labels to match FER's style by joining the two tables on the
# emotion name (e.g. RAF '1' = Surprise -> FER '5').
raf_to_fer = {
  raf_code: _fer_codes[emotion]
  for raf_code, emotion in _raf_emotions.items()
}

In [16]:
# Load RAF training labels CSV
train_labels_df_raf = pd.read_csv(train_labels_csv_raf)

# Load RAF training images: each image is read from
# <train dir>/<RAF label>/<file name>, resized to target_size, divided by 255,
# and stored together with its FER-style label.
raf_train_images = []
raf_train_labels = []

# Iterate over the two needed columns directly; cheaper than DataFrame.iterrows().
for img_filename, raw_label in zip(train_labels_df_raf['image'], train_labels_df_raf['label']):
  original_label = str(raw_label)

  # Translate label
  if original_label not in raf_to_fer:
    print(f"CAnnot find RAF label: {original_label}")
    continue
  mapped_label = raf_to_fer[original_label]

  img_path = os.path.join(images_train_set_raf, original_label, img_filename)
  img = cv2.imread(img_path)
  # cv2.imread returns None (it does not raise) for a missing or unreadable
  # file; skip such entries, otherwise cv2.resize would fail on the None input.
  if img is None:
    print(f"Failed to load image: {img_path}")
    continue

  # Resize and normalize image
  img = cv2.resize(img, target_size)
  img = img.astype('float32') / 255.0

  raf_train_images.append(img)
  raf_train_labels.append(mapped_label)

X_raf_train = np.array(raf_train_images)
y_raf_train = np.array(raf_train_labels)
print("RAF‑DB training images shape:", X_raf_train.shape)

RAF‑DB training images shape: (12271, 100, 100, 3)


In [21]:
# Load RAF test images the same way as the training images: read from
# <test dir>/<RAF label>/<file name>, resize to target_size, divide by 255,
# and keep the FER-style label.
raf_test_images = []
raf_test_labels = []

test_labels_df_raf = pd.read_csv(test_labels_csv_raf)
# Iterate over the two needed columns directly; cheaper than DataFrame.iterrows().
for img_filename, raw_label in zip(test_labels_df_raf['image'], test_labels_df_raf['label']):
  original_label = str(raw_label)

  # Translate label
  if original_label not in raf_to_fer:
    print(f"CAnnot find RAF label: {original_label}")
    continue
  mapped_label = raf_to_fer[original_label]

  img_path = os.path.join(images_test_set_raf, original_label, img_filename)
  img = cv2.imread(img_path)
  # cv2.imread returns None (it does not raise) for a missing or unreadable
  # file; skip such entries, otherwise cv2.resize would fail on the None input.
  if img is None:
    print(f"Failed to load image: {img_path}")
    continue

  # Resize and normalize image
  img = cv2.resize(img, target_size)
  img = img.astype('float32') / 255.0

  raf_test_images.append(img)
  raf_test_labels.append(mapped_label)

X_raf_test = np.array(raf_test_images)
y_raf_test = np.array(raf_test_labels)
print("RAF‑DB test images shape:", X_raf_test.shape)

# Combine training and test sets of RAF
X_raf = np.concatenate((X_raf_train, X_raf_test), axis=0)
y_raf = np.concatenate((y_raf_train, y_raf_test), axis=0)
print("Combined RAF‑DB images shape:", X_raf.shape)

RAF‑DB test images shape: (3068, 100, 100, 3)
Combined RAF‑DB images shape: (15339, 100, 100, 3)


# FER-2013 Dataset

In [19]:
# Root of the FER-2013 download. Reuse the directory returned by kagglehub
# (path_fer) instead of a hard-coded absolute cache path, so the notebook keeps
# working on another machine, user account, or dataset version.
dataset_path_fer = path_fer

# Image folders: <split>/<emotion name>/<image file>
images_train_set_fer = os.path.join(dataset_path_fer, 'train')
images_test_set_fer = os.path.join(dataset_path_fer, 'test')

In [22]:
# FER emotion folders, listed so that each position is the FER label code:
#   0 = Angry, 1 = Disgust, 2 = Fear, 3 = Happy, 4 = Sad, 5 = Surprise, 6 = Neutral
CATEGORIES_FER = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']

# Map folder names to unified labels (string codes '0'..'6'), derived from the
# position of each folder name in CATEGORIES_FER.
fer_mapping = {
  folder_name: str(code)
  for code, folder_name in enumerate(CATEGORIES_FER)
}

def resize_image(img, target_size):
  """Return `img` resized with cv2.resize to `target_size`.

  `target_size` is forwarded unchanged as the size argument of cv2.resize.
  """
  resized = cv2.resize(img, target_size)
  return resized

In [27]:
# Load FER training images from <train dir>/<emotion name>/.
fer_train_images = []
fer_train_labels = []

for category in CATEGORIES_FER:
  folder = os.path.join(images_train_set_fer, category)
  # Store the unified label code (e.g. 'angry' -> '0'), not the folder name,
  # so FER labels use the same '0'..'6' codes that the RAF labels were
  # translated to. Mixing names and codes would yield 14 classes downstream.
  mapped_label = fer_mapping[category]

  # os.listdir order is arbitrary; sort so the sample order is reproducible.
  for img_file in sorted(os.listdir(folder)):
    img_path = os.path.join(folder, img_file)
    img = cv2.imread(img_path)
    if img is None:
      print(f"Failed to load image: {img_path}")
      continue

    # FER images are originally 48x48; resize to 100x100 to be consistent with RAF
    img = resize_image(img, target_size)
    img = img.astype('float32') / 255.0
    fer_train_images.append(img)
    fer_train_labels.append(mapped_label)

X_fer_train = np.array(fer_train_images)
y_fer_train = np.array(fer_train_labels)
print("FER‑2013 training images shape:", X_fer_train.shape)

FER‑2013 training images shape: (28709, 100, 100, 3)


In [28]:
# Load FER test images from <test dir>/<emotion name>/.
fer_test_images = []
fer_test_labels = []

for category in CATEGORIES_FER:
  folder = os.path.join(images_test_set_fer, category)
  # Store the unified label code (e.g. 'angry' -> '0'), not the folder name,
  # so FER labels use the same '0'..'6' codes that the RAF labels were
  # translated to. Mixing names and codes would yield 14 classes downstream.
  mapped_label = fer_mapping[category]

  # os.listdir order is arbitrary; sort so the sample order is reproducible.
  for img_file in sorted(os.listdir(folder)):
    img_path = os.path.join(folder, img_file)
    img = cv2.imread(img_path)
    if img is None:
      print(f"Failed to load image: {img_path}")
      continue

    # Resize to target_size and divide by 255, same as the other loaders
    img = resize_image(img, target_size)
    img = img.astype('float32') / 255.0
    fer_test_images.append(img)
    fer_test_labels.append(mapped_label)

X_fer_test = np.array(fer_test_images)
y_fer_test = np.array(fer_test_labels)
print("FER‑2013 test images shape:", X_fer_test.shape)

# Combine FER training and test sets
X_fer = np.concatenate((X_fer_train, X_fer_test), axis=0)
y_fer = np.concatenate((y_fer_train, y_fer_test), axis=0)
print("Combined FER‑2013 images shape:", X_fer.shape)

FER‑2013 test images shape: (7178, 100, 100, 3)
Combined FER‑2013 images shape: (35887, 100, 100, 3)


In [25]:
# Stack RAF samples first, then FER samples, along the sample axis
# (np.concatenate joins along axis 0 by default). Images and labels are
# concatenated in the same order so they stay aligned.
X_combined_total = np.concatenate([X_raf, X_fer])
y_combined_total = np.concatenate([y_raf, y_fer])

print("\nTotal combined images shape:", X_combined_total.shape)
print("Total combined labels shape:", y_combined_total.shape)


Total combined images shape: (51226, 100, 100, 3)
Total combined labels shape: (51226,)


In [26]:
# Re-split the merged data 80/20. The split is shuffled with a fixed seed and
# stratified on the labels so both parts keep the same class proportions.
split_options = dict(
  test_size=0.2,
  random_state=42,
  shuffle=True,
  stratify=y_combined_total,
)
X_final_train, X_final_test, y_final_train, y_final_test = train_test_split(
  X_combined_total, y_combined_total, **split_options
)

print("\nFinal Training set shape:", X_final_train.shape, y_final_train.shape)
print("Final Test set shape:", X_final_test.shape, y_final_test.shape)


Final Training set shape: (40980, 100, 100, 3) (40980,)
Final Test set shape: (10246, 100, 100, 3) (10246,)
