In [17]:
import cv2
import os
from glob import glob

video_root = "/kaggle/input/ravdess-emotional-speech-video/RAVDESS dataset"
save_root = "/kaggle/working/frames"
os.makedirs(save_root, exist_ok=True)

video_paths = sorted(glob(os.path.join(video_root, "**/*.mp4"), recursive=True))

for video_path in video_paths:
    file_name = os.path.basename(video_path).replace(".mp4", "")
    save_dir = os.path.join(save_root, file_name)
    os.makedirs(save_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    if ret:
        save_path = os.path.join(save_dir, "frame_0000.jpg")
        cv2.imwrite(save_path, frame)
    cap.release()


In [18]:
import pandas as pd

data = []
emotion_map = {
    1: "neutral",
    2: "calm",
    3: "happy",
    4: "sad",
    5: "angry",
    6: "fearful",
    7: "disgust",
    8: "surprised"
}

frame_dirs = sorted(glob(os.path.join(save_root, "*")))

for frame_dir in frame_dirs:
    folder_name = os.path.basename(frame_dir)
    parts = folder_name.split("-")

    if len(parts) == 7:
        emotion_code = int(parts[2])
        intensity = int(parts[3])
        statement = int(parts[4])
        repetition = int(parts[5])
        actor = int(parts[6])

        label = emotion_map.get(emotion_code)
        if label:
            frame_path = os.path.join(frame_dir, "frame_0000.jpg")
            data.append([frame_path, label, emotion_code, intensity, statement, repetition, actor])

df = pd.DataFrame(data, columns=[
    "frame_dir", "label", "emotion_code", "intensity", "statement", "repetition", "actor"
])
df.to_csv("/kaggle/working/ravdess_labels.csv", index=False)
df.head()


Unnamed: 0,frame_dir,label,emotion_code,intensity,statement,repetition,actor
0,/kaggle/working/frames/01-01-01-01-01-01-01/fr...,neutral,1,1,1,1,1
1,/kaggle/working/frames/01-01-01-01-01-01-02/fr...,neutral,1,1,1,1,2
2,/kaggle/working/frames/01-01-01-01-01-01-03/fr...,neutral,1,1,1,1,3
3,/kaggle/working/frames/01-01-01-01-01-01-04/fr...,neutral,1,1,1,1,4
4,/kaggle/working/frames/01-01-01-01-01-01-05/fr...,neutral,1,1,1,1,5


In [19]:
import pandas as pd

df = pd.read_csv("/kaggle/working/ravdess_labels.csv")

label2idx = {label: idx for idx, label in enumerate(df['label'].unique())}
df['label_idx'] = df['label'].map(label2idx)


In [20]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df['label_idx'], random_state=42
)


In [21]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)  
])


In [22]:
from torch.utils.data import Dataset
from PIL import Image

class RAVDESSDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = row['frame_dir']
        image = Image.open(img_path).convert("RGB")
        label = row['label_idx']
        if self.transform:
            image = self.transform(image)
        return image, label


In [23]:
from torch.utils.data import DataLoader

train_dataset = RAVDESSDataset(train_df, transform=transform)
test_dataset = RAVDESSDataset(test_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [24]:
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = len(label2idx)

# Model
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Class weights
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_df['label_idx']),
    y=train_df['label_idx']
)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# Loss, optimizer
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 208MB/s]


In [25]:
def evaluate(loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total


In [27]:
import time

num_epochs = 25

for epoch in range(1, num_epochs + 1):
    print(f"\nEpoch {epoch}/{num_epochs} başladı")

    
    start_train = time.time()
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_train = time.time()

    start_test = time.time()
    train_acc = evaluate(train_loader)
    test_acc = evaluate(test_loader)
    end_test = time.time()


    print(f"[Epoch {epoch}] Train Accuracy: {train_acc:.4f} | Time: {end_train - start_train:.2f} sec")
    print(f"[Epoch {epoch}] Test Accuracy : {test_acc:.4f} | Time: {end_test - start_test:.2f} sec")

    scheduler.step()



Epoch 1/25 başladı
[Epoch 1] Train Accuracy: 0.5373 | Time: 24.76 sec
[Epoch 1] Test Accuracy : 0.5087 | Time: 31.37 sec

Epoch 2/25 başladı
[Epoch 2] Train Accuracy: 0.7209 | Time: 24.59 sec
[Epoch 2] Test Accuracy : 0.6597 | Time: 30.99 sec

Epoch 3/25 başladı
[Epoch 3] Train Accuracy: 0.8225 | Time: 24.77 sec
[Epoch 3] Test Accuracy : 0.7431 | Time: 31.21 sec

Epoch 4/25 başladı
[Epoch 4] Train Accuracy: 0.8503 | Time: 24.45 sec
[Epoch 4] Test Accuracy : 0.7847 | Time: 31.15 sec

Epoch 5/25 başladı
[Epoch 5] Train Accuracy: 0.8941 | Time: 24.66 sec
[Epoch 5] Test Accuracy : 0.8385 | Time: 30.96 sec

Epoch 6/25 başladı
[Epoch 6] Train Accuracy: 0.9158 | Time: 24.45 sec
[Epoch 6] Test Accuracy : 0.8594 | Time: 31.02 sec

Epoch 7/25 başladı
[Epoch 7] Train Accuracy: 0.9327 | Time: 24.42 sec
[Epoch 7] Test Accuracy : 0.8698 | Time: 30.88 sec

Epoch 8/25 başladı
[Epoch 8] Train Accuracy: 0.9457 | Time: 24.40 sec
[Epoch 8] Test Accuracy : 0.8889 | Time: 31.22 sec

Epoch 9/25 başladı
[Epo

In [28]:
torch.save(model.state_dict(), "/kaggle/working/resnet18_ravdess.pth")
