In [1]:
import os
from collections import Counter

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from sklearn.metrics import accuracy_score

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")

In [3]:
class MLPClassifier(nn.Module):
    """Feed-forward classifier: Linear+ReLU hidden layers and a Linear output layer."""

    def __init__(self, input_size, layer_sizes, num_classes):
        """
        Build the layer stack.

        Args:
        - input_size (int): Size of the input vector (e.g. 768).
        - layer_sizes (list of int): Size of each hidden layer, in order.
        - num_classes (int): Number of classes to predict.
        """
        super().__init__()

        # Consecutive entries of `widths` give (in_features, out_features)
        # for each hidden Linear layer.
        widths = [input_size, *layer_sizes]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]

        # Output layer: no activation is applied to its result.
        stack.append(nn.Linear(widths[-1], num_classes))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run `x` through the sequential stack and return its output."""
        return self.model(x)

In [4]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs):
    """
    Train a model and report train/validation metrics after every epoch.

    Args:
    - model: PyTorch model to train.
    - train_loader: DataLoader yielding (inputs, targets) training batches.
    - val_loader: DataLoader yielding (inputs, targets) validation batches.
    - criterion: Loss function (e.g. CrossEntropyLoss).
    - optimizer: Optimizer (e.g. Adam).
    - epochs: Number of epochs.

    The model is moved to the module-level `device`. Nothing is returned;
    per-epoch loss (mean of the per-batch losses) and accuracy are printed.
    """

    def run_pass(loader, training):
        """Iterate once over `loader`; return (summed batch losses, accuracy).

        When `training` is true, each batch also performs a backward pass and
        an optimizer step.
        """
        loss_sum = 0
        seen_targets = []
        seen_preds = []
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            # Predicted class = index of the largest output along dim 1.
            seen_preds.extend(outputs.argmax(dim=1).cpu().numpy())
            seen_targets.extend(targets.cpu().numpy())
        return loss_sum, accuracy_score(seen_targets, seen_preds)

    model.to(device)

    for epoch in range(epochs):
        model.train()
        train_loss, train_accuracy = run_pass(train_loader, training=True)

        model.eval()
        with torch.no_grad():
            val_loss, val_accuracy = run_pass(val_loader, training=False)

        print(f"Epoch {epoch+1}/{epochs}")
        print(f"Train Loss: {train_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {val_accuracy:.4f}")
        print("-" * 50)

In [5]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation is repeatable when the notebook is run top to bottom.
# DataLoader shuffling without an explicit generator also draws from this
# RNG. (GPU kernels may still introduce small run-to-run differences.)
seed = 42
torch.manual_seed(seed)

# Network shape.
input_size = 512          # length of one input feature vector
layer_sizes = [256, 128]  # widths of the hidden layers, in order
num_classes = 6           # number of output classes

model = MLPClassifier(input_size, layer_sizes, num_classes)

# Optimisation settings.
batch_size = 32
learning_rate = 0.001
epochs = 20

In [6]:
# Names of all entries in the video directory, minus one excluded clip.
# NOTE(review): os.listdir() returns entries in arbitrary order, while later
# cells pair labels derived from this list with embedding rows by position.
# Presumably this order matches the one used when the embeddings were
# generated — confirm, and consider sorting in both places.
files = [
    name
    for name in os.listdir('data/video_data')
    if name != '1076_MTI_SAD_XX.flv'
]

In [7]:
# File names are split on '_': field 0 is used as the speaker id and
# field 2 as the emotion code (the classification label).
speaker, y = [], []
for file in files:
    fields = file.split('_')
    emo, sp = fields[2], fields[0]
    y.append(emo)
    speaker.append(sp)

In [None]:
# Number of clips per speaker id; Counter keeps speakers in the order they
# first appear in `speaker`.
c = Counter(speaker)
clips_per_speaker = list(c.values())

# Clip totals for three consecutive groups of speakers: the first 70,
# the next 13, and the rest.
print(sum(clips_per_speaker[:70]))

print(sum(clips_per_speaker[70:83]))

print(sum(clips_per_speaker[83:]))

5720
1065
656


In [9]:
# Integer class index for each emotion code; the index is the code's
# position in this list.
y_map = {
    code: index
    for index, code in enumerate(['NEU', 'HAP', 'SAD', 'ANG', 'FEA', 'DIS'])
}

In [10]:
# Load the video embeddings array stored as a NumPy .npy file.
# NOTE(review): rows are later sliced by position alongside labels built from
# `files`, so row i presumably corresponds to files[i] — confirm against the
# code that produced this file.
video_embeddings = np.load('embeddings/video_embeddings_cnn_v2.npy')

In [11]:
# Split boundaries as row indices into the file-ordered data. The resulting
# sizes (5720 / 1065 / 656) match the per-speaker clip totals printed earlier
# in the notebook, so this is presumably a speaker-disjoint train / val / test
# split — that holds only if `files` is grouped by speaker; TODO confirm.
train_end = 5720
val_end = 6785  # 5720 + 1065

# Embeddings and labels are paired purely by position, so a length mismatch
# would otherwise silently attach the wrong emotion to some clips.
assert len(video_embeddings) == len(y), (
    f"{len(video_embeddings)} embeddings but {len(y)} labels"
)

# float32 features and int64 class indices, as the original
# torch.Tensor(...) / .long() conversions produced.
x_all = torch.tensor(video_embeddings, dtype=torch.float32)
y_all = torch.tensor([y_map[a] for a in y], dtype=torch.long)

x_train, y_train = x_all[:train_end], y_all[:train_end]
x_val, y_val = x_all[train_end:val_end], y_all[train_end:val_end]
x_test, y_test = x_all[val_end:], y_all[val_end:]

In [12]:
# Training data: reshuffled every epoch.
train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Validation data: iterated in a fixed order.
val_dataset = torch.utils.data.TensorDataset(x_val, y_val)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [13]:
# Cross-entropy loss on the model outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
)

Epoch 1/20
Train Loss: 1.6644, Train Accuracy: 0.2675
Val Loss: 1.5300, Val Accuracy: 0.3136
--------------------------------------------------
Epoch 2/20
Train Loss: 1.4578, Train Accuracy: 0.3927
Val Loss: 1.3498, Val Accuracy: 0.4498
--------------------------------------------------
Epoch 3/20
Train Loss: 1.3682, Train Accuracy: 0.4325
Val Loss: 1.3582, Val Accuracy: 0.4441
--------------------------------------------------
Epoch 4/20
Train Loss: 1.2672, Train Accuracy: 0.4836
Val Loss: 1.1491, Val Accuracy: 0.5343
--------------------------------------------------
Epoch 5/20
Train Loss: 1.2224, Train Accuracy: 0.5100
Val Loss: 1.1830, Val Accuracy: 0.5005
--------------------------------------------------
Epoch 6/20
Train Loss: 1.1826, Train Accuracy: 0.5226
Val Loss: 1.1893, Val Accuracy: 0.5146
--------------------------------------------------
Epoch 7/20
Train Loss: 1.1456, Train Accuracy: 0.5371
Val Loss: 1.0392, Val Accuracy: 0.5784
-------------------------------------------