# Emo-DB Emotion Recognition Model
Authors: Sylas Chacko, Ashley Chen, Omari Motta

In [6]:
# Imports

import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
from glob import glob
import librosa 
import librosa.display
import IPython.display as ipd
import functools
from pathlib import Path
import pickle
import copy
import os
import sklearn.neural_network
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split


In [2]:
# Reading in feature extractions

df_Mel = pd.read_csv(r'C:\Users\sylas\OneDrive\emo-db-project\working\Mel_Spec_features.csv')
df_Chr = pd.read_csv(r'C:\Users\sylas\OneDrive\emo-db-project\working\Chroma_STFT_features.csv')
df_MFCC = pd.read_csv(r'C:\Users\sylas\OneDrive\emo-db-project\working\MFCCs_features.csv')


In [3]:
# Preprocessing

# Extract features and labels
df = pd.concat([df_MFCC, df_Mel.drop('Emotion', axis='columns'), df_Chr.drop('Emotion', axis='columns')], axis=1)

# Create a new column 'Section' that indexes each group of 87 rows
df['Index'] = df.index // 87  # Integer division of the index by 87

# Function to split DataFrame into chunks
def split_dataframe(df, chunk_size):
    return [df.iloc[i:i + chunk_size] for i in range(0, len(df), chunk_size)]

# Split DataFrame into chunks of 87 rows each
chunks = split_dataframe(df, 87)


# Shuffle each chunk
shuffled_chunks = [chunk.sample(frac=1).reset_index(drop=True) for chunk in chunks]

# Concatenate the shuffled chunks back together
shuffled_df = pd.concat(shuffled_chunks).reset_index(drop=True)
shuffled_index = np.random.permutation(df.index)

df_shuffled = df.iloc[shuffled_index].reset_index(drop=True)

# Calculate the split index
train_size = int(0.8 * len(df_shuffled))
test_size = len(df_shuffled) - train_size

# Split into train and test sets
train_df = df_shuffled.iloc[:train_size]
test_df = df_shuffled.iloc[train_size:]

y_train, y_test = np.array(train_df['Emotion'].values), np.array(test_df['Emotion'].values)

X1_train, X1_test= np.array(train_df.iloc[:,1:14].values), np.array(test_df.iloc[:,1:14].values)

X2_train, X2_test = np.array(train_df.iloc[:,14:142].values), np.array(test_df.iloc[:,14:142].values)

X3_train, X3_test = np.array(train_df.iloc[:,142:154].values), np.array(test_df.iloc[:,142:154].values)

X_train = np.array(train_df.iloc[:,1:154].values)
X_test = np.array(test_df.iloc[:,1:154].values)

# Convert labels to categorical (one-hot encoding) TRAIN

# Label encoding using pandas' factorize method
labels_train, unique = pd.factorize(y_train)
labels_test = pd.Series(y_test).map(lambda x: np.where(unique == x)[0][0]).values

# Convert labels to one-hot encoding
def one_hot_encode(labels, num_classes):
    return np.eye(num_classes)[labels]  # Using np.eye to create a one-hot encoded matrix

# Get number of classes from unique labels
num_classes = len(unique)

# One-hot encoding for training data
y_train_onehot = one_hot_encode(labels_train, num_classes)

# One-hot encoding for testing data
y_test_onehot = one_hot_encode(labels_test, num_classes)


In [7]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_onehot, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_onehot, dtype=torch.float32)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
# Feedforward Pass

class EmotionClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(EmotionClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return self.softmax(x)


In [9]:
# Initializing Model, Loss Function, and Optimizer

input_size = 153  # Number of features
hidden_size = 256  # Number of neurons in the hidden layers (this can be tuned)
num_classes = num_classes  # Number of emotion classes

model = EmotionClassifier(input_size, hidden_size, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()  # CrossEntropy is more appropriate for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [10]:
# Training the Model

num_epochs = 50  # Set the number of epochs

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for i, (inputs, labels) in enumerate(train_loader):
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

Epoch [1/50], Loss: 1.8371
Epoch [2/50], Loss: 1.8230
Epoch [3/50], Loss: 1.7991
Epoch [4/50], Loss: 1.7992
Epoch [5/50], Loss: 1.8108
Epoch [6/50], Loss: 1.8231
Epoch [7/50], Loss: 1.8057
Epoch [8/50], Loss: 1.7947
Epoch [9/50], Loss: 1.8045
Epoch [10/50], Loss: 1.8180
Epoch [11/50], Loss: 1.8052
Epoch [12/50], Loss: 1.7958
Epoch [13/50], Loss: 1.8066
Epoch [14/50], Loss: 1.7956
Epoch [15/50], Loss: 1.7977
Epoch [16/50], Loss: 1.7924
Epoch [17/50], Loss: 1.7947
Epoch [18/50], Loss: 1.7958
Epoch [19/50], Loss: 1.8046
Epoch [20/50], Loss: 1.8146
Epoch [21/50], Loss: 1.8042
Epoch [22/50], Loss: 1.8031
Epoch [23/50], Loss: 1.8178
Epoch [24/50], Loss: 1.8314
Epoch [25/50], Loss: 1.8360
Epoch [26/50], Loss: 1.8319
Epoch [27/50], Loss: 1.8162
Epoch [28/50], Loss: 1.8036
Epoch [29/50], Loss: 1.7949
Epoch [30/50], Loss: 1.8032
Epoch [31/50], Loss: 1.7983
Epoch [32/50], Loss: 1.7960
Epoch [33/50], Loss: 1.8155
Epoch [34/50], Loss: 1.7920
Epoch [35/50], Loss: 1.7945
Epoch [36/50], Loss: 1.7936
E

In [11]:
# Model Evaluation

model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        _, labels = torch.max(labels.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on test set: {100 * correct / total:.2f}%')

Accuracy on test set: 34.53%


In [None]:
# Saving the Model

torch.save(model.state_dict(), 'emotion_classifier_model.pth')