# **Library and Module Imports**

In [None]:
import torch

print("Number of GPU: ", torch.cuda.device_count())
print("GPU Name: ", torch.cuda.get_device_name())

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

In [None]:
# Step 1: Imports & Setup
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from tqdm import tqdm


In [3]:
# Step 2: Load file paths and extract labels
def parse_filename(filename):
    parts = filename.split('_')
    emotion = parts[2]  # DIS, HAP, etc.
    emotion_map = {
        'ANG': 'angry',
        'DIS': 'disgust',
        'FEA': 'fear',
        'HAP': 'happy',
        'NEU': 'neutral',
        'SAD': 'sad'
    }
    return emotion_map.get(emotion, 'unknown')

# Assuming crema_path points to folder with audio
crema_path = "dataset"
data = []

for file in os.listdir(crema_path):
    if file.endswith(".wav"):
        emotion = parse_filename(file)
        path = os.path.join(crema_path, file)
        data.append((path, emotion))

df = pd.DataFrame(data, columns=["file", "emotion"])


In [None]:
# Step 3: Feature Extraction
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr).T, axis=0)
    zcr = np.mean(librosa.feature.zero_crossing_rate(y).T, axis=0)
    rms = np.mean(librosa.feature.rms(y=y).T, axis=0)

    return np.hstack([mfcc, chroma, contrast, zcr, rms])


In [9]:
# Extract features for each audio file
X = []
y = []

for i, row in tqdm(df.iterrows(), total=len(df)):
    try:
        features = extract_features(row["file"])
        X.append(features)
        y.append(row["emotion"])
    except Exception as e:
        print(f"Failed for {row['file']} - {str(e)}")

X = np.array(X)
y = np.array(y)


100%|██████████| 7442/7442 [01:20<00:00, 92.02it/s] 


In [None]:
# Step 4: Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

# Train a basic SVC classifier
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

       angry       0.61      0.70      0.65       255
     disgust       0.37      0.38      0.38       254
        fear       0.33      0.26      0.29       254
       happy       0.43      0.35      0.38       254
     neutral       0.40      0.37      0.38       218
         sad       0.48      0.62      0.54       254

    accuracy                           0.45      1489
   macro avg       0.44      0.45      0.44      1489
weighted avg       0.44      0.45      0.44      1489

