In [3]:
import librosa
import numpy as np
import os
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [4]:
# 1. Feature Extraction Function
def extract_features(file_name):
    y, sr = librosa.load(file_name, duration=2.5, offset=0.6)
    # Extract MFCC features
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
    # Extract Chroma features
    chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
    # Extract Mel Spectrogram
    mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
    return np.hstack([mfccs, chroma, mel])

In [9]:
# 2. Load Data and Extract Features
dataset_path = 'input/'
features = []
labels = []

# Example with RAVDESS dataset structure: audio_speech_actors_01-24/Actor_*/*.wav
for file in glob.glob(os.path.join(dataset_path, "Actor_*/*.wav")):
    # Extract emotion label from the filename (3rd part of the name convention)
    emotion = int(os.path.basename(file).split('-')[2]) - 1  # RAVDESS labels are 1-8, adjust to 0-7
    features.append(extract_features(file))
    labels.append(emotion)

In [10]:
# 3. Convert to DataFrame and Encode Labels
X = np.array(features)
y = np.array(labels)

In [11]:
# 4. Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [12]:
# 5. Train a Support Vector Machine (SVM) Model
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

In [13]:
# 6. Evaluate the Model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']))

              precision    recall  f1-score   support

     neutral       0.32      0.36      0.34        22
        calm       0.56      0.64      0.60        56
       happy       0.31      0.36      0.33        42
         sad       0.45      0.42      0.43        50
       angry       0.60      0.48      0.53        50
     fearful       0.35      0.41      0.38        39
     disgust       0.40      0.39      0.40        46
   surprised       0.48      0.38      0.42        55

    accuracy                           0.44       360
   macro avg       0.43      0.43      0.43       360
weighted avg       0.45      0.44      0.44       360

