**Imports**

In [1]:
# Install required libraries (run once)
!pip install librosa soundfile scikit-learn joblib

# Imports
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import joblib
import glob
print('Setup Done')


Setup Done


**Upload Audio Files**

In [2]:
!ls /content


Audio_Song_Actors_01-24.zip  Audio_Speech_Actors_01-24.zip  sample_data


In [6]:
import zipfile

zip_path = "/content/Audio_Song_Actors_01-24.zip"
extract_path = "/content/Audio_Song_Actors_01-24"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

DATASET_PATH = extract_path


In [8]:
import glob

# Confirm it worked
!ls /content/Audio_Song_Actors_01-24

# Find all .wav files
all_files = glob.glob(os.path.join(DATASET_PATH, "**/*.wav"), recursive=True)
print("Found", len(all_files), "audio files.")


Actor_01  Actor_04  Actor_07  Actor_10	Actor_13  Actor_16  Actor_19  Actor_22
Actor_02  Actor_05  Actor_08  Actor_11	Actor_14  Actor_17  Actor_20  Actor_23
Actor_03  Actor_06  Actor_09  Actor_12	Actor_15  Actor_18  Actor_21  Actor_24
Found 1012 audio files.


**Map Emotions from Filename**

In [9]:
# Emotion code mapping
emotion_dict = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}


**Feature Extraction Function**

In [10]:
def extract_features(file_path):
    y, sr = librosa.load(file_path, duration=3, offset=0.5)
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
    return np.hstack([mfccs, chroma, mel])


**Loop Through Files & Prepare Dataset**

In [11]:
features = []
labels = []

# All .wav files in both speech and song folders
all_files = glob.glob(os.path.join(DATASET_PATH, "**/*.wav"), recursive=True)

for file in all_files:
    try:
        parts = os.path.basename(file).split("-")
        emotion_code = parts[2]
        emotion = emotion_dict.get(emotion_code)
        if emotion:  # Only include known emotions
            feature_vector = extract_features(file)
            features.append(feature_vector)
            labels.append(emotion)
    except Exception as e:
        print("Error processing", file, e)


**Train/Test Split + Model Training**

In [12]:
X = np.array(features)
y = np.array(labels)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


**Evaluate Model**

In [13]:
y_pred = model.predict(X_val)

print("Classification Report:\n", classification_report(y_val, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred))
print("F1 Score (weighted):", f1_score(y_val, y_pred, average='weighted'))


Classification Report:
               precision    recall  f1-score   support

       angry       0.82      0.89      0.86        37
        calm       0.83      0.95      0.89        37
     fearful       0.81      0.78      0.79        37
       happy       0.84      0.70      0.76        37
     neutral       1.00      1.00      1.00        18
         sad       0.81      0.78      0.79        37

    accuracy                           0.84       203
   macro avg       0.85      0.85      0.85       203
weighted avg       0.84      0.84      0.84       203

Confusion Matrix:
 [[33  0  2  0  0  2]
 [ 0 35  0  2  0  0]
 [ 2  0 29  1  0  5]
 [ 3  7  1 26  0  0]
 [ 0  0  0  0 18  0]
 [ 2  0  4  2  0 29]]
F1 Score (weighted): 0.8354075568269558


**Save Model**

In [14]:
joblib.dump(model, "emotion_model.pkl")


['emotion_model.pkl']