In [3]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report
)
import joblib

# =====================================================
# FEATURE EXTRACTION
# =====================================================
def extract_features(file_path, max_pad_len=174, sr=22050, n_mfcc=40):
    """Load an audio file and return a fixed-width MFCC matrix.

    The audio is loaded with ``librosa.load`` and converted to MFCCs with
    ``librosa.feature.mfcc``. The second axis of the result is then forced
    to exactly ``max_pad_len`` columns: shorter results are zero-padded on
    the right, longer ones are truncated.

    Args:
        file_path: Path of the audio file to load.
        max_pad_len: Number of columns (second axis) of the returned matrix.
        sr: Sample rate passed to ``librosa.load``. Defaults to 22050, the
            value that was previously hard-coded.
        n_mfcc: Number of MFCC coefficients requested from librosa.
            Defaults to 40, the value that was previously hard-coded.

    Returns:
        A 2-D NumPy array whose second axis has length ``max_pad_len``.
        (Per librosa's documentation the first axis has length ``n_mfcc``.)

    Note:
        Features used for prediction must be extracted with the same
        ``max_pad_len`` / ``sr`` / ``n_mfcc`` as the features used for
        training, otherwise the flattened vectors will not line up.
    """
    # librosa.load returns the sample rate actually used; reuse it so the
    # MFCC computation always agrees with the loaded signal.
    signal, sr = librosa.load(file_path, sr=sr)
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

    n_frames = mfcc.shape[1]
    if n_frames < max_pad_len:
        # Too short: append zero columns on the right only.
        missing = max_pad_len - n_frames
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, missing)), mode='constant')
    else:
        # Long enough: keep the leading max_pad_len columns.
        mfcc = mfcc[:, :max_pad_len]
    return mfcc

# =====================================================
# DATASET PREPARATION
# =====================================================
# Root folder that contains every corpus listed below.
_DATASET_ROOT = "K:\\Code\\Project\\Research Paper\\Emotion Detection"
# Parent of the per-actor folders Actor_01 .. Actor_24.
_ACTOR_ROOT = _DATASET_ROOT + "\\archive\\audio_speech_actors_01-24"
# Sub-folders of "archive2", in the order they are scanned.
_ARCHIVE2_FOLDERS = ("Angry", "Happy", "Natural", "Sad", "Surprised")

# Directories scanned for .wav files: the SER training folder first, then
# the 24 actor folders, then the archive2 emotion folders.
data_dirs = (
    [_DATASET_ROOT + "\\voice_data\\train_data\\SER"]
    + [f"{_ACTOR_ROOT}\\Actor_{actor_id:02d}" for actor_id in range(1, 25)]
    + [f"{_DATASET_ROOT}\\archive2\\{folder}" for folder in _ARCHIVE2_FOLDERS]
)

# X collects one flattened MFCC vector per file, y the matching class index,
# and emotions the label names in order of first appearance (so that
# emotions[i] is the name of class i).
X = []
y = []
emotions = []

for data_dir in data_dirs:
    # Missing folders are reported and skipped rather than treated as fatal.
    if not os.path.exists(data_dir):
        print(f"‚ö†Ô∏è Warning: {data_dir} not found, skipping...")
        continue

    for filename in os.listdir(data_dir):
        if not filename.endswith(".wav"):
            continue

        # The label is whatever follows the last underscore in the file
        # name, with the ".wav" extension removed.
        label = filename.rsplit("_", 1)[-1].replace(".wav", "")
        if label not in emotions:
            emotions.append(label)

        wav_path = os.path.join(data_dir, filename)
        features = extract_features(wav_path)
        X.append(features.flatten())
        y.append(emotions.index(label))

X = np.array(X)
y = np.array(y)
if X.shape[0] == 0:
    raise RuntimeError("‚ùå No audio data found! Check dataset paths.")

# Hold out 15% of the samples for validation; the fixed seed keeps the
# shuffle repeatable for a given sample order.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

# =====================================================
# MODEL TRAINING / LOADING
# =====================================================
# Path of the persisted bundle: a dict with the fitted classifier under
# "model" and the list of label names under "emotions".
model_path = "svm_emotion_model.joblib"
choice = input("Do you want to load the saved model? (yes/no): ").strip().lower()

if choice == "yes" and os.path.exists(model_path):
    print("üîÅ Loading saved SVM model...")
    # SECURITY: joblib.load unpickles the file and can therefore execute
    # arbitrary code. Only load model files that come from a trusted source.
    model_data = joblib.load(model_path)
    model = model_data["model"]
    # Use the label list stored with the model so that predicted indices are
    # mapped to the names the model was trained with.
    emotions = model_data["emotions"]
else:
    # Reached when the user declines loading OR no saved model exists.
    print("üß† Training new SVM model...")

    model = SVC(kernel="rbf", C=10, gamma="scale", probability=True)
    print("Training...")
    model.fit(X_train, y_train)

    # Evaluate on training and validation sets
    y_train_pred = model.predict(X_train)
    y_val_pred = model.predict(X_val)

    train_acc = accuracy_score(y_train, y_train_pred)
    val_acc = accuracy_score(y_val, y_val_pred)
    precision = precision_score(y_val, y_val_pred, average="weighted", zero_division=0)
    recall = recall_score(y_val, y_val_pred, average="weighted", zero_division=0)
    f1 = f1_score(y_val, y_val_pred, average="weighted", zero_division=0)

    print("\n================= Model Performance =================")
    print(f"üèãÔ∏è Training Accuracy  : {train_acc * 100:.2f}%")
    print(f"üß™ Validation Accuracy : {val_acc * 100:.2f}%")
    print("------------------------------------------------------")
    print(f"üéØ Precision (weighted): {precision * 100:.2f}%")
    print(f"üìà Recall (weighted)   : {recall * 100:.2f}%")
    print(f"üî• F1-Score (weighted) : {f1 * 100:.2f}%")
    print("======================================================")

    print("\nClassification Report:")
    # Pass the full label set explicitly. Without it, scikit-learn infers the
    # labels from y_val / y_val_pred, and if any class is absent from both
    # the length of target_names no longer matches and the call raises
    # ValueError. zero_division=0 matches the metric calls above.
    all_labels = list(range(len(emotions)))
    print(
        classification_report(
            y_val,
            y_val_pred,
            labels=all_labels,
            target_names=emotions,
            digits=4,
            zero_division=0,
        )
    )

    # Persist the classifier together with its label names so a later run
    # can restore both.
    joblib.dump({"model": model, "emotions": emotions}, model_path)
    print(f"üíæ Model saved to {model_path}")

# =====================================================
# PREDICTION FUNCTION
# =====================================================
def predict_emotion(file_path):
    """Return the predicted emotion label for one audio file.

    The file's MFCC matrix is flattened into a single row, classified with
    the module-level ``model``, and the resulting class index is looked up
    in the module-level ``emotions`` list.

    Args:
        file_path: Path of the audio file to classify.

    Returns:
        The entry of ``emotions`` at the predicted class index.
    """
    # One sample with all MFCC values laid out row-major in a single row.
    sample = extract_features(file_path).reshape(1, -1)
    predicted_index = model.predict(sample)[0]
    return emotions[predicted_index]

# =====================================================
# TEST PREDICTION
# =====================================================
# Single sample used as a smoke test of predict_emotion.
test_file = (
    "K:\\Code\\Project\\Research Paper\\Emotion Detection"
    "\\voice_data\\test_data\\F_01_OISHI_S_2_SURPRISE_2.wav"
)
if not os.path.exists(test_file):
    # Report the missing sample instead of failing inside the loader.
    print("‚ö†Ô∏è Test file not found:", test_file)
else:
    print("Predicted Emotion:", predict_emotion(test_file))


üß† Training new SVM model...
Training...

üèãÔ∏è Training Accuracy  : 93.54%
üß™ Validation Accuracy : 80.08%
------------------------------------------------------
üéØ Precision (weighted): 80.58%
üìà Recall (weighted)   : 80.08%
üî• F1-Score (weighted) : 80.17%

Classification Report:
              precision    recall  f1-score   support

       angry     0.8448    0.7967    0.8201       123
     disgust     0.9048    0.8837    0.8941        86
        fear     0.9062    0.8969    0.9016        97
       happy     0.8053    0.6894    0.7429       132
     neutral     0.8646    0.8557    0.8601        97
          ps     0.6978    0.7886    0.7405       123
         sad     0.7105    0.7642    0.7364       106
        calm     0.6500    0.7647    0.7027        34

    accuracy                         0.8008       798
   macro avg     0.7980    0.8050    0.7998       798
weighted avg     0.8058    0.8008    0.8017       798

üíæ Model saved to svm_emotion_model.joblib
Predicted