In [2]:
import librosa, soundfile, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import os

# --- Feature Extraction ---
def extract_feature(file_name, mfcc=True, chroma=True, mel=True):
    """Build a 1-D feature vector describing one audio file.

    The file is read as float32 samples. Each enabled feature is computed
    with librosa, averaged over its frame axis, and the per-feature means
    are concatenated in the fixed order MFCC, chroma, mel.

    Args:
        file_name: Path of the audio file, opened with ``soundfile``.
        mfcc: Include the mean of 40 MFCC coefficients.
        chroma: Include the mean chromagram computed from the STFT magnitude.
        mel: Include the mean mel spectrogram.

    Returns:
        A 1-D ``numpy`` array holding the concatenated feature means. It is
        empty when all three flags are false.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns multi-channel audio as (frames, channels), whereas
    # librosa expects the samples on the last axis. Downmix to mono so stereo
    # recordings yield the same feature layout as mono ones.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # Seed with an empty float array so the result keeps the same dtype
    # promotion as incremental hstack-ing onto np.array([]).
    parts = [np.array([])]
    if mfcc:
        mfcc_mean = np.mean(
            librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        parts.append(mfcc_mean)
    if chroma:
        # The STFT magnitude is only needed for the chroma feature.
        stft = np.abs(librosa.stft(X))
        chroma_mean = np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
        )
        parts.append(chroma_mean)
    if mel:
        mel_mean = np.mean(
            librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0
        )
        parts.append(mel_mean)
    return np.hstack(parts)

# --- Load Dataset ---
# Maps the emotion code found in a file name to its label.
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

# Only files labelled with one of these emotions are loaded.
observed_emotions = [
    'calm',
    'happy',
    'fearful',
    'disgust',
]

# Root folder that is walked recursively for .wav files.
data_dir = "/content/drive/MyDrive/dataset"

# Feature vectors and their emotion labels, filled in parallel.
X = []
y = []

# Walk data_dir recursively and collect a feature vector plus label for every
# .wav file whose emotion is one of observed_emotions.
if os.path.exists(data_dir):
    for root, _, files in os.walk(data_dir):
        for file in files:
            if not file.endswith(".wav"):
                continue
            try:
                # The third dash-separated field of the file name is used as
                # the key into the emotions table.
                emotion = emotions[file.split("-")[2]]
                if emotion not in observed_emotions:
                    continue
                feature = extract_feature(os.path.join(root, file))
                X.append(feature)
                y.append(emotion)
            except (IndexError, KeyError) as err:
                # Too few dash-separated fields, or an unknown emotion code.
                print(f"Skipping file with unexpected name format: {file} - {err}")
else:
    print(f"Error: Dataset directory not found at {data_dir}")


# Train, evaluate and persist the classifier, but only when the loading step
# produced at least one feature vector.
if not X:
    print("Error: No audio files matching the criteria were found in the dataset directory.")
else:
    # --- Train/Test Split ---
    # 75% of the samples are used for training, 25% are held out for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        np.array(X), y, test_size=0.25, random_state=42
    )

    # --- Train the Model ---
    # The classifier is seeded like the split so that weight initialisation
    # and batch shuffling, and therefore the reported accuracy, are
    # reproducible from run to run.
    model = MLPClassifier(
        hidden_layer_sizes=(256, 128, 64),
        batch_size=64,
        max_iter=500,
        learning_rate_init=0.001,
        random_state=42,
    )
    model.fit(X_train, y_train)

    # --- Evaluate ---
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f"Model Accuracy: {acc*100:.2f}%")

    # --- Save the Model ---
    # The fitted model is pickled into the current working directory.
    with open("modelForPrediction.sav", "wb") as f:
        pickle.dump(model, f)

    print("✅ Model retrained and saved successfully!")

Error: Dataset directory not found at /content/drive/MyDrive/dataset
Error: No audio files matching the criteria were found in the dataset directory.
