In [None]:
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.model_selection import train_test_split
print(np.__version__)

# RAVDESS emotion labels, listed in the order of their two-digit codes
# ("01" .. "08"); the lookup table below is derived from this ordering.
_EMOTION_LABELS = (
    "neutral",
    "calm",
    "happy",
    "sad",
    "angry",
    "fearful",
    "disgust",
    "surprised",
)

# zero-padded emotion code -> emotion name
int2emotion = {
    f"{code:02d}": label
    for code, label in enumerate(_EMOTION_LABELS, start=1)
}

# only recordings carrying one of these emotions are kept
AVAILABLE_EMOTIONS = {"angry", "sad", "neutral", "happy"}


In [48]:
import numpy as np
import soundfile as sf
import librosa

def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        Other options:
            - verbose: print the intermediate/final feature shapes
              (defaults to True, matching the historical output)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`

    Returns a 1-D numpy array holding the requested features, each averaged
    over axis 1 and concatenated in the order mfcc, chroma, mel, contrast,
    tonnetz. The array is empty when no feature flag is set.
    """
    use_mfcc = kwargs.get("mfcc")
    use_chroma = kwargs.get("chroma")
    use_mel = kwargs.get("mel")
    use_contrast = kwargs.get("contrast")
    use_tonnetz = kwargs.get("tonnetz")
    verbose = kwargs.get("verbose", True)

    # The whole signal is read into memory, so the file can be closed before
    # the (comparatively slow) feature computation starts.
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # Multi-channel files are read as a 2-D (frames, channels) array, which the
    # librosa calls below would misinterpret; down-mix to a mono signal first.
    if X.ndim > 1:
        X = np.mean(X, axis=1)

    def ensure_1d(feature, name):
        """Average a 2-D feature matrix over axis 1 and return it flattened."""
        if feature.ndim > 1:
            if verbose:
                print(f"{name} original shape: {feature.shape}")
            feature = np.mean(feature, axis=1)
            if verbose:
                print(f"{name} shape after mean: {feature.shape}")
        return feature.flatten()

    # The magnitude spectrogram is shared by the chroma and contrast features.
    if use_chroma or use_contrast:
        stft = np.abs(librosa.stft(X))

    # Seed with an empty float64 array so the result is a valid (empty) array
    # even when no feature was requested.
    pieces = [np.array([])]

    if use_mfcc:
        mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)
        pieces.append(ensure_1d(mfccs, "MFCCs"))

    if use_chroma:
        chroma_feat = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
        pieces.append(ensure_1d(chroma_feat, "Chroma"))

    if use_mel:
        mel_feat = librosa.feature.melspectrogram(y=X, sr=sample_rate)
        pieces.append(ensure_1d(mel_feat, "MEL"))

    if use_contrast:
        contrast_feat = librosa.feature.spectral_contrast(S=stft, sr=sample_rate)
        pieces.append(ensure_1d(contrast_feat, "Contrast"))

    if use_tonnetz:
        tonnetz_feat = librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate)
        pieces.append(ensure_1d(tonnetz_feat, "Tonnetz"))

    result = np.hstack(pieces)

    if verbose:
        print(f"Final feature shape: {result.shape}")
    return result


In [1]:
import os
import glob
from sklearn.model_selection import train_test_split

def load_data(test_size=0.2, data_glob=r"\Actors_1\Actor_*\*.wav"):
    """
    Load the audio files matched by `data_glob`, extract their features and
    split them into training and testing sets.

    Parameters:
        test_size: fraction of the samples reserved for testing
            (forwarded to `train_test_split`).
        data_glob: glob pattern selecting the `.wav` files to load. The
            default is the location this notebook was written against;
            pass your own pattern instead of editing the function.

    Returns:
        The `train_test_split` result `(X_train, X_test, y_train, y_test)`,
        where the X parts come from a numpy array of feature vectors and the
        y parts from a list of emotion names.

    Raises:
        ValueError: if no file produced a usable sample.
    """
    X, y = [], []
    # glob returns matches in arbitrary order; sorting keeps the seeded split
    # below reproducible from one machine/run to the next.
    files = sorted(glob.glob(data_glob))
    print("Files found:", len(files))

    for file in files:
        # get the base name of the audio file
        basename = os.path.basename(file)
        # the emotion code is the third dash-separated field of the name
        name_fields = basename.split("-")
        if len(name_fields) < 3:
            # a stray file that does not follow the naming scheme must not
            # abort the whole load with an IndexError
            print(f"Skipping file with unexpected name: {file}")
            continue
        # get the emotion label (None for an unknown code)
        emotion = int2emotion.get(name_fields[2])
        print(f"Processing file: {file}, Emotion: {emotion}")

        # we allow only AVAILABLE_EMOTIONS we set
        if emotion in AVAILABLE_EMOTIONS:
            # extract speech features
            features = extract_feature(file, mfcc=True, chroma=True, mel=True)
            print(f"Extracted features shape: {features.shape}")
            # add to data
            X.append(features)
            y.append(emotion)
        else:
            print(f"Skipping emotion: {emotion}")

    if not X:
        raise ValueError("No data loaded - check file paths and emotion filtering.")

    # split the data to training and testing and return it
    return train_test_split(np.array(X), y, test_size=test_size, random_state=7)

# Load the data. A loading failure is deliberately left to propagate: catching
# it here would leave X_train/X_test/y_train/y_test undefined and make every
# later cell fail with an unrelated NameError instead of the real cause.
X_train, X_test, y_train, y_test = load_data(test_size=0.25)
print("Data loaded successfully")
print("[+] Number of training samples:", len(X_train))
print("[+] Number of testing samples:", len(X_test))


ModuleNotFoundError: No module named 'numpy'

In [46]:
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score

import os
import pickle

In [47]:
# Sanity check: show the type of X_train.
# X_train is not assigned in this cell; it must already have been set by the
# earlier cell that calls load_data(test_size=0.25).
print(type(X_train))


<class 'NoneType'>


In [43]:
# print some details
# number of samples in training data
print("[+] Number of training samples:", X_train.shape[0])
# number of samples in testing data
print("[+] Number of testing samples:", X_test.shape[0])
# number of features used
# this is the length of the feature vector built for each file
# by the extract_feature() function defined in this notebook
print("[+] Number of features:", X_train.shape[1])
# best model, determined by a grid search
model_params = {
    "alpha": 0.01,
    "batch_size": 256,
    "epsilon": 1e-08,
    "hidden_layer_sizes": (300,),
    "learning_rate": "adaptive",
    "max_iter": 500,
}
# initialize Multi Layer Perceptron classifier
# with best parameters ( so far )
model = MLPClassifier(**model_params)

# train the model
print("[*] Training the model...")
model.fit(X_train, y_train)

# predict the held-out test split to measure how good we are
y_pred = model.predict(X_test)

# calculate the accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy: {:.2f}%".format(accuracy * 100))

# now we save the model
# make result directory if doesn't exist yet
os.makedirs("result", exist_ok=True)

# use a context manager so the file is flushed and closed even if pickling fails
with open("result/mlp_classifier.model", "wb") as model_file:
    pickle.dump(model, model_file)

NameError: name 'X_train' is not defined