In [2]:
#Make the necessary imports:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [3]:
#Define a function extract_feature to extract the mfcc, chroma, and mel features from a sound file.
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [4]:
#Now, let’s define a dictionary to hold numbers and the emotions available in the RAVDESS dataset, and a list to hold those we want- calm, happy, fearful, disgust.
#DataFlair - Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}
#DataFlair - Emotions to observe
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [5]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.25):
    x,y=[],[]
    for file in glob.glob("C:\DataFlair\speech-emotion-recognition-ravdess-data\Actor_*\*.wav"):
        file_name=os.path.basename(file)
        #print(file_name)
        file_name_fragment = file_name.split("-")[2]
        #print(file_name_fragment)
        emotion=emotions[file_name_fragment]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)
#speech-emotion-recognition-ravdess-data\*\*

In [6]:
load_data()

[array([[-5.22057312e+02,  3.50732536e+01,  3.75979710e+00, ...,
          1.65243197e-04,  1.04321633e-04,  6.55571930e-05],
        [-6.41207336e+02,  4.49728851e+01, -1.83886874e+00, ...,
          3.89262023e-05,  3.05255780e-05,  2.94166657e-05],
        [-6.50698486e+02,  5.30313988e+01, -4.91021967e+00, ...,
          4.75216984e-05,  3.46632660e-05,  1.62844553e-05],
        ...,
        [-5.50096191e+02,  1.70297680e+01, -1.14575634e+01, ...,
          1.51764645e-04,  1.16828531e-04,  8.47479387e-05],
        [-5.55371155e+02,  4.71378479e+01,  1.10560827e+01, ...,
          1.61086486e-04,  1.04962528e-04,  6.52812014e-05],
        [-5.04864716e+02,  3.52971039e+01, -1.44038277e+01, ...,
          6.08151604e-04,  5.55269711e-04,  4.47782280e-04]]),
 array([[-6.01416138e+02,  6.33363152e+01, -1.16155605e+01, ...,
          7.30560168e-06,  4.64440200e-06,  1.78248013e-06],
        [-7.87200317e+02,  5.97919350e+01,  2.11022377e+01, ...,
          4.30602057e-07,  4.01216198e

In [7]:
#Time to split the dataset into training and testing sets! Let’s keep the test set 25% of everything and use the load_data function for this.
#DataFlair - Split the dataset
def load_data(test_size=0.25):
    x,y=[],[]
    with zipfile.ZipFile("speech-emotion-recognition-ravdess-data.zip", "r") as z:
        z.extractall("temp")
    for file in glob.glob("temp\speech-emotion-recognition-ravdess-data\Actor_*\*.wav"):
        x_train,x_test,y_train,y_test=load_data(test_size=0.25)
#Actor_*\*.wav                          

In [8]:
#Observe the shape of the training and testing datasets
#DataFlair - Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

NameError: name 'x_train' is not defined

In [None]:
#And get the number of features extracted.
#DataFlair - Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')


In [None]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)


In [None]:
#Fit/train the model.

#DataFlair - Train the model
model.fit(x_train,y_train)

In [None]:
#Let’s predict the values for the test set. This gives us y_pred (the predicted emotions for the features in the test set).
#DataFlair - Predict for the test set
y_pred=model.predict(x_test)


In [None]:
#To calculate the accuracy of our model, we’ll call up the accuracy_score() function we imported from sklearn. Finally, we’ll round the accuracy to 2 decimal places and print it out.
#DataFlair - Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))