In [22]:
#Importing Required libraries
import numpy as np
import pandas as pd
import os
import glob
import pickle
import librosa
import soundfile
import warnings
warnings.filterwarnings('ignore')

In [23]:
#Extracting the features mfcc, chroma, and mel from sound files
def extract_feature(file_name,mfcc,chroma,mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X=sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X,sr=sample_rate,n_mfcc=40).T,axis=0)
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft,sr=sample_rate).T,axis=0)
            result=np.hstack((result,chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X,sr=sample_rate).T,axis=0)
            result=np.hstack((result,mel))
        return result

In [24]:
#Creating Dictionary to connect emotions with numbers
emotions={
    '01':'neutral',
    '02':'calm',
    '03':'happy',
    '04':'sad',
    '05':'angry',
    '06':'fearful',
    '07':'disgust',
    '08':'surprised'
}

observed_emotions=['calm','happy','fearful','disgust']

In [25]:
#Extracting features and emotions from individual sound files
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob('D:\\xyz\\Personal\\Projects\\Speech Emotion Recognition\\Dataset\\Actor_*\\*.wav'):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file,mfcc=True,chroma=True,mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x),y,test_size=test_size,random_state=9)

In [71]:
#Splitting dataset into Train and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

In [72]:
#Checking shape of Train and Test set
print((x_train.shape[0],x_test.shape[0]))

(576, 192)


In [73]:
#Getting number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 140


In [74]:
#Initializing MLF Classifier
from sklearn.neural_network import MLPClassifier
clf=MLPClassifier(alpha=0.01,batch_size=256,epsilon=1e-08,hidden_layer_sizes=(300,),learning_rate='adaptive',max_iter=500)

In [75]:
#Training our model using Train set
clf.fit(x_train,y_train)

MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [76]:
#Predicting on Test set
y_pred=clf.predict(x_test)

In [77]:
#Calculating accuacy score of our model
from sklearn.metrics import accuracy_score
ascore=accuracy_score(y_test,y_pred)
print("Accuracy Score: {:.2f}%".format(ascore*100))

Accuracy Score: 54.69%


In [78]:
#Creating Classification report for our model
from sklearn.metrics import classification_report
cr=classification_report(y_test,y_pred)
print(cr)

              precision    recall  f1-score   support

        calm       0.79      0.81      0.80        57
     disgust       0.39      0.42      0.40        48
     fearful       0.42      0.49      0.45        37
       happy       0.53      0.42      0.47        50

    accuracy                           0.55       192
   macro avg       0.53      0.53      0.53       192
weighted avg       0.55      0.55      0.55       192

