## Welcome to my speech emotion project

We import the libraries we will need.

In [1]:
import librosa 
import soundfile 
import numpy as np
import os, glob
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [39]:
# Two-digit emotion codes mapped to human-readable emotion names.
# The code is read from the third "-"-separated field of each audio file name.
emotion_labels = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised",
}

# Only samples carrying one of these emotions are kept for training/testing.
focused_emotion_labels = ["happy", "sad", "angry"]

We create a function for the audio preprocessing (feature extraction).

In [3]:
def extract_feature(file_name, mfcc, chroma, mel):
    """Extract a 1-D feature vector from an audio file.

    The audio is loaded with ``librosa.load`` (library defaults). Each enabled
    feature is averaged over its frame axis, and the enabled features are
    concatenated in the fixed order MFCC, chroma, mel.

    Parameters
    ----------
    file_name : str
        Path of the audio file to load.
    mfcc : bool
        Include the per-coefficient mean of 40 MFCCs.
    chroma : bool
        Include the per-bin mean of the chromagram computed from the
        magnitude STFT.
    mel : bool
        Include the per-band mean of the mel spectrogram.

    Returns
    -------
    numpy.ndarray
        1-D array holding the enabled features; empty when every flag is
        false.
    """
    # librosa.load opens the file itself, so no separate soundfile handle
    # is needed (the previous one was opened but never read).
    audio, sample_rate = librosa.load(file_name)

    # Start from an empty array unconditionally. It used to be created only
    # when `chroma` was true, so chroma=False crashed with UnboundLocalError.
    parts = [np.array([])]

    if mfcc:
        mfcc_mean = np.mean(
            librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        parts.append(mfcc_mean)

    if chroma:
        # The magnitude spectrogram is only needed for the chroma feature.
        stft = np.abs(librosa.stft(audio))
        chroma_mean = np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
        )
        parts.append(chroma_mean)

    if mel:
        mel_mean = np.mean(
            librosa.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0
        )
        parts.append(mel_mean)

    return np.hstack(parts)


In [41]:
def loading_audio_data(data_glob="C://Users//julie//OneDrive//Escritorio//emotionProject//Data//Actor_*//*.wav"):
    """Build the train/test split of audio features and emotion labels.

    Every file matching ``data_glob`` is inspected. The emotion code is taken
    from the third "-"-separated field of the file's base name and translated
    through ``emotion_labels``. Files whose emotion is not listed in
    ``focused_emotion_labels`` are skipped. The remaining files are converted
    to feature vectors with ``extract_feature`` (MFCC, chroma and mel enabled).

    Parameters
    ----------
    data_glob : str, optional
        Glob pattern selecting the audio files. Defaults to the original
        hard-coded dataset location, so existing calls keep working.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split`` with ``test_size=0.1`` and ``random_state=9``.

    Raises
    ------
    ValueError
        If no file matches the pattern or none carries a focused emotion.
    KeyError
        If a file name carries an emotion code missing from
        ``emotion_labels``.
    """
    features = []
    labels = []
    # Sort the matches: glob order depends on the file system, and the seeded
    # split below is only reproducible if the sample order is stable.
    for file in sorted(glob.glob(data_glob)):
        base_name = os.path.basename(file)
        emotion = emotion_labels[base_name.split("-")[2]]
        if emotion not in focused_emotion_labels:
            continue
        features.append(extract_feature(file, mfcc=True, chroma=True, mel=True))
        labels.append(emotion)

    if not features:
        # Fail early with an actionable message instead of the generic error
        # train_test_split raises for an empty dataset.
        raise ValueError(
            "No usable audio files found for pattern {!r}; "
            "check the dataset location.".format(data_glob)
        )

    return train_test_split(np.array(features), labels, test_size=0.1, random_state=9)

In [42]:
# Extract features for the whole dataset and unpack the train/test partitions.
(X_train, X_test,
 y_train, y_test) = loading_audio_data()

In [43]:
# We create the model: a multi-layer perceptron with one hidden layer of 200 units.
# random_state fixes the weight initialisation so the reported accuracy is
# reproducible on re-run (the train/test split is already seeded).
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' is only used
# with solver='sgd'; with the default solver it has no effect.
model = MLPClassifier(
    hidden_layer_sizes=(200,),
    learning_rate='adaptive',
    max_iter=400,
    random_state=9,
)

In [44]:
# We train the model on the training partition.
model.fit(X=X_train, y=y_train)

In [45]:
# Predict emotion labels for the held-out test set and report the fraction
# of them that match the true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the Recognizer is: {accuracy * 100:.1f}%")

Accuracy of the Recognizer is: 82.8%


In [None]:
from joblib import dump

# We save the trained model to disk so it can be reused without retraining.
dump(value=model, filename='speech_emotion.joblib')

In [2]:
from joblib import load

# We load the previously saved model.
# NOTE: joblib files are pickle-based; only load model files from a trusted source.
emotion_model = load(filename='speech_emotion.joblib')


In [5]:
# Path of the single audio clip we want to classify.
file_name = "C://Users//julie//OneDrive//Escritorio//emotionProject//ej.wav"

# Audio preprocessing with the same feature flags used for the training data.
test1 = extract_feature(
    file_name,
    mfcc=True,
    chroma=True,
    mel=True,
)


In [6]:
# predict expects a 2-D batch, so present the single feature vector as one row.
pred = emotion_model.predict(test1.reshape(1, -1))
# We visualize the prediction for our only sample.
pred[0]

'happy'