In [None]:
# Importing the required Libraries
import numpy as np 
import pandas as pd
import os 
import librosa 
import wave
import matplotlib.pyplot as plt 

# MLP Classifier
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import tensorflow

#LSTM Classifier
import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import RMSprop
# from tensorflow.keras.optimizers import rmsprop

In [None]:
# Reading the dataset and feature extraction
def extract_mfcc(wav_file_name, duration=3, offset=0.5, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    wav_file_name : str or path-like
        Audio file handed to ``librosa.load``.
    duration : float, optional
        Seconds of audio to read. Defaults to 3.
    offset : float, optional
        Seconds skipped at the start of the file. Defaults to 0.5.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 40, which is the
        sequence length ``create_model_LSTM`` is built for (``input_shape=(40, 1)``).

    Returns
    -------
    numpy.ndarray
        One value per coefficient: the mean of that coefficient over all frames.
    """
    y, sr = librosa.load(wav_file_name, duration=duration, offset=offset)
    # librosa returns (n_mfcc, frames); transpose so that axis 0 is time and
    # average it away, leaving a single fixed-length vector per clip.
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T, axis=0)

    return mfccs

In [None]:
# Walk the dataset directory and build two parallel lists: one emotion label
# and one MFCC feature vector per audio file. (`os` is imported in the first cell.)
radvess_speech_labels = []
ravdess_speech_data = []
path = '/Users/philipthapa/Desktop/Clustering-Based Speech Emotion Recognition by Incorporating Learned Features and Deep BiLSTM/dataset/audio_speech_actors_01-24/Actor_15/'
# On Kaggle, walk '/kaggle/input/ravdess-emotional-speech-audio/' instead.
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        # Skip hidden/metadata entries (e.g. ".DS_Store", "._*.wav") and any
        # non-audio file: their names do not carry an emotion digit, so the
        # int() parse below would raise ValueError and abort the whole load.
        if filename.startswith('.') or not filename.lower().endswith('.wav'):
            continue
        wav_file_name = os.path.join(dirname, filename)
        # Character 7 of the file name is read as the emotion code; subtract 1
        # to get the 0-based class ids listed at the end of this notebook.
        radvess_speech_labels.append(int(filename[7:8]) - 1)
        ravdess_speech_data.append(extract_mfcc(wav_file_name))

print("Finish Loading the Dataset")

In [None]:
# Preparing the dataset for the ML model
features_in_load_order = np.asarray(ravdess_speech_data)
labels_in_load_order = np.array(radvess_speech_labels)

# The LSTM cells below split train/validation/test by position, while os.walk
# returns files in an arbitrary (possibly emotion-sorted) order. Shuffle once
# here with a fixed seed, applying the same permutation to features and labels
# so they stay aligned and every positional slice is a random sample.
shuffle_order = np.random.default_rng(9).permutation(len(labels_in_load_order))
ravdess_speech_data_array = features_in_load_order[shuffle_order]
ravdess_speech_label_array = labels_in_load_order[shuffle_order]

# Converts a class vector (integers) to a binary class matrix. The width is
# pinned to 8 so it always matches the model's Dense(8) output, even when the
# highest-numbered emotion is absent from the loaded files.
labels_categorical = to_categorical(ravdess_speech_label_array, num_classes=8)
labels_categorical.shape



In [None]:
# Shape of the stacked feature array: one row per loaded clip, one column per MFCC coefficient
ravdess_speech_data_array.shape

In [None]:
# Seeded 80/20 hold-out split of features and one-hot labels.
# Note: the LSTM cells below slice the arrays by position and do not use these four names.
x_train, x_test, y_train, y_test = train_test_split(
    np.array(ravdess_speech_data_array),
    labels_categorical,
    test_size=0.20,
    random_state=9,
)

In [None]:
# Sizes of the training / validation / test partitions (80% / 10% / 10%),
# each truncated to a whole number of samples.
number_of_samples = len(ravdess_speech_data_array)
training_samples, validation_samples, test_samples = (
    int(number_of_samples * fraction) for fraction in (0.8, 0.1, 0.1)
)

In [None]:
# Define the LSTM model (one unidirectional LSTM layer followed by a dense head)
def create_model_LSTM():
    """Build and compile the emotion classifier.

    The network reads a sequence of 40 steps with 1 feature each, passes it
    through a 128-unit LSTM that returns only its final output, then through
    two Dense -> Dropout(0.4) -> ReLU stages (64 and 32 units) and an 8-unit
    softmax output.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    'Adam' optimizer, accuracy metric).
    """
    layer_stack = [
        LSTM(128, return_sequences=False, input_shape=(40, 1)),
        Dense(64),
        Dropout(0.4),
        Activation('relu'),
        Dense(32),
        Dropout(0.4),
        Activation('relu'),
        Dense(8),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    # Configures the model for training
    model.compile(
        loss='categorical_crossentropy',
        optimizer='Adam',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Training slice with a trailing length-1 feature axis, as the LSTM input expects
w = ravdess_speech_data_array[:training_samples][..., np.newaxis]

In [None]:
# Sanity check: shape of the training slice after the trailing axis was added
w.shape

In [None]:
### train using LSTM model
# The first `training_samples` rows train the model; the next
# `validation_samples` rows are used for validation after every epoch.
# Each feature slice gets a trailing axis to match input_shape=(40, 1).
model_A = create_model_LSTM()
history = model_A.fit(
    x=np.expand_dims(ravdess_speech_data_array[:training_samples], -1),
    y=labels_categorical[:training_samples],
    validation_data=(
        np.expand_dims(
            ravdess_speech_data_array[training_samples:training_samples + validation_samples],
            -1,
        ),
        labels_categorical[training_samples:training_samples + validation_samples],
    ),
    epochs=200,
    shuffle=True,
)

In [None]:
### loss plots using LSTM model
# Per-epoch losses recorded by fit(); `epochs` is reused by the accuracy plot cell.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

# Training loss as red dots, validation loss as a blue line.
for curve, line_format, curve_label in (
    (loss, 'ro', 'Training loss'),
    (val_loss, 'b', 'Validation loss'),
):
    plt.plot(epochs, curve, line_format, label=curve_label)

plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
### accuracy plots using LSTM model
# Relies on `history` from the training cell and `epochs` from the loss-plot cell.
plt.clf()  # start from an empty figure

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Training accuracy as red dots, validation accuracy as a blue line.
plt.plot(epochs, acc, 'ro', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')  # this axis shows accuracy; it was previously labelled 'Loss'
plt.legend()

plt.show()

*  **(evaluate function): Returns the loss value & metrics values for the model in test mode.**

In [None]:
### evaluate using model A
# Test set = every row after the training and validation slices.
model_A.evaluate(
    x=np.expand_dims(
        ravdess_speech_data_array[training_samples + validation_samples:],
        -1,
    ),
    y=labels_categorical[training_samples + validation_samples:],
)

* **Save the weights of the model as an HDF5 file**

In [None]:
# Write the trained weights of model_A to "Model_LSTM.h5" in the working directory.
# NOTE(review): Keras 3 requires weight file names to end in ".weights.h5" — confirm
# against the installed Keras/TensorFlow version before relying on this name.
model_A.save_weights("Model_LSTM.h5")

In [None]:
# Single clip used for the prediction demo below.
# Kaggle alternative:
# path_ = '../input/ravdess-emotional-speech-audio/Actor_01/03-01-01-01-01-02-01.wav'
# NOTE(review): this is relative to the working directory, unlike the absolute
# dataset path used when loading the training data — confirm it resolves.
path_ = './dataset/Actor_01/03-01-01-01-01-02-01.wav'

In [None]:
# Show an inline audio player for the sample clip so it can be listened to
import IPython.display as ipd
ipd.Audio(path_)

In [None]:
# Extract the same time-averaged MFCC features that were used for training
a = extract_mfcc(path_)

In [None]:
# Sanity check: shape of the feature vector for the single clip
a.shape

In [None]:
# Make sure the features are a NumPy array before reshaping
a1 = np.asarray(a)

In [None]:
# Sanity check: the shape is unchanged by the array conversion
a1.shape

In [None]:
# Append a trailing length-1 feature axis (the model input is (steps, 1))
q = a1[..., np.newaxis]

In [None]:
# Prepend a batch axis so the single clip forms a batch of one
qq = q[np.newaxis, ...]

In [None]:
# Sanity check: shape of the final model input (batch axis first, feature axis last)
qq.shape

In [None]:
# Run the trained model on the one-clip batch
pred = model_A.predict(qq)

In [None]:
# Raw model output for the clip (the network ends in an 8-unit softmax)
pred

In [None]:
# Index of the highest-scoring class for each row of `pred`
preds=pred.argmax(axis=1)
preds
# Disabled: `lb` (a label encoder) is not defined anywhere in the visible notebook.
# data = lb.inverse_transform(preds)

In [None]:
# 0 = neutral, 1 = calm, 2 = happy, 3 = sad, 4 = angry, 5 = fearful, 6 = disgust, 7 = surprised
