In [1]:
import pandas as pd
import numpy as np
import os
from keras.models import Sequential, Model, model_from_json
# Importing required libraries 
import keras 

import seaborn as sns
import matplotlib.pyplot as plt
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

In [2]:
from IPython.display import Audio
import librosa
import librosa.display
import warnings
# Install a global "ignore" filter: from here on no warning of any category is
# shown, including deprecation notices from the libraries used in this notebook.
warnings.filterwarnings("ignore")

In [3]:
# Walk the TESS dataset directory and collect, for every audio clip, its full
# path and its emotion label.  The label is the last underscore-separated token
# of the file name with the extension stripped, lower-cased.
paths = []
labels = []
for dirname, _, filenames in os.walk('/kaggle/input/toronto-emotional-speech-set-tess'):
    for filename in filenames:
        # Only WAV clips carry a label in their name; skipping everything else
        # keeps stray files (metadata, hidden files, ...) from turning into
        # bogus emotion classes.
        if not filename.lower().endswith('.wav'):
            continue
        paths.append(os.path.join(dirname, filename))
        label = filename.split('_')[-1]
        label = label.split('.')[0]
        labels.append(label.lower())
print('Dataset is Loaded')






In [4]:
# TESS table: one row per clip, with its emotion label and file path.
tess_df = pd.DataFrame({'label': labels, 'speech': paths})
# Rename the 'ps' label to 'surprise' (the name the other loaders in this
# notebook use).  The result is assigned back to the column instead of calling
# replace(..., inplace=True) on `tess_df.label`: that chained in-place call
# acts on an intermediate Series and is not guaranteed to update the frame
# when pandas Copy-on-Write is active.
tess_df['label'] = tess_df['label'].replace({'ps': 'surprise'})
tess_df.head()

In [5]:
# Class balance of the TESS labels.
# The counts are printed explicitly: a bare expression that is not the last
# line of a cell is never displayed.
print(tess_df['label'].value_counts())
# `x=` must be a keyword: the only positional argument seaborn accepts is
# `data`, so a positional Series is not interpreted as the variable to count.
sns.countplot(x=tess_df['label'])

In [18]:
# Build the RAVDESS table (emotion label + file path) from the per-actor
# sub-directories of the dataset root.
Ravdess="/kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/"
ravdess_directory_list = os.listdir(Ravdess)

file_emotion = []
file_path = []
# `actor_dir` (not `dir`) so the builtin dir() is not shadowed.
for actor_dir in ravdess_directory_list:
    # Every entry of the root directory is treated as one actor's folder.
    actor_files = os.listdir(os.path.join(Ravdess, actor_dir))
    for file in actor_files:
        # File names are dash-separated fields; the third field is the numeric
        # emotion code.
        part = file.split('.')[0].split('-')
        file_emotion.append(int(part[2]))
        file_path.append(os.path.join(Ravdess, actor_dir, file))

# One frame for the emotion codes, one for the paths, joined column-wise.
emotion_df = pd.DataFrame(file_emotion, columns=['label'])
path_df = pd.DataFrame(file_path, columns=['speech'])
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)

# Translate the integer codes into emotion names.  Assigning the result back
# (instead of `Ravdess_df.label.replace(..., inplace=True)`) avoids a chained
# in-place call that may leave the frame unchanged under pandas Copy-on-Write.
Ravdess_df['label'] = Ravdess_df['label'].replace(
    {1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}
)
# Drop the 'calm' clips (presumably because none of the other loaders in this
# notebook produces that label - confirm).
Ravdess_df = Ravdess_df[Ravdess_df['label'] != 'calm']
Ravdess_df.head()

In [19]:
# Class balance of the RAVDESS labels.
# The counts are printed explicitly: a bare expression that is not the last
# line of a cell is never displayed.
print(Ravdess_df['label'].value_counts())
# `x=` must be a keyword: the only positional argument seaborn accepts is
# `data`, so a positional Series is not interpreted as the variable to count.
sns.countplot(x=Ravdess_df['label'])

In [40]:
# Build the SAVEE table (emotion label + file path) from the flat ALL/ folder.
SAVEE="../input/surrey-audiovisual-expressed-emotion-savee/ALL/"
dir_list = os.listdir(SAVEE)

# Two-character emotion code found at positions [-8:-6] of each file name.
savee_codes = {
    '_a': 'angry',
    '_d': 'disgust',
    '_f': 'fear',
    '_h': 'happy',
    '_n': 'neutral',
    'sa': 'sad',
    'su': 'surprise',
}

emotion=[]
path = []
for i in dir_list:
    savee_label = savee_codes.get(i[-8:-6])
    if savee_label is None:
        # Unrecognised file name: skip it completely.  Appending the path
        # without a matching label would shift every later label against its
        # path and pair clips with the wrong emotion.
        continue
    emotion.append(savee_label)
    path.append(SAVEE + i)

# Both lists are guaranteed to have the same length here.
SAVEE_df = pd.DataFrame({'label': emotion, 'speech': path})
SAVEE_df.head()

In [9]:
# Class balance of the SAVEE labels.
# The counts are printed explicitly: a bare expression that is not the last
# line of a cell is never displayed.
print(SAVEE_df['label'].value_counts())
# `x=` must be a keyword: the only positional argument seaborn accepts is
# `data`, so a positional Series is not interpreted as the variable to count.
sns.countplot(x=SAVEE_df['label'])

In [28]:
# CREMA-D loader: one row per file in the AudioWAV folder, with the emotion
# decoded from the third underscore-separated field of the file name.
# NOTE(review): this cell used to be headed "voting set", but voting() samples
# its clips from SAVEE_df - confirm which corpus is meant for voting.
cremad="/kaggle/input/cremad/AudioWAV/"
crema_directory_list = os.listdir(cremad)

# Emotion code -> label; any other code is recorded as 'Unknown'.
crema_codes = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

file_path = [cremad + file for file in crema_directory_list]
file_emotion = [
    crema_codes.get(file.split('_')[2], 'Unknown')
    for file in crema_directory_list
]

# Labels and paths side by side in a single frame.
emotion_df = pd.DataFrame(file_emotion, columns=['label'])
path_df = pd.DataFrame(file_path, columns=['speech'])
crema_df = pd.concat([emotion_df, path_df], axis=1)

crema_df.head()

In [41]:
# Class balance of the CREMA-D labels.
# The counts are printed explicitly: a bare expression that is not the last
# line of a cell is never displayed.
print(crema_df['label'].value_counts())
# `x=` must be a keyword: the only positional argument seaborn accepts is
# `data`, so a positional Series is not interpreted as the variable to count.
sns.countplot(x=crema_df['label'])

In [30]:
# Merge all four corpora (RAVDESS, TESS, SAVEE, CREMA-D) into one table and
# save a copy as df.csv.
# ignore_index=True gives the result a fresh 0..n-1 index; without it every
# source frame keeps its own row labels and the combined index has duplicates.
# NOTE(review): because every corpus ends up in this single table, a later
# evaluation on clips from one of them (e.g. SAVEE) is not on unseen data -
# confirm that this is intended.
df = pd.concat([Ravdess_df, tess_df, SAVEE_df, crema_df], axis=0, ignore_index=True)
df.to_csv("df.csv", index=False)
df.head()






In [31]:
# Class balance of the combined table.
# The counts are printed explicitly: a bare expression that is not the last
# line of a cell is never displayed.
print(df['label'].value_counts())
# `x=` must be a keyword: the only positional argument seaborn accepts is
# `data`, so a positional Series is not interpreted as the variable to count.
sns.countplot(x=df['label'])

In [33]:
def waveplot(data, sr, emotion):
    """Draw the waveform of one audio clip in a new 10x4 figure and show it.

    data    -- audio samples, handed unchanged to librosa.display.waveshow
    sr      -- sampling rate forwarded to waveshow
    emotion -- text used as the figure title
    """
    _, axes = plt.subplots(figsize=(10, 4))
    axes.set_title(emotion, size=20)
    librosa.display.waveshow(data, sr=sr, ax=axes)
    axes.set_ylabel("Amplitude")
    plt.show()
    
def spectogram(data, sr, emotion):
    """Plot the dB-scaled magnitude of the clip's STFT in a new 11x4 figure.

    data    -- audio samples, handed to librosa.stft
    sr      -- sampling rate forwarded to librosa.display.specshow
    emotion -- text used as the figure title

    The figure is left open (no plt.show() here); a colour bar is attached.
    """
    # Magnitude of the short-time Fourier transform, converted to decibels.
    magnitude = np.abs(librosa.stft(data))
    magnitude_db = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(11, 4))
    plt.title(emotion, size=20)
    librosa.display.specshow(magnitude_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()

In [34]:
# Show one example clip for an emotion: print its path, plot waveform and
# spectrogram, and embed an audio player as the cell output.
emotion = 'fear'
emotion_paths = np.array(df['speech'][df['label']==emotion])
# Position 700 is the example originally picked; it is clamped to the last
# available clip so the cell does not raise IndexError when this emotion has
# fewer than 701 clips.
path = emotion_paths[min(700, len(emotion_paths) - 1)]
print(path)
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [35]:
# Show one example clip for an emotion: print its path, plot waveform and
# spectrogram, and embed an audio player as the cell output.
emotion = 'happy'
emotion_paths = np.array(df['speech'][df['label']==emotion])
# Position 700 is the example originally picked; it is clamped to the last
# available clip so the cell does not raise IndexError when this emotion has
# fewer than 701 clips.
path = emotion_paths[min(700, len(emotion_paths) - 1)]
print(path)
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [36]:
# Show one example clip for an emotion: print its path, plot waveform and
# spectrogram, and embed an audio player as the cell output.
emotion = 'surprise'
emotion_paths = np.array(df['speech'][df['label']==emotion])
# Position 700 is the example originally picked; it is clamped to the last
# available clip so the cell does not raise IndexError when this emotion has
# fewer than 701 clips.
path = emotion_paths[min(700, len(emotion_paths) - 1)]
print(path)
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [42]:
def extract_mfcc(filename, duration=3, offset=0.5, n_mfcc=40):
    """Return one averaged MFCC vector for an audio file.

    Parameters
    ----------
    filename : path of the audio file, passed to librosa.load.
    duration : `duration` argument of librosa.load (default 3, as before).
    offset   : `offset` argument of librosa.load (default 0.5, as before).
    n_mfcc   : number of MFCC coefficients requested from librosa
               (default 40, as before).

    Returns
    -------
    1-D numpy array: the MFCC matrix averaged over its second axis (the frame
    axis according to librosa's documentation), i.e. one value per coefficient.

    The defaults reproduce the previously hard-coded values, so existing
    single-argument calls behave exactly as before.
    """
    y, sr = librosa.load(filename, duration=duration, offset=offset)
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Transpose, then average over axis 0: collapses the second axis of the
    # matrix returned by librosa.
    mfcc = np.mean(coefficients.T, axis=0)
    return mfcc



In [43]:
# Run extract_mfcc on every path in df['speech']; the result is a Series that
# holds one feature vector per clip.
X_mfcc = df['speech'].apply(extract_mfcc)

X_mfcc


In [44]:
# Stack the per-clip feature vectors into one array, then append a trailing
# axis of length 1 - the model is built with input_shape=(n_features, 1).
X = np.array(list(X_mfcc))
X = np.expand_dims(X, -1)
X.shape





In [45]:
from sklearn.preprocessing import OneHotEncoder #gia kathe input vazei ti einai to label tou
# Integer-encode the emotion labels; `labelss` keeps the class names in the
# encoder's order so an integer class index can be mapped back to a name.
enc = LabelEncoder()
# LabelEncoder expects a 1-D target, so the column is passed as a Series
# (df['label']) rather than as a one-column DataFrame (df[['label']]).
yi = enc.fit_transform(df['label'])
labelss=enc.classes_
print(labelss)




In [46]:
from sklearn.preprocessing import OneHotEncoder #gia kathe input vazei ti einai to label tou


# One-hot encode the emotion labels: OneHotEncoder needs a 2-D input, so the
# label column is reshaped to a single-column array first; the sparse result is
# converted to a dense array.
label_column = df['label'].to_numpy().reshape(-1, 1)
encoder = OneHotEncoder()
y = encoder.fit_transform(label_column).toarray()

# Quick look at one encoded row, the overall shape, the class names and the
# first rows of the source table.
print(y[4])
print(np.shape(y))
print(labelss)
print(df.head())

In [136]:
# Class balance of the combined table (checked again right before the split).
# The counts are printed explicitly: a bare expression that is not the last
# line of a cell is never displayed.
print(df['label'].value_counts())
# `x=` must be a keyword: the only positional argument seaborn accepts is
# `data`, so a positional Series is not interpreted as the variable to count.
sns.countplot(x=df['label'])

In [48]:
from sklearn.model_selection import train_test_split

# Shuffled train/test split with a fixed seed (random_state=0); the split
# proportions are train_test_split's defaults.
split = train_test_split(X, y, shuffle=True, random_state=0)
x_train, x_test, y_train, y_test = split
# Cell output: the shapes of the four resulting arrays.
tuple(part.shape for part in (x_train, y_train, x_test, y_test))






In [49]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Width of the softmax layer = number of one-hot columns in the targets.  It is
# derived from the data (like the input shape below) rather than hard-coded to
# 7, so model and labels cannot disagree if the set of classes changes.
num_classes = y_train.shape[1]

# LSTM over the feature vector (one step per feature, one channel), followed by
# two dense layers with dropout and a softmax classifier.
model = Sequential([
    LSTM(256, return_sequences=False, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    # Softmax because this is multi-class (a binary problem would use sigmoid).
    Dense(num_classes, activation='softmax')
])

# categorical_crossentropy matches the one-hot encoded targets.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [50]:
import keras
from keras.callbacks import ReduceLROnPlateau

# Terminology notes (written down while the training was running):
#  - epoch: one full pass of the dataset through the network; many epochs are
#    needed so the weights get updated repeatedly.
#  - dropout: randomly skipping nodes in individual layers to counter
#    overfitting.
#  - batch_size: how the dataset is chunked per iteration (here: batches of 64).
#  https://github.com/christianversloot/machine-learning-articles/blob/main/what-is-dropout-reduce-overfitting-in-your-neural-networks.md

# Multiply the learning rate by 0.4 whenever the training loss has not improved
# for 2 epochs, never going below 1e-7.
rlrp = ReduceLROnPlateau(
    monitor='loss',
    factor=0.4,
    patience=2,
    min_lr=1e-7,
    verbose=0,
)

# Train for 50 epochs; the held-out split is passed as validation data.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[rlrp],
)


In [None]:
# Debugging aid: display the type of the object returned by model.fit.
type(history)


In [51]:
# Training vs. validation accuracy per epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# One x position per recorded epoch.  Taking the length from the history
# (instead of a hard-coded 50) keeps x and y the same size if the number of
# training epochs changes or training stops early.
epochs = list(range(len(acc)))

plt.plot(epochs, acc, label='train accuracy')
plt.plot(epochs, val_acc, label='val accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [52]:
# Training vs. validation loss per epoch.
loss = history.history['loss']
val_loss = history.history['val_loss']
# The x axis is built from the recorded history, so this cell needs neither a
# fixed epoch count nor the `epochs` list created by another cell.
loss_epochs = range(len(loss))

plt.plot(loss_epochs, loss, label='train loss')
plt.plot(loss_epochs, val_loss, label='val loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
# Ending on plt.show() renders the figure and stops the Legend object's repr
# from being printed as the cell output.
plt.show()
plt.legend()


In [53]:
# Mean validation accuracy over all recorded epochs, printed as a percentage.
# The divisor is the number of recorded epochs rather than a hard-coded 50.
# NOTE(review): despite its name this is an average across epochs, not the
# accuracy reached after the last epoch (that would be val_acc[-1]).
accuracy_final = sum(val_acc) / len(val_acc)
print(accuracy_final*100)

In [54]:
from sklearn.preprocessing import OneHotEncoder

# Predict on the held-out split and map both the predictions and the one-hot
# ground truth back to label names using the already-fitted `encoder`.
# NOTE(review): y_test is overwritten with the decoded labels here, so this
# cell should not be re-run without first re-creating the train/test split.
decode = encoder.inverse_transform
pred_test = model.predict(x_test)
y_pred = decode(pred_test)
y_test = decode(y_test)

In [55]:
from sklearn.metrics import confusion_matrix, classification_report
# Confusion matrix of decoded test labels vs. predictions, drawn as an
# annotated heat map whose rows and columns are labelled with the encoder's
# categories.
counts = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(12, 10))
class_names = list(encoder.categories_)
cm = pd.DataFrame(counts, index=class_names, columns=class_names)
sns.heatmap(cm, annot=True, fmt='', cmap='Blues', linecolor='white', linewidth=1)
plt.title('Confusion Matrix', size=20)
plt.xlabel('Predicted Labels', size=14)
plt.ylabel('Actual Labels', size=14)
plt.show()

In [58]:
# Print scikit-learn's text report comparing the decoded test labels (y_test)
# with the model's decoded predictions (y_pred).
print(classification_report(y_test, y_pred))

In [56]:
from sklearn.metrics import confusion_matrix, classification_report

# Persist the trained network twice: the whole model as 'Emotion_Model.h5'
# inside ./saved_models (created when missing), and the architecture alone as
# JSON in the current working directory.
save_dir = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(save_dir, exist_ok=True)

model_name = 'Emotion_Model.h5'
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Save model and weights at %s ' % model_path)

# Architecture only (no weights) as a JSON document.
model_json = model.to_json()
with open("model_json.json", "w") as json_file:
    json_file.write(model_json)





In [57]:
# Rebuild the network from the saved JSON architecture, load the trained
# weights into it and compile it so it is ready for prediction.
# The `with` block closes the file even if reading raises.
with open('/kaggle/working/model_json.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# load weights into new model
loaded_model.load_weights("/kaggle/working/saved_models/Emotion_Model.h5")
print("Loaded model from disk")

# Same loss / optimiser / metric settings as the original model.
loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [175]:
#voting_session
def voting(real_emotion, n_samples=10, random_state=None):
    """Classify a random handful of SAVEE clips of one emotion and tally the votes.

    Parameters
    ----------
    real_emotion : label used to select clips from SAVEE_df.
    n_samples    : how many clips to draw (default 10, as before).  If fewer
                   clips are available, all of them are used.
    random_state : optional seed forwarded to Series.sample for a reproducible
                   draw (default None = a new random draw on each call).

    Returns
    -------
    (sum_final, voting_sess)
        sum_final   -- 1-D float array: mean predicted probability per class
                       over the drawn clips, in percent.
        voting_sess -- list with the predicted class index of every clip.

    Raises
    ------
    ValueError if n_samples < 1 or SAVEE_df has no clip with that label.

    NOTE(review): SAVEE_df is also part of the combined table the model is
    trained on, so these clips are not unseen data - confirm this is intended.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1, got %r" % (n_samples,))

    candidates = SAVEE_df['speech'][SAVEE_df['label'] == real_emotion]
    if len(candidates) == 0:
        raise ValueError("no SAVEE clips labelled %r" % (real_emotion,))

    # Never request more clips than exist (sample() would raise otherwise).
    n_samples = min(n_samples, len(candidates))
    clip_paths = np.array(candidates.sample(n=n_samples, random_state=random_state))

    # One feature vector per clip plus a trailing axis of length 1, i.e. the
    # same (clips, features, 1) layout the model is trained on.
    features = np.expand_dims(np.array([extract_mfcc(p) for p in clip_paths]), -1)

    # A single batched call replaces one predict() call per clip.
    probabilities = loaded_model.predict(features,
                                         batch_size=16,
                                         verbose=1)

    # Discrete vote of each clip: the index of its most probable class.
    voting_sess = list(probabilities.argmax(axis=1).astype(int))

    # Average over the clips actually scored (not a fixed 10), as a percentage.
    sum_final = probabilities.astype(np.float64).mean(axis=0) * 100
    return (sum_final, voting_sess)










In [176]:
# Run a voting session on clips labelled 'sad' and report it two ways: the mean
# predicted percentage per emotion, and the discrete vote of every clip.
sum_final, voting_sess = voting('sad')

# Table: mean percentage next to the emotion name.
# presumably the model's output columns follow the same class order as
# `labelss` - verify against the encoders used for training.
analog_data = [sum_final, labelss]
analog_df = pd.DataFrame(analog_data).transpose()
analog_df.columns = ['mean %', 'emotions']
print(analog_df)

plt.bar(labelss, sum_final)
plt.title("Mean Percentage voting")
plt.show()

# Translate every clip's predicted class index into its emotion name.
print(np.shape(voting_sess))
discr_votes = [labelss[int(vote)] for vote in voting_sess]

discr_df = pd.DataFrame(discr_votes, columns=['votes'])

print(discr_df)

# Vote tally.  The counts are printed explicitly because a bare expression in
# the middle of a cell is never displayed.
print(discr_df['votes'].value_counts())
# `x=` must be a keyword: the only positional argument seaborn accepts is
# `data`, so a positional Series is not interpreted as the variable to count.
sns.countplot(x=discr_df['votes'])
plt.title('discrete voting results')
plt.show()





    
    







