In [None]:
!apt-get update
!apt-get install -y libsndfile1
import pandas as pd
import numpy as np

import os
import sys
import IPython.display as ipd
# librosa is a Python library for analyzing audio and music; it is used below to load the audio files and extract features from them.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.preprocessing import StandardScaler, OneHotEncoder
#from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# to play the audio files
from IPython.display import Audio
from glob import glob
import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
import warnings
# Keep notebook output readable: silence every warning unless warning options
# were handed to the interpreter (sys.warnoptions is non-empty).
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action="ignore")
# DeprecationWarning is muted unconditionally.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [None]:
# Root folder of the RAVDESS speech audio (Kaggle input mount). The trailing
# slash matters: later cells build file paths by plain string concatenation.
Ravdess = "/kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/"

In [None]:
# Every entry found directly under the dataset root.
ravdess_directory_list = os.listdir(Ravdess)

# Parallel accumulators: emotion code and full path of each audio file.
lables, path = [], []

In [None]:
# Start from empty lists so that re-running this cell does not append a second
# copy of every file.
lables.clear()
path.clear()

# The dataset root holds one sub-directory per actor (01-24 according to the
# dataset path). os.listdir() returns entries in arbitrary order, so both
# levels are sorted to keep the row order -- and with it the seeded train/test
# split further down -- reproducible across runs and machines.
for actor_dir in sorted(ravdess_directory_list):
    actor_path = os.path.join(Ravdess, actor_dir)
    if not os.path.isdir(actor_path):
        continue  # stray file next to the actor folders

    for file_name in sorted(os.listdir(actor_path)):
        # The file stem is a dash-separated list of numeric fields; the third
        # field is the emotion code.
        fields = file_name.split('.')[0].split('-')
        if len(fields) < 3 or not fields[2].isdigit():
            continue  # not a file following the dataset naming scheme
        lables.append(int(fields[2]))
        path.append(os.path.join(actor_path, file_name))

In [None]:
# One row per audio file: its path, its numeric emotion code, and an
# 'emotion' column left empty (None) to be filled with the emotion name later.
df = pd.DataFrame({'Speech': path, 'Lable': lables}).assign(emotion=None)

In [None]:
# Preview the first rows of the freshly built frame.
df.head()

# **Data visualization**

# **Emotion**:
**(01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised)**

In [None]:
# Emotion code -> emotion name. Codes start at 1 and follow the order of the
# names listed here.
switcher = dict(
    enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
)

In [None]:
# Number of rows in the frame, i.e. how many audio files were collected.
df.shape[0]

In [None]:
# Translate every numeric emotion code into its name in one vectorised step.
# Writing through df['emotion'][i] = ... is chained assignment: pandas may
# apply it to a temporary copy (SettingWithCopyWarning), and with Copy-on-Write
# enabled the frame is never updated at all. Assigning the whole column avoids
# that and replaces an 8 x N Python loop.
# Codes missing from `switcher` end up as NaN (they were left as None before).
df['emotion'] = df['Lable'].map(switcher)


In [None]:
# Preview again to check the 'emotion' column after the mapping step.
df.head()

In [None]:

# Inline audio player for the clip stored under index label 2.
ipd.Audio(df['Speech'][2])


In [None]:
from itertools import cycle

sns.set_theme(style="white", palette=None)

# Colour list of matplotlib's current property cycle, plus an endless iterator
# over an independent copy of that list.
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(list(color_pal))

# **Getting the sample rate of audio file using librosa**

In [None]:
# Decode the clip stored under index label 2; librosa.load hands back the
# samples together with the sample rate it used.
sample_path = df.loc[2, 'Speech']
y, sr = librosa.load(sample_path)

print('sr: {}'.format(sr))

# **Plotting signal**

In [None]:
# Waveform of the loaded clip, titled with its emotion name.
raw_title = " ".join((df['emotion'][2], 'Raw Audio Example'))
fig, ax = plt.subplots(figsize=(10, 5))
pd.Series(y).plot(ax=ax, lw=1, title=raw_title)
plt.show()

# **Signal after trimming silence**

In [None]:
# Trim the clip with librosa.effects.trim (top_db=20); only the trimmed signal
# is kept, the second return value is discarded.
y_trimmed, _ = librosa.effects.trim(y, top_db=20)

trimmed_title = " ".join((df['emotion'][2], 'Raw Audio Trimmed Example'))
fig, ax = plt.subplots(figsize=(10, 5))
pd.Series(y_trimmed).plot(ax=ax, lw=1, title=trimmed_title, color=color_pal[1])
plt.show()

# **Spectrogram**

In [None]:
# Short-time Fourier transform of the clip, then its magnitude converted to
# decibels relative to the loudest bin (ref=np.max).
D = librosa.stft(y)
magnitude = np.abs(D)
S_db = librosa.amplitude_to_db(magnitude, ref=np.max)
S_db.shape

In [None]:
# Draw S_db as an image with time on the x-axis and log-scaled frequency on
# the y-axis, plus a colour bar for the plotted values.
fig, ax = plt.subplots(figsize=(10, 5))
img = librosa.display.specshow(S_db, x_axis='time', y_axis='log', ax=ax)
ax.set_title('Spectrogram Example', fontsize=20)  # spelling of the title fixed
fig.colorbar(img, ax=ax, format='%0.2f')  # plain string: nothing to interpolate
plt.show()

# **Feature extraction**

In [None]:
def extract_MFCC(file, duration=3, offset=0.5, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    file : str or path-like
        Audio file to read; passed straight to ``librosa.load``.
    duration : float, optional
        Seconds of audio to load (default 3).
    offset : float, optional
        Seconds to skip at the start of the file (default 0.5).
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray
        One value per coefficient: the mean of that coefficient over all
        analysis frames of the loaded excerpt.
    """
    signal, sample_rate = librosa.load(file, duration=duration, offset=offset)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose so that frames run along axis 0, then average them away.
    return np.mean(coefficients.T, axis=0)

In [None]:
# Feature vector for every file path; the result is a Series holding one
# array per row.
X_mfcc = df['Speech'].apply(extract_MFCC)

In [None]:
# Stack the per-file feature vectors into a single 2-D array (one row per file).
X = np.array(X_mfcc.tolist())
X.shape

In [None]:

# OneHotEncoder, StandardScaler and train_test_split are already imported in
# the first cell, so nothing is re-imported here.

# One-hot encode the numeric emotion codes and densify the sparse result.
# NOTE: `y` previously held the audio signal loaded for the plots; from here
# on it holds the label matrix, so the plotting cells must not be re-run after
# this one without reloading the clip.
enc = OneHotEncoder()
y = enc.fit_transform(df[['Lable']]).toarray()

# Seeded, shuffled split with the default test fraction.
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0, shuffle=True)

# Standardise the features; the scaler is fitted on the training part only and
# then re-used for the test part so no test statistics leak into training.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Conv1D expects a trailing channel axis: (samples, features) -> (samples, features, 1).
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)





     



# **Modeling**

In [None]:
# Sequential and all layer classes used below are imported in the first cell;
# the duplicate imports (including an unused LSTM) were dropped from this cell.

# Width of the softmax layer follows the one-hot label matrix instead of a
# hard-coded 8, so the model still matches the labels if the set of emotions
# present in the data changes.
n_classes = y_train.shape[1]

# Four Conv1D + MaxPooling1D stages over the feature axis, then a small dense
# head ending in a softmax over the emotion classes.
model = Sequential([
    Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu',
           input_shape=(x_train.shape[1], 1)),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),

    Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),

    Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),
    Dropout(0.2),

    Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),

    Flatten(),
    Dense(units=32, activation='relu'),
    Dropout(0.3),

    Dense(units=n_classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()


In [None]:
# Multiply the learning rate by 0.4 whenever the training loss has not
# improved for 2 epochs, never going below 1e-10.
rlrp = ReduceLROnPlateau(
    monitor='loss',
    factor=0.4,
    patience=2,
    min_lr=1e-10,
    verbose=0,
)

# Train for 100 epochs; the held-out split is passed as validation data so its
# metrics are reported after every epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[rlrp],
)

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is the
# value reported here as accuracy, shown as a percentage.
scores = model.evaluate(x_test, y_test)
accuracy_pct = scores[1] * 100
print("Accuracy of our model on test data : ", accuracy_pct, "%")