## Importing the required libraries

In [0]:
!git clone -l -s -b sound-dataset git://github.com/srgynmv/EmoteNN.git emotenn
%cd emotenn

In [0]:
import os
import glob
import librosa
import librosa.display
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram

In [0]:
import tensorflow.keras
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Flatten, Dropout, Activation
from tensorflow.keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix

In [0]:
from emotenn import constants as ct, gen_utils as gu, train_utils as tu, load_utils as lu

## Setting the labels

In [0]:
speech = ct.GDriveFile('1b5Xq1whO2GPGekmJQe6AZOYIsiQCbMFq', os.path.join(ct.DATASETS_DIR, 'Audio_Speech_Actors_01-24.zip'))

In [0]:
lu.download_file_from_google_drive(speech)
gu.unpack(speech.path)

In [0]:
song = ct.GDriveFile('1bS64Cj3eji6wD4C6nw1kyGlNZqUuy8P2', os.path.join(ct.DATASETS_DIR, 'Audio_Song_Actors_01-24.zip'))

In [0]:
lu.download_file_from_google_drive(song)
gu.unpack(song.path)

In [0]:
sound_files = os.path.join(ct.UNPACKED_DIR, 'Audio_*_Actors_01-24', '**', '*.wav')
mylist = glob.glob(sound_files, recursive=True)

In [0]:
feeling_list = []
for path in mylist:
    name = os.path.basename(path)
    emotion = name[6:8]
    if emotion in ['01', '02']:
        feeling_list.append('neutral')
    elif emotion == '03':
        feeling_list.append('happy')
    elif emotion == '04':
        feeling_list.append('sad')
    elif emotion == '05':
        feeling_list.append('angry')
    elif emotion == '06':
        feeling_list.append('fearful')
    elif emotion == '07':
        feeling_list.append('disgust')
    elif emotion == '08':
        feeling_list.append('surprised')

In [0]:
labels = pd.DataFrame(feeling_list)

In [0]:
labels[:10]

## Getting the features of audio files using librosa

In [0]:
try:
  df_file = ct.GDriveFile('1n5fpSytrJz67altlv2fv3sFWN-6kDuwS', os.path.join(ct.DATASETS_DIR, 'ravdess.pcl'))
  lu.download_file_from_google_drive(df_file)
  df = pd.read_pickle(df_file.path)
except:
  df = pd.DataFrame(columns=['feature'])
  bookmark=0
  for index, file in enumerate(mylist):
      X, sample_rate = librosa.load(file, res_type='kaiser_fast',duration=2.5,sr=22050*2,offset=0.5)
      sample_rate = np.array(sample_rate)
      mfccs = np.mean(librosa.feature.mfcc(y=X, 
                                          sr=sample_rate, 
                                          n_mfcc=13),
                                          axis=0)
      feature = mfccs
      #[float(i) for i in feature]
      #feature1=feature[:135]
      df.loc[bookmark] = [feature]
      bookmark=bookmark+1 
      # df.to_pickle(os.path.join(ct.DATASETS_DIR, "mfccs_serialized.pcl"))

In [0]:
df3 = pd.DataFrame(df['feature'].values.tolist())

In [0]:
df3

df3[:5]

In [0]:
newdf = pd.concat([df3,labels], axis=1)

In [0]:
rnewdf = newdf.rename(index=str, columns={"0": "label"})

In [0]:
rnewdf[:5]

In [0]:
from sklearn.utils import shuffle
rnewdf = shuffle(newdf)
rnewdf[:10]

In [0]:
rnewdf=rnewdf.fillna(0)

## Dividing the data into test and train

In [0]:
newdf1 = np.random.rand(len(rnewdf)) < 0.8
train = rnewdf[newdf1]
test = rnewdf[~newdf1]

In [0]:
train[250:260]

In [0]:
trainfeatures = train.iloc[:, :-1]

In [0]:
trainlabel = train.iloc[:, -1:]

In [0]:
testfeatures = test.iloc[:, :-1]

In [0]:
testlabel = test.iloc[:, -1:]

In [27]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

X_train = np.array(trainfeatures)
y_train = np.array(trainlabel)
X_test = np.array(testfeatures)
y_test = np.array(testlabel)

lb = LabelEncoder()

y_train = to_categorical(lb.fit_transform(y_train))
y_test = to_categorical(lb.fit_transform(y_test))


  y = column_or_1d(y, warn=True)


In [28]:
y_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]], dtype=float32)

In [29]:
X_train.shape

(1937, 216)

## Changing dimension for CNN model

In [0]:

x_traincnn =np.expand_dims(X_train, axis=2)
x_testcnn= np.expand_dims(X_test, axis=2)

In [31]:
strategy = tu.get_distribution_strategy()
with strategy.scope():
    model = Sequential()
    model.add(Conv1D(256, 5,padding='same',
                    input_shape=(216,1)))
    model.add(Activation('relu'))
    model.add(Conv1D(128, 5,padding='same'))
    model.add(Activation('relu'))
    model.add(Dropout(0.1))
    model.add(MaxPooling1D(pool_size=(8)))
    model.add(Conv1D(128, 5,padding='same',))
    model.add(Activation('relu'))
    model.add(Conv1D(128, 5,padding='same',))
    model.add(Activation('relu'))
    model.add(Conv1D(128, 5,padding='same',))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Conv1D(128, 5,padding='same',))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(7))
    model.add(Activation('softmax'))
    opt = tf.keras.optimizers.RMSprop(lr=0.00001, decay=1e-6)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


### Removed the whole training part for avoiding unnecessary long epochs list

In [0]:
cnnhistory=model.fit(x_traincnn, y_train, batch_size=16, epochs=300, validation_data=(x_testcnn, y_test))

In [0]:
plt.plot(cnnhistory.history['accuracy'])
plt.plot(cnnhistory.history['val_accuracy'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

## Saving the model

In [0]:
model_name = 'Emotion_Voice_Detection_Model.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')
# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

In [0]:
import json
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

## Loading the model

In [0]:
# loading json and creating model
from keras.models import model_from_json
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("saved_models/Emotion_Voice_Detection_Model.h5")
print("Loaded model from disk")
 
# evaluate loaded model on test data
loaded_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
score = loaded_model.evaluate(x_testcnn, y_test, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

## Predicting emotions on the test data

In [43]:
preds = loaded_model.predict(x_testcnn, 
                         batch_size=32, 
                         verbose=1)

NameError: ignored

In [0]:
preds

In [0]:
preds1=preds.argmax(axis=1)

In [0]:
preds1

In [0]:
abc = preds1.astype(int).flatten()

In [0]:
predictions = (lb.inverse_transform((abc)))

In [0]:
preddf = pd.DataFrame({'predictedvalues': predictions})
preddf[:10]

In [0]:
actual=y_test.argmax(axis=1)
abc123 = actual.astype(int).flatten()
actualvalues = (lb.inverse_transform((abc123)))

In [0]:
actualdf = pd.DataFrame({'actualvalues': actualvalues})
actualdf[:10]

In [0]:
finaldf = actualdf.join(preddf)

## Actual v/s Predicted emotions

In [0]:
finaldf[170:180]

In [0]:
finaldf.groupby('actualvalues').count()

In [0]:
finaldf.groupby('predictedvalues').count()

In [0]:
finaldf.to_csv('Predictions.csv', index=False)

## Live Demo

#### The file 'output10.wav' in the next cell is the file that was recorded live using the code in AudioRecoreder notebook found in the repository

In [0]:
data, sampling_rate = librosa.load('output10.wav')

In [0]:
% pylab inline
import os
import pandas as pd
import librosa
import glob 

plt.figure(figsize=(15, 5))
librosa.display.waveplot(data, sr=sampling_rate)

In [0]:
#livedf= pd.DataFrame(columns=['feature'])
X, sample_rate = librosa.load('output10.wav', res_type='kaiser_fast',duration=2.5,sr=22050*2,offset=0.5)
sample_rate = np.array(sample_rate)
mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13),axis=0)
featurelive = mfccs
livedf2 = featurelive

In [0]:
livedf2= pd.DataFrame(data=livedf2)

In [0]:
livedf2 = livedf2.stack().to_frame().T

In [0]:
livedf2

In [0]:
twodim= np.expand_dims(livedf2, axis=2)

In [0]:
livepreds = loaded_model.predict(twodim, 
                         batch_size=32, 
                         verbose=1)

In [0]:
livepreds

In [0]:
livepreds1=livepreds.argmax(axis=1)

In [0]:
liveabc = livepreds1.astype(int).flatten()

In [0]:
livepredictions = (lb.inverse_transform((liveabc)))
livepredictions

In [0]:
type(mylist)

In [0]:
print(mylist[1800])

In [0]:
print(mylist[400][6:-16])

## Plotting the audio file's waveform and its spectrogram

In [0]:
data, sampling_rate = librosa.load(mylist[0])

In [0]:
import os
import pandas as pd
import librosa
import glob 

plt.figure(figsize=(15, 5))
librosa.display.waveplot(data, sr=sampling_rate)

In [0]:
import matplotlib.pyplot as plt
import scipy.io.wavfile
import numpy as np
import sys


sr,x = scipy.io.wavfile.read(mylist[0])

## Parameters: 10ms step, 30ms window
nstep = int(sr * 0.01)
nwin  = int(sr * 0.03)
nfft = nwin

window = np.hamming(nwin)

## will take windows x[n1:n2].  generate
## and loop over n2 such that all frames
## fit within the waveform
nn = range(nwin, len(x), nstep)

X = np.zeros( (len(nn), nfft//2) )

for i,n in enumerate(nn):
    xseg = x[n-nwin:n]
    z = np.fft.fft(window * xseg, nfft)
    X[i,:] = np.log(np.abs(z[:nfft//2]))

plt.imshow(X.T, interpolation='nearest',
    origin='lower',
    aspect='auto')

plt.show()