In [66]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [67]:
#Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [68]:
# Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

#Emotions to observe
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [69]:
#Load the data and extract features for each sound file /Users/saikirananugam/Desktop/Capstone/datasets.csv/Actor_12*//*.wav
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("/Users/saikirananugam/Desktop/Capstone/datasets.csv/Actor_*//*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [70]:
#Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.2)

In [27]:
x_train

array([[-4.61271820e+02,  2.29928474e+01, -3.58219147e+01, ...,
         3.00771353e-04,  1.76716712e-04,  1.15658870e-04],
       [-5.67372070e+02,  3.58711281e+01, -1.41194515e+01, ...,
         1.15899682e-04,  5.94162702e-05,  3.07429364e-05],
       [-6.62007263e+02,  4.55537109e+01, -3.82850003e+00, ...,
         1.04338731e-04,  4.46671147e-05,  2.33882165e-05],
       ...,
       [-5.87616333e+02,  4.60132599e+01, -2.13807030e+01, ...,
         2.72615252e-05,  1.40989114e-05,  1.09039047e-05],
       [-5.23919434e+02,  3.89732323e+01, -1.22774239e+01, ...,
         1.16796538e-04,  6.61305821e-05,  5.32570921e-05],
       [-6.29891968e+02,  4.82538376e+01, -8.89446354e+00, ...,
         4.20910437e-05,  2.07842495e-05,  1.24669114e-05]])

In [71]:
#Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(614, 154)


In [72]:
#Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [73]:
#Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08,hidden_layer_sizes=(300,300,300,300,300,100), learning_rate='adaptive', max_iter=300,activation="relu",solver="lbfgs",)

In [74]:
#Train the model
model.fit(x_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLPClassifier(alpha=0.01, batch_size=256,
              hidden_layer_sizes=(300, 300, 300, 300, 300, 100),
              learning_rate='adaptive', max_iter=300, solver='lbfgs')

In [75]:
#Predict for the test set
y_pred=model.predict(x_test)

In [76]:
y_pred

array(['calm', 'fearful', 'happy', 'calm', 'fearful', 'fearful', 'calm',
       'happy', 'disgust', 'calm', 'disgust', 'happy', 'disgust', 'calm',
       'happy', 'disgust', 'disgust', 'fearful', 'disgust', 'happy',
       'happy', 'happy', 'fearful', 'happy', 'fearful', 'happy',
       'fearful', 'calm', 'happy', 'happy', 'disgust', 'happy', 'disgust',
       'disgust', 'calm', 'calm', 'calm', 'happy', 'fearful', 'happy',
       'calm', 'calm', 'fearful', 'happy', 'happy', 'happy', 'calm',
       'disgust', 'calm', 'disgust', 'happy', 'happy', 'fearful', 'happy',
       'happy', 'disgust', 'calm', 'calm', 'calm', 'calm', 'happy',
       'fearful', 'calm', 'fearful', 'fearful', 'happy', 'disgust',
       'calm', 'happy', 'fearful', 'fearful', 'calm', 'fearful',
       'disgust', 'calm', 'disgust', 'calm', 'calm', 'disgust', 'fearful',
       'disgust', 'fearful', 'fearful', 'disgust', 'calm', 'happy',
       'calm', 'calm', 'calm', 'calm', 'calm', 'happy', 'fearful',
       'fearful', 

In [80]:
#Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 91.19%


In [36]:
from sklearn.metrics import accuracy_score, f1_score

In [29]:
f1_score(y_test, y_pred,average=None)

array([0.64367816, 0.57142857, 0.47272727, 0.56      ])

In [30]:
import pandas as pd
df=pd.DataFrame({'Actual': y_test, 'Predicted':y_pred})
df.head(20)

Unnamed: 0,Actual,Predicted
0,calm,calm
1,fearful,fearful
2,disgust,disgust
3,disgust,calm
4,happy,fearful
5,fearful,fearful
6,calm,calm
7,happy,happy
8,disgust,disgust
9,calm,calm


In [37]:
import pickle
# Writing different model files to file
with open( 'modelForPrediction1.sav', 'wb') as f:
    pickle.dump(model,f)

In [38]:
filename = 'modelForPrediction1.sav'
loaded_model = pickle.load(open(filename, 'rb')) # loading the model file from the storage

feature=extract_feature("/Users/saikirananugam/Desktop/Capstone/datasets.csv/Actor_04/03-01-01-01-01-01-04.wav", mfcc=True, chroma=True, mel=True)

feature=feature.reshape(1,-1)

prediction=loaded_model.predict(feature)
prediction[0]

'calm'

In [39]:
prediction[0]

'calm'

In [40]:
import pickle
# Writing different model files to file
with open( 'modelForPrediction1.pkl', 'wb') as f:
    pickle.dump(model,f)

In [None]:
###Using LSTM

In [35]:
def extract_mfcc(filename):
    y, sr = librosa.load(filename, duration=3, offset=0.5)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    return mfcc

In [32]:
from keras.models import Sequential
from keras.layers import Dense,LSTM,Dropout

In [34]:
model2=Sequential([
    LSTM(123,return_sequences=False,input_shape=(40,1)),
    Dense(64,activation='relu'),
    Dropout(0.2),
    Dense(32,activation='relu'),
    Dropout(0.2),
    Dense(7,activation='softmax')
])
model2.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 123)               61500     
                                                                 
 dense_3 (Dense)             (None, 64)                7936      
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dropout_3 (Dropout)         (None, 32)                0         
                                                                 
 dense_5 (Dense)             (None, 7)                 231       
                                                                 
Total params: 71,747
Trainable params: 71,747
Non-trai

In [39]:
history=model2.fit(x_train,x_test,validation_split=0.2,epochs=10,batch_size=512,shuffle=True)

ValueError: Data cardinality is ambiguous:
  x sizes: 491
  y sizes: 154
Make sure all arrays contain the same number of samples.

####With different classifiers

In [1]:
import speech_recognition as sr
import librosa
import librosa.display
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [16]:
dtree_model = DecisionTreeClassifier(max_depth = 6).fit(x_train, y_train) 
dtree_predictions = dtree_model.predict(x_test) 

In [19]:
#import loading_data
from sklearn.svm import SVC 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier


In [37]:
print(accuracy_score(y_true=y_test,y_pred=dtree_predictions))
print(classification_report(y_test,dtree_predictions)) 
# creating a confusion matrix 
print(confusion_matrix(y_test, dtree_predictions) )

ValueError: Found input variables with inconsistent numbers of samples: [7, 154]

In [22]:
#"""SUPPORT VECTOR MACHINE"""


svm_model_linear = SVC(kernel = 'linear', C = 1).fit(x_train, y_train) 
svm_predictions = svm_model_linear.predict(x_test) 


print(accuracy_score(y_true=y_test,y_pred=svm_predictions))
print(classification_report(y_test,svm_predictions)) 
# creating a confusion matrix 
print(confusion_matrix(y_test, svm_predictions) )



0.6298701298701299
              precision    recall  f1-score   support

        calm       0.70      0.79      0.74        38
     disgust       0.65      0.65      0.65        46
     fearful       0.48      0.58      0.53        26
       happy       0.65      0.50      0.56        44

    accuracy                           0.63       154
   macro avg       0.62      0.63      0.62       154
weighted avg       0.63      0.63      0.63       154

[[30  6  1  1]
 [ 7 30  4  5]
 [ 1  4 15  6]
 [ 5  6 11 22]]


In [24]:
"""Random Forest"""
 
 
classifier = RandomForestClassifier(n_estimators = 100, random_state = 0) 
  
# fit the regressor with x and y data 
classifier.fit(x_train, y_train)   

c_p = classifier.predict(x_test) 


print(accuracy_score(y_true=y_test,y_pred=c_p))
print(classification_report(y_test,c_p)) 
# creating a confusion matrix 
print(confusion_matrix(y_test,c_p) )


0.6558441558441559
              precision    recall  f1-score   support

        calm       0.69      0.92      0.79        38
     disgust       0.71      0.65      0.68        46
     fearful       0.44      0.54      0.48        26
       happy       0.76      0.50      0.60        44

    accuracy                           0.66       154
   macro avg       0.65      0.65      0.64       154
weighted avg       0.67      0.66      0.65       154

[[35  1  2  0]
 [ 9 30  3  4]
 [ 3  6 14  3]
 [ 4  5 13 22]]


In [9]:
import os
print(os.listdir("/Users/saikirananugam/Desktop/Capstone/datasets.csv"))


['Actor_16', 'Actor_11', 'Actor_18', 'Actor_20', 'Actor_21', 'Actor_19', 'Actor_10', 'Actor_17', '.DS_Store', 'Actor_04', 'Actor_03', 'Actor_02', 'Actor_05', 'Actor_12', 'Actor_15', 'Actor_23', 'Actor_24', 'Actor_22', 'Actor_14', 'Actor_13', 'Actor_09', 'Actor_07', 'Actor_06', 'Actor_01', 'Actor_08']


In [40]:
X_train, X_test, y_train, y_test = load_data(test_size=0.25)

print("[+] Number of training samples:", X_train.shape[0])
# number of samples in testing data
print("[+] Number of testing samples:", X_test.shape[0])

[+] Number of training samples: 576
[+] Number of testing samples: 192


In [41]:
import numpy as np
X_train = np.asarray(X_train)
y_train= np.asarray(y_train)
X_test=np.array(X_test)
y_test=np.array(y_test)
     

In [42]:
X_train.shape,y_train.shape,X_test.shape,y_test.shape


((576, 180), (576,), (192, 180), (192,))

In [62]:
x_traincnn = np.expand_dims(X_train, axis=2)
x_testcnn = np.expand_dims(X_test, axis=2)
     

In [63]:
x_traincnn.shape,x_testcnn.shape


((576, 180, 1), (192, 180, 1))

In [57]:
import keras
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Flatten, Dropout, Activation
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import optimizers

model = Sequential()

model.add(Conv1D(128, 5,padding='same',input_shape=(180,1)))        #1
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(MaxPooling1D(pool_size=(8)))



model.add(Conv1D(128, 5,padding='same',))                           #2
model.add(Activation('relu'))
model.add(Dropout(0.1))

model.add(Flatten())
model.add(Dense(8))                                                 #3
model.add(Activation('softmax'))
#opt =optimizers.rmsprop(lr=0.00005, rho=0.9, epsilon=None, decay=0.0)
     

In [58]:
from tensorflow import keras
from keras import optimizers
opt=keras.optimizers.RMSprop(learning_rate=0.01, rho=0.9, epsilon=None, decay=0.0)

In [59]:
model.summary()


Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_14 (Conv1D)          (None, 180, 128)          768       
                                                                 
 activation_21 (Activation)  (None, 180, 128)          0         
                                                                 
 dropout_14 (Dropout)        (None, 180, 128)          0         
                                                                 
 max_pooling1d_7 (MaxPooling  (None, 22, 128)          0         
 1D)                                                             
                                                                 
 conv1d_15 (Conv1D)          (None, 22, 128)           82048     
                                                                 
 activation_22 (Activation)  (None, 22, 128)           0         
                                                      

In [60]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
     

In [64]:
cnnhistory=model.fit(x_traincnn, y_train, batch_size=20, epochs=500, validation_data=(x_testcnn, y_test))


Epoch 1/500


2023-04-09 16:28:19.604120: W tensorflow/core/framework/op_kernel.cc:1722] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to int64 is not supported


UnimplementedError:  Cast string to int64 is not supported
	 [[node sparse_categorical_crossentropy/Cast
 (defined at /opt/anaconda3/lib/python3.9/site-packages/keras/backend.py:1977)
]] [Op:__inference_train_function_1681]

Errors may have originated from an input operation.
Input Source operations connected to node sparse_categorical_crossentropy/Cast:
In[0] ExpandDims (defined at /opt/anaconda3/lib/python3.9/site-packages/keras/engine/compile_utils.py:677)

Operation defined at: (most recent call last)
>>>   File "/opt/anaconda3/lib/python3.9/runpy.py", line 197, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/traitlets/config/application.py", line 846, in launch_instance
>>>     app.start()
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 677, in start
>>>     self.io_loop.start()
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/asyncio/base_events.py", line 596, in run_forever
>>>     self._run_once()
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/asyncio/base_events.py", line 1890, in _run_once
>>>     handle._run()
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/asyncio/events.py", line 80, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 446, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
>>>     await result
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 353, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2901, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3172, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "/var/folders/cf/78m2pngj3kqgdk88fltk67r80000gn/T/ipykernel_3617/3869054478.py", line 1, in <module>
>>>     cnnhistory=model.fit(x_traincnn, y_train, batch_size=20, epochs=500, validation_data=(x_testcnn, y_test))
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
>>>     return fn(*args, **kwargs)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1216, in fit
>>>     tmp_logs = self.train_function(iterator)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 878, in train_function
>>>     return step_function(self, iterator)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 867, in step_function
>>>     outputs = model.distribute_strategy.run(run_step, args=(data,))
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 860, in run_step
>>>     outputs = model.train_step(data)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 809, in train_step
>>>     loss = self.compiled_loss(
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/engine/compile_utils.py", line 201, in __call__
>>>     loss_value = loss_obj(y_t, y_p, sample_weight=sw)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 141, in __call__
>>>     losses = call_fn(y_true, y_pred)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 245, in call
>>>     return ag_fn(y_true, y_pred, **self._fn_kwargs)
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 1737, in sparse_categorical_crossentropy
>>>     return backend.sparse_categorical_crossentropy(
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/backend.py", line 5098, in sparse_categorical_crossentropy
>>>     target = cast(target, 'int64')
>>> 
>>>   File "/opt/anaconda3/lib/python3.9/site-packages/keras/backend.py", line 1977, in cast
>>>     return tf.cast(x, dtype)
>>> 