In [None]:
# Mount Google Drive inside the Colab runtime; the dataset glob used by
# load_data points below /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import glob
import os
import pickle
import warnings

import librosa
import numpy as np
import soundfile
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [None]:
def extract_feature(file_name, mfcc, chroma, mel):
    """Build a single 1-D feature vector for one audio file.

    The file is read with ``soundfile`` as float32 samples at its own sample
    rate. Every enabled feature family is averaged over the time axis and the
    results are concatenated in a fixed order: MFCC, chroma, mel spectrogram.
    Each flag is honoured independently of the others.

    Args:
        file_name: Path of the audio file to read.
        mfcc: If truthy, append the time-averaged MFCCs (``n_mfcc=40``).
        chroma: If truthy, append the time-averaged chromagram computed from
            the magnitude STFT of the signal.
        mel: If truthy, append the time-averaged mel spectrogram.

    Returns:
        A 1-D float ``numpy`` array holding the selected features, or an
        empty array when all three flags are falsy.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        signal = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns (frames, channels) for multi-channel files; average
    # the channels so the librosa calls below always receive a mono signal.
    if signal.ndim > 1:
        signal = signal.mean(axis=1)

    # Start from an empty float array so the result is an (empty) ndarray even
    # when no feature family is selected.
    parts = [np.array([])]

    if mfcc:
        mfcc_mean = np.mean(
            librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40).T, axis=0)
        parts.append(mfcc_mean)

    if chroma:
        # The STFT magnitude is only needed for the chromagram.
        stft = np.abs(librosa.stft(signal))
        chroma_mean = np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        parts.append(chroma_mean)

    if mel:
        # The audio must be passed as ``y=``: recent librosa releases reject a
        # positional signal argument.
        mel_mean = np.mean(
            librosa.feature.melspectrogram(y=signal, sr=sample_rate).T, axis=0)
        parts.append(mel_mean)

    return np.hstack(parts)

In [None]:
# Maps the two-digit emotion code (the third "-"-separated field of a file
# name, as read in load_data) to a human-readable label.
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}
print("Emotions in the data set are : ", emotions)

Emotions in the data set are :  {'01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad', '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'}


In [None]:
# Labels kept for training; load_data skips every file whose emotion is not
# listed here.
observed_emotions = [
    'angry',
    'happy',
    'neutral',
    'sad',
]
print("Emotions being observed are : ", observed_emotions)

Emotions being observed are :  ['angry', 'happy', 'neutral', 'sad']


In [None]:
def load_data(test_size=0.25,
              data_glob="/content/drive/MyDrive/DATASET/Dataset/RAVDESS_Dataset/Actor_*/*.wav"):
    """Extract features for every observed-emotion file and split train/test.

    The emotion of a file is looked up in ``emotions`` using the third
    "-"-separated field of its base name. Files whose emotion is not in
    ``observed_emotions`` are skipped silently. Files whose name carries no
    known emotion code, or whose feature extraction fails, are skipped with a
    warning so that dropped samples are visible instead of vanishing.

    Args:
        test_size: Forwarded to ``train_test_split``.
        data_glob: Glob pattern selecting the audio files. Defaults to the
            dataset location on the mounted drive.

    Returns:
        The result of ``train_test_split`` called on the feature matrix and
        the list of labels with ``random_state=9``, unpacked by the caller as
        ``x_train, x_test, y_train, y_test``.

    Raises:
        ValueError: If no usable sample was found for ``data_glob``.
    """
    features, labels = [], []

    # glob returns paths in filesystem order; sorting makes the seeded split
    # identical from run to run and machine to machine.
    for path in sorted(glob.glob(data_glob)):
        fields = os.path.basename(path).split("-")
        emotion = emotions.get(fields[2]) if len(fields) > 2 else None
        if emotion is None:
            warnings.warn(f"Skipping {path}: file name has no known emotion code")
            continue
        if emotion not in observed_emotions:
            continue

        try:
            feature = extract_feature(path, mfcc=True, chroma=True, mel=True)
        except Exception as exc:
            # Best-effort loading: one unreadable file must not abort the run,
            # but the reason is reported. KeyboardInterrupt still propagates.
            warnings.warn(f"Skipping {path}: {exc!r}")
            continue

        features.append(feature)
        labels.append(emotion)

    if not features:
        raise ValueError(
            f"No usable audio files matched {data_glob!r}; check the dataset "
            "path and the warnings emitted while loading.")

    return train_test_split(np.array(features), labels,
                            test_size=test_size, random_state=9)

In [None]:
# Extract the features and hold out a quarter of the samples for testing,
# then report how many samples ended up in each part.
x_train, x_test, y_train, y_test = load_data(test_size=0.25)
print((len(x_train), len(x_test)))

(504, 168)


In [None]:
# Width of the feature matrix, i.e. the length of one feature vector.
print('Features extracted: {}'.format(x_train.shape[1]))

Features extracted: 180


In [None]:
# Multi-layer perceptron with a single hidden layer of 300 units.
# random_state fixes the weight initialisation and batch shuffling so that a
# fresh "Restart & Run All" trains the same model every time.
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' is only
# used with solver='sgd'; with the default solver it has no effect.
model = MLPClassifier(
    alpha=0.01,
    batch_size=256,
    epsilon=1e-08,
    hidden_layer_sizes=(300,),
    learning_rate='adaptive',
    max_iter=500,
    random_state=9,
)

In [None]:
# Train the classifier on the training split; as the last expression of the
# cell, the fitted estimator is also displayed.
model.fit(x_train,y_train)

MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=500)

In [None]:
# Predict an emotion label for every held-out sample.
y_pred=model.predict(x_test)

In [None]:
# Fraction of held-out samples whose predicted label equals the true label.
accuracy = accuracy_score(y_test, y_pred)

In [None]:
# Report the accuracy as a percentage with two decimals.
print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 73.21%


In [None]:
# Per-emotion precision, recall, F1 and support on the held-out split.
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

       angry       0.76      0.83      0.80        47
       happy       0.66      0.76      0.71        51
     neutral       0.71      0.74      0.73        27
         sad       0.83      0.58      0.68        43

    accuracy                           0.73       168
   macro avg       0.74      0.73      0.73       168
weighted avg       0.74      0.73      0.73       168



In [None]:
# Confusion matrix on the held-out split. In the recorded output the row sums
# equal the per-class support of the classification report, so rows are the
# true emotions and columns the predicted ones, in the order
# angry, happy, neutral, sad.
from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(y_test,y_pred)
print(matrix)

[[39  5  2  1]
 [ 8 39  2  2]
 [ 0  5 20  2]
 [ 4 10  4 25]]
