In [None]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
def extract_feature(file_name, mfcc, chroma, mel):
    """Return one time-averaged feature vector for an audio file.

    Each enabled feature family is computed with librosa, averaged over the
    time axis, and concatenated in the fixed order MFCC -> chroma -> mel.

    Parameters
    ----------
    file_name : str
        Path of an audio file readable by ``soundfile``.
    mfcc : bool
        Include the mean of 40 MFCC coefficients.
    chroma : bool
        Include the mean chroma vector computed from the STFT magnitude.
    mel : bool
        Include the mean mel-spectrogram bands.

    Returns
    -------
    numpy.ndarray
        1-D float64 array; empty when every flag is false.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile yields (frames, channels) for multi-channel audio, whereas the
    # librosa feature functions below expect a mono 1-D signal.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # Seed with an empty float64 array so the stacked result keeps the same
    # dtype the incremental hstack produced, even when no flag is set.
    parts = [np.array([])]
    if mfcc:
        mfcc_mean = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        parts.append(mfcc_mean)
    if chroma:
        # The STFT magnitude is only needed by the chroma features.
        stft = np.abs(librosa.stft(X))
        chroma_mean = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        parts.append(chroma_mean)
    if mel:
        # `y` must be passed by keyword: recent librosa releases reject a
        # positional audio argument here.
        mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        parts.append(mel_mean)
    return np.hstack(parts)

In [78]:
# Two-digit emotion code -> label. The code is looked up from the third
# dash-separated field of each audio file name.
emotions = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised",
}

# Only recordings with one of these labels are kept for training/evaluation.
observed_emotions = [
    "happy",
    "neutral",
    "angry",
    "sad",
]

In [79]:
# Mount Google Drive at /content/drive so the dataset files are reachable.
# This cell only works inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [96]:
def load_data(test_size=0.2,
              data_glob="/content/drive/My Drive/SER_Data/data/Actor_*/*.wav"):
    """Extract features for the observed emotions and split them into train/test.

    The emotion label is taken from the third dash-separated field of each
    file name and mapped through ``emotions``. Files whose label is not in
    ``observed_emotions`` are skipped before any audio is decoded.

    Parameters
    ----------
    test_size : float
        Fraction of samples placed in the test split.
    data_glob : str
        Glob pattern selecting the ``.wav`` files to load.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split`` (stratified by label, ``random_state=9``).

    Raises
    ------
    ValueError
        If no file matching ``data_glob`` carries an observed emotion.
    KeyError
        If a file name carries an emotion code missing from ``emotions``.
    """
    x, y = [], []
    # Sort the paths: glob order depends on the filesystem, and the seeded
    # split is only reproducible when the sample order is.
    for file in sorted(glob.glob(data_glob)):
        file_name = os.path.basename(file)
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        # Feature extraction expects mono audio. Rewrite the file in place only
        # when it actually has more than one channel, so already-mono files
        # (and files we skip) are never re-encoded.
        sound = AudioSegment.from_wav(file)
        if sound.channels != 1:
            sound.set_channels(1).export(file, format="wav")
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    if not x:
        raise ValueError(
            "No audio files with an observed emotion matched {!r}; "
            "check that the dataset path exists and is mounted.".format(data_glob)
        )
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9, stratify=y)

In [81]:
# pydub is required by the import below; on a fresh runtime install it first with: %pip install pydub

In [82]:
from pydub import AudioSegment

In [97]:
# Extract features and build the stratified train/test split (20% held out for testing).
X_train, X_test, y_train, y_test = load_data(test_size=0.20)

In [98]:
# Number of samples in the training and test splits, in that order.
print((X_train.shape[0], X_test.shape[0]))

(537, 135)


In [99]:
# Length of each per-file feature vector (number of columns in the training matrix).
print(f'Features extracted: {X_train.shape[1]}')

Features extracted: 180


In [100]:
# Bind the package to `lgb`, the alias used when the classifier is constructed
# as `lgb.LGBMClassifier()`.
import lightgbm as lgb

In [101]:
# LightGBM classifier constructed with the library's default hyperparameters.
model =   lgb.LGBMClassifier()

In [102]:
# Fit the classifier on the training split.
model.fit(X_train,y_train)

LGBMClassifier()

In [103]:
# Predict an emotion label for every sample in the held-out test split.
predictions = model.predict(X_test)

In [104]:
# Score the test-set predictions against the true labels.
accuracy = accuracy_score(y_test, predictions)

# Report the score as a percentage with two decimal places.
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 82.96%


In [105]:
from sklearn.metrics import classification_report,confusion_matrix

In [106]:
# Per-class precision, recall, F1 and support on the test split.
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

       angry       0.82      0.95      0.88        39
       happy       0.79      0.71      0.75        38
     neutral       0.84      0.84      0.84        19
         sad       0.86      0.82      0.84        39

    accuracy                           0.83       135
   macro avg       0.83      0.83      0.83       135
weighted avg       0.83      0.83      0.83       135



In [None]:
def predict(data, model_path='model.pkl'):
    """Load a pickled model from disk and return its predictions for ``data``.

    Parameters
    ----------
    data
        Input passed unchanged to the loaded model's ``predict`` method.
    model_path : str
        Path of the pickled model file; defaults to ``'model.pkl'`` in the
        current working directory.

    Returns
    -------
    Whatever the loaded model's ``predict`` method returns.

    Raises
    ------
    FileNotFoundError
        If ``model_path`` does not exist.

    Notes
    -----
    NOTE(review): nothing visible in this notebook writes ``model.pkl``. This
    assumes it was produced with ``pickle.dump``; if it was saved with another
    serializer (e.g. joblib), load it with that library instead -- confirm.
    """
    # SECURITY: unpickling can execute arbitrary code; only load trusted files.
    with open(model_path, 'rb') as model_file:
        check = pickle.load(model_file)
    return check.predict(data)