In [2]:
import librosa as lb
import soundfile as sf
import numpy as np
import os, glob, pickle
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score


In [3]:
# Emotion names keyed by the two-digit code that loading_audio_data reads
# from the third dash-separated field of each audio file name.
emotion_labels = dict([
    ('01', 'neutral'),
    ('02', 'calm'),
    ('03', 'happy'),
    ('04', 'sad'),
    ('05', 'angry'),
    ('06', 'fearful'),
    ('07', 'disgust'),
    ('08', 'surprised'),
])

# Binary grouping used for the classifier targets: an emotion is either
# listed as "good" here or it falls into the "bad" group.
good_emotions = ['neutral', 'calm', 'happy', 'surprised']
bad_emotions = [name for name in emotion_labels.values() if name not in good_emotions]

In [4]:
def audio_features(file_title, mfcc, chroma, mel):
    """Extract a 1-D feature vector from an audio file.

    The file is read as float32 samples with soundfile. For every enabled
    feature family the per-frame values are averaged over time, and the
    averaged vectors are concatenated in the fixed order MFCC, chroma, mel.

    Args:
        file_title: Path of the audio file to open.
        mfcc: If truthy, include the time-averaged MFCCs (40 coefficients).
        chroma: If truthy, include the time-averaged chromagram computed
            from the magnitude of the short-time Fourier transform.
        mel: If truthy, include the time-averaged mel spectrogram.

    Returns:
        numpy.ndarray: 1-D array holding the selected features. The array
        is empty when all three flags are falsy.
    """
    with sf.SoundFile(file_title) as audio_recording:
        audio = audio_recording.read(dtype="float32")
        sample_rate = audio_recording.samplerate

    # Initialise unconditionally: previously this only happened inside the
    # chroma branch, so any call with chroma=False failed on an unbound name.
    result = np.array([])

    if mfcc:
        mfcc_mean = np.mean(lb.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfcc_mean))
    if chroma:
        # The STFT magnitude is only needed by the chroma features.
        stft = np.abs(lb.stft(audio))
        chroma_mean = np.mean(lb.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma_mean))
    if mel:
        # The signal is passed as y=...: recent librosa releases no longer
        # accept it as a positional argument.
        mel_mean = np.mean(lb.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel_mean))
    return result

In [5]:
def loading_audio_data():
    """Load every matching recording, label it good/bad and split the data.

    Files matching ``data//Actor_*//*.wav`` are processed in sorted order.
    The emotion code is the third dash-separated field of the file name; it
    is looked up in ``emotion_labels`` and collapsed to ``'good'`` when the
    emotion is listed in ``good_emotions`` and to ``'bad'`` otherwise.
    Files whose feature extraction fails are reported and skipped.

    Returns:
        list: ``[X_train, X_test, y_train, y_test]`` as produced by
        ``train_test_split`` with ``test_size=0.1`` and ``random_state=9``.

    Raises:
        IndexError: If a file name has fewer than three dash-separated fields.
        KeyError: If the emotion code is not a key of ``emotion_labels``.
    """
    x = []
    y = []

    # glob returns paths in arbitrary order; sorting keeps the seeded
    # train/test split identical from one machine or run to the next.
    for file in sorted(glob.glob("data//Actor_*//*.wav")):
        file_name = os.path.basename(file)
        emotion = emotion_labels[file_name.split("-")[2]]

        try:
            feature = audio_features(file, mfcc=True, chroma=True, mel=True)
        except Exception as exc:
            # Still best-effort, but no longer silent, and KeyboardInterrupt /
            # SystemExit now propagate instead of being swallowed.
            print("Skipping {}: {!r}".format(file, exc))
            continue

        x.append(feature)
        y.append('good' if emotion in good_emotions else 'bad')

    final_dataset = train_test_split(np.array(x), y, test_size=0.1, random_state=9)
    return final_dataset

In [6]:
# Build the dataset once and unpack the four pieces returned by
# loading_audio_data: train/test features followed by train/test labels.
X_train, X_test, y_train, y_test = loading_audio_data()


In [6]:
# param_grid={'solver' : ['lbfgs', 'sgd', 'adam'],
#             'hidden_layer_sizes':[(100,),(120,),(130,),(140,),(150,),(200,)],
#            }

In [7]:
# grid=GridSearchCV(MLPClassifier(),param_grid=param_grid,n_jobs=-1)
# grid.fit(X_train,y_train)
# print(grid.best_params_)

{'hidden_layer_sizes': (120,), 'solver': 'adam'}


In [7]:
# Multi-layer perceptron with one hidden layer of 200 units, at most 400
# training iterations, and progress logging switched off.
# NOTE(review): the grid-search output recorded in this notebook reports
# hidden_layer_sizes=(120,) as the best setting, yet (200,) is used here —
# confirm this is intentional.
# NOTE(review): no random_state is passed, so repeated fits may not reproduce
# the recorded accuracy — consider seeding.
model = MLPClassifier(hidden_layer_sizes=(200,),max_iter=400,verbose=False)

In [8]:
# Train the classifier on the training split; as the last expression of the
# cell, the fitted estimator is what gets displayed.
model.fit(X_train,y_train)


MLPClassifier(hidden_layer_sizes=(200,), max_iter=400)

In [24]:
# Score the fitted model on the held-out split and report the share of
# correctly predicted labels as a percentage with one decimal place.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of the Recognizer is: {:.1f}%".format(100 * accuracy))

Accuracy of the Recognizer is: 75.0%


In [12]:
import pickle

# Serialise the fitted classifier to disk so it can be reloaded later
# without retraining.
Pkl_Filename = "Sound_Model.pkl"

with open(Pkl_Filename, mode='wb') as file:
    pickle.dump(model, file)

In [30]:
# Read the sample clip as float32 samples; the cell then displays the shape
# of the loaded array.
with sf.SoundFile('badd.wav') as audio_recording:
    audio = audio_recording.read(dtype="float32")
audio.shape


(466944,)

In [10]:
# Extract MFCC, chroma and mel features from the sample clip, using the same
# flags that loading_audio_data passes for the training files.
feature = audio_features('badd.wav', mfcc=True, chroma=True, mel=True)
# Print the length of one held-out feature vector — presumably a sanity
# check that the clip's vector has the size the model was trained on.
print(X_test[0].shape)
# Reshape the single vector into a one-row 2-D array before predicting.
model.predict(feature.reshape(1,-1))


(180,)


array(['bad'], dtype='<U4')