In [8]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Reading data

In [2]:
data = pd.read_csv("Emotion_Databases.csv")
emotions = {'neutral': 1, 
            'calm': 2, 
            'happy': 3, 
            'sad': 4, 
            'angry': 5, 
            'fearful': 6, 
            'disgust': 7,
            'surprised': 8, 
            'boredom': 9}
data["Emotion"] = data["Emotion"].map(emotions)
names = data["File Name"]
data = data.drop("File Name", axis = 1)
data

Unnamed: 0,Emotion,MFCC Mean,MFCC Standard deviation,Croma Mean,Chroma Standard deviation,MEL Mean,MEL Standard deviation
0,1,113.995018,-17.983574,0.272483,0.675760,0.040853,0.003970
1,1,112.260872,-17.682295,0.218568,0.725722,0.047049,0.004594
2,1,111.574677,-17.600674,0.208594,0.728253,0.063881,0.006346
3,1,110.698242,-17.382687,0.193784,0.747211,0.061945,0.005832
4,2,115.860268,-18.125637,0.227840,0.699711,0.023791,0.002190
...,...,...,...,...,...,...,...
2450,8,100.367325,-9.399616,0.267632,0.647229,0.434254,0.023873
2451,8,96.449471,-9.660956,0.265470,0.621995,0.526482,0.024304
2452,8,99.923615,-10.737567,0.280428,0.595984,0.524668,0.033549
2453,8,96.282051,-10.374496,0.261964,0.627558,0.687390,0.034079


In [3]:
y = data["Emotion"]
data = data.drop("Emotion", axis = 1)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size = 0.22)

In [5]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1914, 6), (541, 6), (1914,), (541,))

In [36]:
net = MLPClassifier(hidden_layer_sizes = tuple([40] * 10), alpha = 0.001, learning_rate = "constant", \
                    random_state = 11, verbose = 3)

In [37]:
# params = {
#     "hidden_layer_sizes": [tuple([i] * 10) for i in range(10, 51, 10)],
#     "alpha": [0.0001, 0.001, 0.01, 0.1, 1],
#     "learning_rate": ["constant", "invscaling", "adaptive"],
    
# }
# net = GridSearchCV(net, params, scoring = "accuracy", n_jobs = -1, verbose = 3)

I have already found the necessary params, so grid search is not needed for now

In [38]:
net.fit(X_train, y_train)

Iteration 1, loss = 2.24095893
Iteration 2, loss = 2.16364717
Iteration 3, loss = 2.12872202
Iteration 4, loss = 2.09660222
Iteration 5, loss = 2.06767643
Iteration 6, loss = 2.03969524
Iteration 7, loss = 2.01635341
Iteration 8, loss = 1.99970352
Iteration 9, loss = 1.99494525
Iteration 10, loss = 1.98980500
Iteration 11, loss = 1.96840785
Iteration 12, loss = 1.94643047
Iteration 13, loss = 1.93589857
Iteration 14, loss = 1.91413277
Iteration 15, loss = 1.89692487
Iteration 16, loss = 1.87581628
Iteration 17, loss = 1.85798948
Iteration 18, loss = 1.84834735
Iteration 19, loss = 1.82906811
Iteration 20, loss = 1.80440906
Iteration 21, loss = 1.80831070
Iteration 22, loss = 1.80269439
Iteration 23, loss = 1.76966626
Iteration 24, loss = 1.76472876
Iteration 25, loss = 1.75576975
Iteration 26, loss = 1.74823738
Iteration 27, loss = 1.74463756
Iteration 28, loss = 1.74821727
Iteration 29, loss = 1.72338971
Iteration 30, loss = 1.71672815
Iteration 31, loss = 1.72226760
Iteration 32, los

MLPClassifier(activation='relu', alpha=0.001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(40, 40, 40, 40, 40, 40, 40, 40, 40, 40),
              learning_rate='constant', learning_rate_init=0.001, max_fun=15000,
              max_iter=200, momentum=0.9, n_iter_no_change=10,
              nesterovs_momentum=True, power_t=0.5, random_state=11,
              shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
              verbose=3, warm_start=False)

In [39]:
accuracy_score(net.predict(X_test), y_test)

0.36414048059149723

# Saving the model

In [41]:
filename = 'net.sav'
pickle.dump(net, open(filename, 'wb'))

# Loading the model

In [12]:
filename = 'net.sav'
net = pickle.load(open(filename, 'rb'))

In [34]:
accuracy_score(net.predict(X_test), y_test)

0.3678373382624769

# Let's have a look at the feature importance 

In [38]:
pd.DataFrame({"feature": X_train.columns,
             "coef": np.mean(net.coefs_[0])})

Unnamed: 0,feature,coef
0,MFCC Mean,0.029156
1,MFCC Standard deviation,0.029156
2,Croma Mean,0.029156
3,Chroma Standard deviation,0.029156
4,MEL Mean,0.029156
5,MEL Standard deviation,0.029156


## Confusing, but all features have the same importance. Maybe I did smth wrong