In [1]:
import numpy as np
import cv2
import pandas as pd
import os
import pickle

In [2]:
data=pickle.load(open('/content/drive/MyDrive/emotion_data/data_emotion_features.pickle',mode='rb'))

In [3]:
X=np.array(data['data']) 
y=np.array(data['label'])


In [4]:
X.shape , y.shape

((8184, 1, 128), (8184,))

In [8]:
X=X.reshape(-1,128)
X.shape

(8184, 128)

In [9]:
import sklearn
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(X,y,train_size=0.8,random_state=0)

In [10]:
x_train.shape ,x_test.shape,y_train.shape,y_test.shape

((6547, 128), (1637, 128), (6547,), (1637,))

# Models

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report,accuracy_score,f1_score

In [12]:
def get_report(model,x_train,y_train,x_test,y_test):
  y_pred_train = model.predict(x_train)
  y_pred_test=model.predict(x_test)

  acc_train= accuracy_score(y_train,y_pred_train)
  acc_test=accuracy_score(y_test,y_pred_test)

  f1_train=f1_score(y_train,y_pred_train,average='macro')
  f1_test=f1_score(y_test,y_pred_test,average='macro')

  print('Acuuracy Train : %0.2f'%(acc_train*100))
  print('Acuuracy Test : %0.2f'%(acc_test*100))
  print('F1 Train : %0.2f'%(f1_train*100))
  print('F1 Test : %0.2f'%(f1_test*100))


# Logistic Regression

In [None]:
model_logistic = LogisticRegression( solver="saga")
model_logistic.fit(x_train,y_train)


LogisticRegression(solver='saga')

In [None]:
get_report(model_logistic,x_train,y_train,x_test,y_test)

Acuuracy Train : 33.97
Acuuracy Test : 32.50
F1 Train : 31.73
F1 Test : 30.08


# SVM

In [None]:
model_SVC=SVC()
model_SVC.fit(x_train,y_train)

SVC()

In [None]:
get_report(model_SVC,x_train,y_train,x_test,y_test)

Acuuracy Train : 49.14
Acuuracy Test : 38.55
F1 Train : 48.49
F1 Test : 37.91


# Random Forest

In [None]:
model_rf=RandomForestClassifier(n_estimators=10)
model_rf.fit(x_train,y_train)

RandomForestClassifier(n_estimators=10)

In [None]:
get_report(model_rf,x_train,y_train,x_test,y_test)

Acuuracy Train : 99.47
Acuuracy Test : 52.90
F1 Train : 99.47
F1 Test : 53.45


# Voting Classifier

In [None]:
model_voting = VotingClassifier(estimators=[
                                            ('logistic',LogisticRegression(solver='saga')),
                                            ('svm',SVC(probability=True)),
                                            ('rf',RandomForestClassifier())
],voting='soft',weights=[2,3,1])

In [None]:
model_voting.fit(x_train,y_train)

VotingClassifier(estimators=[('logistic', LogisticRegression(solver='saga')),
                             ('svm', SVC(probability=True)),
                             ('rf', RandomForestClassifier())],
                 voting='soft', weights=[2, 3, 1])

In [None]:
get_report(model_voting,x_train,y_train,x_test,y_test)

Acuuracy Train : 74.42
Acuuracy Test : 47.10
F1 Train : 74.24
F1 Test : 46.52


# Parameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
model_grid = GridSearchCV(model_voting,
                          param_grid={
                              'logistic__solver':['saga'],
                              'svm__C':[3,5],
                              'svm__gamma':[0.1,0.3,0.5],
                              'rf__n_estimators':[5,10,20],
                              'rf__max_depth':[3,5,7],
                              'voting':['soft','hard']
                          },scoring='accuracy',cv=3,n_jobs=1,verbose=2)

In [None]:
model_grid.fit(x_train,y_train)

Fitting 3 folds for each of 108 candidates, totalling 324 fits
[CV] END logistic__solver=saga, rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=  16.9s
[CV] END logistic__solver=saga, rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=  16.3s
[CV] END logistic__solver=saga, rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=  16.1s
[CV] END logistic__solver=saga, rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=  16.1s
[CV] END logistic__solver=saga, rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=  16.5s
[CV] END logistic__solver=saga, rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=  16.2s
[CV] END logistic__solver=saga, rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=  16.2s
[CV] END logistic__solver=saga, rf__max_depth=3,

GridSearchCV(cv=3,
             estimator=VotingClassifier(estimators=[('logistic',
                                                     LogisticRegression(solver='saga')),
                                                    ('svm',
                                                     SVC(probability=True)),
                                                    ('rf',
                                                     RandomForestClassifier())],
                                        voting='soft', weights=[2, 3, 1]),
             n_jobs=1,
             param_grid={'logistic__solver': ['saga'],
                         'rf__max_depth': [3, 5, 7],
                         'rf__n_estimators': [5, 10, 20], 'svm__C': [3, 5],
                         'svm__gamma': [0.1, 0.3, 0.5],
                         'voting': ['soft', 'hard']},
             scoring='accuracy', verbose=2)

In [None]:
model_best_estimator=model_grid.best_estimator_

In [None]:
model_grid.best_score_

0.37818936399775

In [None]:
pickle.dump(model_best_estimator,open('/content/drive/MyDrive/models/emotion_model.pkl',mode='wb'))

In [5]:
emotion_recognition_model=pickle.load(open('/content/drive/MyDrive/models/machine_learning_emotion.pkl',mode='rb'))


In [6]:
emotion_recognition_model.get_params

<bound method _BaseHeterogeneousEnsemble.get_params of VotingClassifier(estimators=[('logistic', LogisticRegression(solver='saga')),
                             ('svm', SVC(C=5, gamma=0.5, probability=True)),
                             ('rf',
                              RandomForestClassifier(max_depth=7,
                                                     n_estimators=20))],
                 weights=[2, 3, 1])>

In [13]:
model_voting = VotingClassifier(estimators=[
                                            ('logistic',LogisticRegression(solver='saga')),
                                            ('svm',SVC(C=5,gamma=0.5,probability=True)),
                                            ('rf',RandomForestClassifier(max_depth=7,n_estimators=20))
],voting='soft',weights=[2,3,1])

In [15]:
model_voting.fit(x_train,y_train)

VotingClassifier(estimators=[('logistic', LogisticRegression(solver='saga')),
                             ('svm', SVC(C=5, gamma=0.5, probability=True)),
                             ('rf',
                              RandomForestClassifier(max_depth=7,
                                                     n_estimators=20))],
                 voting='soft', weights=[2, 3, 1])

In [16]:
get_report(model_voting,x_train,y_train,x_test,y_test)

Acuuracy Train : 49.53
Acuuracy Test : 38.79
F1 Train : 48.50
F1 Test : 37.51


In [18]:
pickle.dump(model_voting,open('/content/drive/MyDrive/models/emotion_model_new.pkl',mode='wb'))