In [1]:
import numpy as np
import pandas as pd

from sklearn import svm
from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report

In [2]:
# Load the pre-computed feature table from the Excel workbook, then show it as
# the cell's rich output.
# NOTE(review): the file name suggests the features were already one-hot
# encoded and normalised upstream -- confirm; if so, that normalisation was
# fitted before the train/test split performed later in this notebook.
FEATURES_XLSX = '../data/model_stat_features_ohe_norm.xlsx'
superBAD_df = pd.read_excel(FEATURES_XLSX)
superBAD_df

Unnamed: 0,participant_id,class,video,gaze_0_x_avg,gaze_0_x_std,gaze_0_y_avg,gaze_0_y_std,gaze_0_z_avg,gaze_0_z_std,gaze_1_x_avg,...,AU23_c_avg,AU23_c_std,AU25_c_avg,AU25_c_std,AU26_c_avg,AU26_c_std,AU28_c_avg,AU28_c_std,AU45_c_avg,AU45_c_std
0,1499,0,ch1_1,-1.848976,1.832604,0.486999,2.091978,-0.013749,0.051782,-1.280947,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-0.648536,-0.399438
1,1499,0,ch2_1,-0.998615,0.053947,0.733473,-0.384329,-0.111539,-0.175017,-1.201650,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-1.341785,-2.487180
2,1499,0,ch3_1,0.630893,2.988496,0.286679,2.265382,-0.022411,0.146258,-0.855233,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-0.487258,-0.189236
3,1499,0,ch4_1,1.022831,-0.582850,-0.424730,0.077492,-0.268368,-0.251832,-2.448838,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,1.849659,1.351579
4,1499,0,ch5_1,-0.797190,-0.670601,0.676771,-0.526596,-0.166126,-0.235961,-0.990040,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-1.106926,-1.242754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
858,9214,1,fr5_1,-0.653220,0.398168,0.448151,-0.236552,-0.263871,-0.188972,1.349746,...,1.770707,0.093749,-0.188711,0.358751,0.013142,0.639736,-0.127239,-0.189371,0.144553,0.438069
859,9214,1,fr6_1,-0.455734,0.407437,0.093545,-0.190948,-0.395646,-0.217012,1.609016,...,1.319957,1.187774,0.262834,0.787531,0.026391,0.654418,-0.127239,-0.189371,-0.491015,-0.193827
860,9214,1,fr7_1,-0.736579,-0.011720,0.219754,1.706656,-0.314383,-0.260818,1.535812,...,1.296423,1.220278,1.940840,1.558392,2.819306,2.078990,-0.127239,-0.189371,-0.128345,0.197466
861,9214,1,fr8_1,-0.311335,0.145468,0.211662,-0.642755,-0.361850,-0.269734,1.607338,...,0.424747,1.667976,2.984664,1.697262,4.213879,2.259057,-0.127239,-0.189371,-0.037135,0.282210


### Train/test split

In [3]:
# Positional column layout used here: column 1 is the label, columns 3 onward
# are the model inputs; columns 0 and 2 are left out.
# NOTE(review): presumably columns 0-2 are participant_id / class / video as in
# the preview above. The preview header also lists 'Unnamed: 0' -- confirm that
# is the index and not a real column, otherwise these offsets are off by one.
target_class = superBAD_df.iloc[:, 1].to_numpy().astype('int')
features = superBAD_df.iloc[:, 3:]

In [4]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# repeatable across runs.
# NOTE(review): rows are split individually. The preview shows several rows per
# participant_id, so one participant's videos can land in both splits --
# confirm that is intended, or switch to a group-aware splitter.
TEST_FRACTION = 0.20
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target_class,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

### SVM

In [5]:
# Baseline: an SVC with otherwise-default hyper-parameters, fitted on the
# training split (fit() returns the estimator itself, so it can be chained).
svm_classifier = svm.SVC(decision_function_shape='ovo').fit(X_train, y_train)
y_predict = svm_classifier.predict(X_test)

# Per-class precision / recall / F1 on the held-out rows.
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.43      0.73      0.54        56
           1       0.47      0.15      0.23        61
           2       0.48      0.50      0.49        56

    accuracy                           0.45       173
   macro avg       0.46      0.46      0.42       173
weighted avg       0.46      0.45      0.41       173



### GridSearch - SVM

In [6]:
# Exponent grids for the RBF-SVM search: gamma = 2**-15 ... 2**3 and
# C = 2**-5 ... 2**15, both stepping the exponent by 2.
gamma_exp = list(range(-15, 4, 2))
c_exp = list(range(-5, 16, 2))

# Integer exponents keep the same value types as before (e.g. 2**3 stays the
# int 8, negative exponents give floats).
gamma_list = [2 ** e for e in gamma_exp]
c_list = [2 ** e for e in c_exp]

# 10 gamma values x 11 C values = 110 candidates, RBF kernel only.
# To also search other kernels, extend the 'kernel' list
# (e.g. ['rbf', 'sigmoid', 'poly']).
parameters = {'kernel': ['rbf'], 'C': c_list, 'gamma': gamma_list}

# refit=True retrains the best candidate on all of X_train, so `grid` can be
# used directly for prediction afterwards.
grid = GridSearchCV(
    svm.SVC(decision_function_shape='ovo'),
    parameters,
    refit=True,
    verbose=True,
)

grid.fit(X_train, y_train)

Fitting 5 folds for each of 110 candidates, totalling 550 fits


In [7]:
# Report the winning hyper-parameter combination, then the estimator that was
# refit with it -- one per line.
print(grid.best_params_, grid.best_estimator_, sep='\n')

{'C': 8, 'gamma': 0.00048828125, 'kernel': 'rbf'}
SVC(C=8, decision_function_shape='ovo', gamma=0.00048828125)


In [8]:
# Predict the held-out rows with the grid search's refit best estimator.
grid_predictions = grid.predict(X_test)

# Per-class metrics for the tuned model.
# NOTE(review): in the recorded run, test accuracy is 0.40 here versus 0.45 for
# the untuned SVC earlier in the notebook, i.e. tuning did not help on this
# split.
tuned_report = classification_report(y_test, grid_predictions)
print(tuned_report)

              precision    recall  f1-score   support

           0       0.40      0.73      0.52        56
           1       0.41      0.18      0.25        61
           2       0.40      0.30      0.34        56

    accuracy                           0.40       173
   macro avg       0.40      0.41      0.37       173
weighted avg       0.40      0.40      0.37       173

