In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFE
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

# Class labels

In [2]:
# Load the per-trial ratings; the participant id is not needed for classification.
y = pd.read_csv("za_klasifikaciju.csv").drop(columns = 'Participant_id')

y.head()

Unnamed: 0,Trial,Valence,Arousal,Dominance,Liking
0,1,6.96,3.92,7.19,6.05
1,2,7.23,7.15,6.94,8.01
2,3,4.94,6.01,6.12,8.06
3,4,7.04,7.09,8.01,8.22
4,5,8.26,7.91,7.19,8.13


# Import features

In [41]:
# Read the feature table and replace every NaN with zero in one step.
features = pd.read_csv("all_features.csv").fillna(0)

features.head(4)

Unnamed: 0,mean_gsr,std_gsr,1st derivative mean_gsr,2nd derivative mean_gsr,LP mean_gsr,LP std_gsr,LP 1st deriv mean_gsr,LP 2nd deriv mean_gsr,ZCR LP_gsr,ZCR VLP_gsr,...,beta_PSD_Fp2,activity_Fp2,mobility_Fp2,complexity_Fp2,alpha_energy_Fp2,beta_energy_Fp2,gamma_energy_Fp2,alpha_ree_Fp2,beta_ree_Fp2,gamma_ree_Fp2
0,-0.394177,-0.853043,-0.318445,-0.622953,-0.209031,-0.786299,-0.339382,0.361181,-0.103473,0.074744,...,-0.710536,-0.848122,0.503444,-0.258046,-1.298534,-0.51531,-0.634903,-1.412771,1.891417,0.148131
1,-0.833062,-0.188345,-0.647861,-0.903261,-0.658657,-0.524978,-0.626322,0.470784,-0.931256,-2.914997,...,-0.624605,-0.380905,-0.905923,0.79415,-0.090758,-0.488023,-0.220959,0.412681,-0.498811,-0.135638
2,-2.949972,2.642031,-2.920647,-2.595122,-3.212105,2.354718,-2.848762,3.306429,-0.103473,1.569614,...,-0.049992,-0.18427,-0.789268,0.219589,0.935582,0.504392,-0.357287,1.043153,-0.814343,-1.111112
3,0.169623,-0.495352,0.217965,0.18308,0.398396,-0.448091,0.220651,-0.218379,-0.517364,0.822179,...,-0.724771,0.18081,-1.154847,1.626979,-0.796417,-0.850704,-0.32518,-0.344244,0.290749,0.328797


In [42]:
# (rows, columns) of the feature table
features.shape

(1280, 1490)

In [43]:
# Binary classification target from the valence rating: 1 = high, 0 = low.
# Ratings below 4.5 map to 0, ratings of 4.5 and above map to 1.
valence = y['Valence']
y_axis = valence.mask(valence < 4.5, 0)
y_axis = y_axis.mask(y_axis >= 4.5, 1)

# Feature selection using RFE

In [44]:
# Recursive feature elimination driven by a linear-kernel SVC: 100 features are
# dropped per iteration until 200 remain (100, 200 and 300 were tried).
# NOTE(review): the selector is fitted on every row, including rows that are
# later held out for testing / validation, so downstream scores may be
# optimistic. Consider fitting RFE inside each training fold.
estimator = SVC(kernel = "linear")
selector = RFE(
    estimator,
    n_features_to_select = 200,
    step = 100,
).fit(features, y_axis)

In [45]:
# features.columns.values[selector.support_] # which features were chosen
# np.asarray accepts both a DataFrame and an ndarray, so this cell can be
# re-run safely (DataFrame.to_numpy() would fail on the second run because
# `features` has already been replaced by an ndarray).
features = np.asarray(features)
# Keep only the columns flagged by the fitted RFE selector.
sel_feats = features[:, selector.support_]
sel_feats.shape

(1280, 200)

# Cross Val - GridSearchCV

In [46]:
# Hold out a stratified 10% test split; the remaining 90% feeds the grid search.
x_train, x_test, y_train, y_test = train_test_split(
    sel_feats,
    y_axis,
    test_size = 0.1,
    random_state = 999,
    shuffle = True,
    stratify = y_axis,
)

# Non-linear kernels: every C / gamma / kernel combination is scored by
# 10-fold cross-validated accuracy on the training split.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'poly', 'sigmoid'],
}
optimal_params = GridSearchCV(
    estimator = SVC(),
    param_grid = param_grid,
    scoring = 'accuracy',
    cv = 10,
    verbose = 0,
)
optimal_params.fit(x_train, y_train)
print(optimal_params.best_score_)

0.6319865067466267


In [47]:
# Hyper-parameter combination with the best mean cross-validated accuracy
print(optimal_params.best_params_)

{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}


In [54]:
# Linear kernel only (advantage: RFE was also run with a linear-kernel SVC).
# NOTE(review): if the best C lands on the upper edge of this grid, consider
# extending the range.
param_grid = {
    'C': [0.01, 0.1, 1, 5],
    'kernel': ['linear'],
}
optimal_params = GridSearchCV(
    estimator = SVC(),
    param_grid = param_grid,
    scoring = 'accuracy',
    cv = 10,
    verbose = 0,
)
optimal_params.fit(x_train, y_train)
print(optimal_params.best_score_)

0.6475937031484258


In [55]:
# Best hyper-parameters found by the linear-kernel grid search
print(optimal_params.best_params_)

{'C': 5, 'kernel': 'linear'}


In [56]:
# svm = SVC(C = 0.1, kernel = 'linear')
# svm.fit(x_train, y_train)

# y_pred = svm.predict(x_test)
# accuracy_score(y_test, y_pred)

# 10-fold CrossVal

In [61]:
# 10-fold stratified cross-validation of a linear SVM on the selected features.
# Collects the per-fold accuracy and the confusion matrix summed over all folds.
# NOTE(review): C = 1 is used here although the linear grid search reported
# C = 5 as best - confirm this is intentional.
# NOTE(review): sel_feats was chosen by RFE fitted on all rows, so these fold
# scores are not fully independent of the held-out data.
s_kf = StratifiedKFold(n_splits = 10) # no shuffle
svm = SVC(C = 1, kernel = 'linear')

# Plain ndarray so the fold indices are applied positionally, independent of
# whatever index the pandas Series carries.
y_arr = np.asarray(y_axis)
# Fixed label order guarantees a 2x2 matrix for every fold.
class_labels = [0, 1]

acc_array = []
conf_mat = np.zeros((2, 2)) # summing up confusion matrices for 10-folds
for train, test in s_kf.split(sel_feats, y_arr):
    svm_fitt = svm.fit(sel_feats[train], y_arr[train])
    # Predict once and reuse the result for both the accuracy and the matrix.
    y_pred = svm_fitt.predict(sel_feats[test])
    iter_score = accuracy_score(y_arr[test], y_pred)
    conf_mat = conf_mat + confusion_matrix(y_arr[test], y_pred, labels = class_labels)
    print(iter_score)
    acc_array.append(iter_score)

0.6640625
0.59375
0.65625
0.71875
0.65625
0.671875
0.6328125
0.6875
0.6875
0.6484375


# Summed confusion matrix over the 10 folds

In [62]:
# X_train = sel_feats[best_train_ind]
# X_test = sel_feats[best_test_ind]
# y_train = y_axis[best_train_ind]
# y_test = y_axis[best_test_ind]
# svm = SVC(C = 1, kernel = 'linear')
# svm.fit(X_train, y_train)
# y_pred = svm.predict(X_test)
# cm = confusion_matrix(y_test, y_pred, labels = svm.classes_)
# print(cm)
# Confusion matrix accumulated (summed, not averaged) over the CV folds;
# rows are true classes, columns are predicted classes.
print(conf_mat)

[[220. 252.]
 [181. 627.]]


# Mean accuracy for 10 folds

In [2]:
# Summary of the cross-validation run: mean / std of the fold accuracies.
print("Mean Accuracy for Valence = {:.2f}".format(np.mean(acc_array)))
print(" with standard deviation = {:.2f}".format(np.std(acc_array)))

# Precision, recall and F1 for class 1 (high), taken from the confusion matrix
# summed over the folds (rows = true class, columns = predicted class).
# `conf_mat` is used because `cm` only exists in commented-out code.
true_pos = conf_mat[1, 1]
false_pos = conf_mat[0, 1]
false_neg = conf_mat[1, 0]
prec = true_pos / (true_pos + false_pos)
recall = true_pos / (true_pos + false_neg)
# For class 0 (low) instead:
# prec = conf_mat[0,0]/(conf_mat[0,0] + conf_mat[1,0])
# recall = conf_mat[0,0]/(conf_mat[0,0] + conf_mat[0,1])
# Stored as `f1` so the imported sklearn.metrics.f1_score function is not shadowed.
f1 = 2 * (prec * recall / (prec + recall))
print("Precision = {:.2f}, Recall = {:.2f}, F1 = {:.2f}".format(prec, recall, f1))