# Classification of Stress from Physiological data

In [1]:
import numpy as np
import scipy.io
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
import random
import time 

import os
from pathlib import Path

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, LeaveOneOut, GroupKFold
from sklearn.svm import LinearSVC, SVR, SVC
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.metrics import balanced_accuracy_score, f1_score

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle

from make_classification import *

#### Import labels

In [2]:
# Read the label table (rows keyed by the first CSV column) and discard any
# row that contains a missing value, then preview the first rows.
labels = (
    pd.read_csv('../../Dataset/labels.csv', sep=",", header=0, index_col=0)
    .dropna()
)
labels.head()

Unnamed: 0_level_0,binary-stress,affect3-class
subject/task,Unnamed: 1_level_1,Unnamed: 2_level_1
2ea4_Breathing,0,0
2ea4_Counting1,1,2
2ea4_Counting2,1,2
2ea4_Counting3,1,2
2ea4_Math,1,2


# Classical approaches

Several models are tested: Random Forest, K-nearest neighbors, SVM, and Multi-Layer Perceptron. Each model is fitted 10 times on random splits, and the scores averaged over the 10 repetitions are reported.

The dimensionality of the feature matrix can be reduced using PCA or feature selection. Feature selection is performed using an L1 penalty or Recursive Feature Elimination (RFE). The optimal number of features is determined using RFECV.

#### Import and prepare dataset
We use handcrafted features.

In [4]:
# Handcrafted physiological features; rows are keyed by the first CSV column.
X = pd.read_csv(
    '../Feature Extraction/Features/all_physiological_features.csv',
    sep=",",
    header=0,
    index_col=0,
)
X

Unnamed: 0,meanHR,minHR,maxHR,sdHR,modeHR,nNN,meanNN,SDSD,CVNN,SDNN,...,min_scl,mean_scl,sd_scl,nSCR,aucSCR,meanAmpSCR,maxAmpSCR,meanRespSCR,sumAmpSCR,sumRespSCR
2ea4_Baseline,63.430940,57.034221,79.575597,4.686314,22.541376,62.000000,950.677419,42.033401,64.843179,0.068207,...,-1.132743,0.022668,0.891539,10.000000,-302.978934,0.666899,1.475682,1.160000,6.668994,11.600000
2ea4_Breathing,61.712465,45.871560,84.269663,11.008515,38.398103,59.666667,1002.893855,106.659486,173.316531,0.172816,...,-0.968230,-0.006435,0.951185,7.333333,120.038332,0.651699,1.748524,1.126095,4.779126,7.882667
2ea4_Counting1,70.973331,58.365759,82.872928,5.449003,24.507169,69.000000,850.550725,42.532503,67.609179,0.079489,...,-2.159093,0.012778,0.827482,7.000000,-92.589522,0.860715,2.091642,2.433429,6.025006,17.034000
2ea4_Counting2,64.301154,56.285178,79.787234,5.497478,23.502056,63.000000,939.587302,58.359502,76.110328,0.081004,...,-1.167276,-0.011765,0.937448,6.000000,315.591734,0.543249,1.502306,1.810000,3.259493,10.860000
2ea4_Counting3,66.253133,55.762082,80.645161,5.443261,24.883080,65.000000,911.661538,42.853573,74.014193,0.081186,...,-1.494708,-0.015553,0.904099,12.000000,558.708830,0.721123,1.743292,1.282167,8.653481,15.386000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
y9z6_Relax,58.768442,50.933786,75.000000,4.025131,24.066214,58.000000,1025.475862,73.281653,66.410643,0.064761,...,-1.017887,-0.000668,0.990734,2.400000,122.464218,0.197116,0.600114,13.550667,0.473079,32.521600
y9z6_Speaking,75.767263,61.601643,173.410405,22.408291,111.808762,71.000000,833.521127,138.003990,143.843994,0.172574,...,-2.730086,0.022081,0.680286,5.000000,205.591733,0.939958,1.721442,2.791200,4.699788,13.956000
y9z6_Stroop,64.712474,58.027079,73.710074,3.843868,15.682994,63.000000,930.412698,45.572140,54.571530,0.058653,...,-1.291715,-0.029019,0.932817,5.000000,608.317194,0.320072,0.772637,1.639200,1.600359,8.196000
y9z6_Video1,60.061609,53.191489,75.757576,3.684374,22.566086,59.310345,1002.581395,58.998370,59.073755,0.058922,...,-1.833577,0.002666,0.974272,3.103448,5.575997,0.310237,1.061803,3.314667,0.962805,10.286897


In [5]:
# Keep only the rows whose index value is present in both tables: merge()
# performs an inner join on the two indexes by default. Selecting both frames
# with the same idx gives features (x) and labels the same row order.
# Note: `labels` is rebound to its filtered version here.
idx = X.merge(labels, left_index=True, right_index=True).index.tolist()
x = X.loc[idx]
labels = labels.loc[idx]

### Classification of binary stress

In [6]:
# Target for this section: the 'binary-stress' column of the filtered labels.
y = labels.loc[:, 'binary-stress']
# Class balance of the target.
y.value_counts()

1    367
0    332
Name: binary-stress, dtype: int64

In [9]:
# Feature-reduction strategy passed to make_nclassif_random_splits:
# 'PCA', 'RFE', 'L1' or None.
feature_selector = None

# Number of random splits passed to make_nclassif_random_splits.
n_splits = 10

# Candidate classifiers; random_state is pinned on every estimator that is
# given one below.
list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf', random_state=0),
    # NOTE(review): hidden_layer_sizes=[] requests no hidden layer at all --
    # confirm this is the intended "Multi Layer Perceptron" configuration.
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
]

In [10]:
# Evaluate every classifier in list_classif on the binary target using the
# project helper from make_classification, then display the summary built by
# avg_res (presumably per-classifier means over the splits -- confirm in the
# helper module).
res, conf = make_nclassif_random_splits(
    x,
    y,
    n_splits=n_splits,
    feature_selector=feature_selector,
    list_classifiers=list_classif,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.658397,0.659295,3.758574
MLPClassifier,0.704339,0.705516,4.398804
RandomForestClassifier,0.723241,0.725716,4.31579
SVC,0.715608,0.717623,3.829964


In [11]:
# Per-classifier standard deviation of each metric across the splits.
# - The caption reads the split count from n_splits so it cannot drift from
#   the configuration cell.
# - The metric columns are selected *before* aggregating, so std() is only
#   ever applied to these three columns and never to any other column of res
#   (pandas >= 2.0 raises on non-numeric columns in GroupBy.std).
print('Standard Deviations over {} splits:'.format(n_splits))
res.groupby('classifier')[['f1-score', 'accuracy', 'time']].std()

Standard Deviations over 10 splits:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.024769,0.025439,0.111651
MLPClassifier,0.03165,0.032008,0.403645
RandomForestClassifier,0.029584,0.0306,0.275857
SVC,0.025741,0.027851,0.136337


In [14]:
# Optional export: uncomment to save the binary-stress results table to disk.
#res.to_csv('Results/phys_stress_classif.csv', sep=",", index=True)

### Classification of 3-class stress

In [12]:
# Switch the target to the 'affect3-class' column; this rebinds y, which held
# the binary target in the previous section.
y = labels.loc[:, 'affect3-class']
# Class balance of the target.
y.value_counts()

0    253
2    244
1    202
Name: affect3-class, dtype: int64

In [13]:
# Same evaluation as for the binary target, now on the three-class target:
# run every classifier in list_classif through the project helper and display
# the summary built by avg_res. Note that res and conf are overwritten.
res, conf = make_nclassif_random_splits(
    x,
    y,
    n_splits=n_splits,
    feature_selector=feature_selector,
    list_classifiers=list_classif,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.510267,0.504914,3.591494
MLPClassifier,0.537366,0.52672,4.52754
RandomForestClassifier,0.559803,0.56878,3.967809
SVC,0.588597,0.585403,3.713235


In [14]:
# Per-classifier standard deviation of each metric across the splits for the
# three-class task.
# - The caption reads the split count from n_splits so it cannot drift from
#   the configuration cell.
# - The metric columns are selected *before* aggregating, so std() is only
#   ever applied to these three columns and never to any other column of res
#   (pandas >= 2.0 raises on non-numeric columns in GroupBy.std).
print('Standard Deviations over {} splits:'.format(n_splits))
res.groupby('classifier')[['f1-score', 'accuracy', 'time']].std()

Standard Deviations over 10 splits:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.037314,0.03245,0.255454
MLPClassifier,0.039444,0.041654,0.392419
RandomForestClassifier,0.049477,0.032489,0.220771
SVC,0.045088,0.036994,0.317085


In [None]:
# Optional export: uncomment to save the 3-class results table to disk.
#res.to_csv('Results/phys_3stress_classif.csv', sep=",", index=True)