# Classification of Stress from Physiological data

In [1]:
import numpy as np
import scipy.io
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
import random
import time 

import os
from pathlib import Path

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, LeaveOneOut, GroupKFold
from sklearn.svm import LinearSVC, SVR, SVC
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.metrics import balanced_accuracy_score, f1_score

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle

from make_classification import *

#### Import labels

In [2]:
# Labels table, indexed by the first CSV column; rows containing a missing
# value are dropped before use.
labels = pd.read_csv(
    '../../stressid-dataset/labels.csv',
    sep=",",
    header=0,
    index_col=0,
)
labels = labels.dropna()
labels.head()

Unnamed: 0_level_0,binary-stress,affect3-class
subject/task,Unnamed: 1_level_1,Unnamed: 2_level_1
2ea4_Breathing,0,0
2ea4_Counting1,1,2
2ea4_Counting2,1,2
2ea4_Counting3,1,2
2ea4_Math,1,2


# Classical approaches

Several models are tested: Random Forests, K-nearest neighbors, SVM, and Multi-Layer Perceptron. All models are fitted 10 times on random splits, and the average scores over the 10 repetitions are reported.

The dimensionality of the feature matrix can be reduced using PCA or feature selection. Feature selection is performed using an L1 penalty or Recursive Feature Elimination (RFE). The optimal number of features is determined using RFECV.

#### Import and prepare dataset
We use handcrafted features.

In [3]:
# Handcrafted physiological features, indexed by the first CSV column.
X = pd.read_csv(
    '../../Reprod-Features-OGWelch/all_physiological_features.csv',
    sep=",",
    header=0,
    index_col=0,
)
X

Unnamed: 0,meanHR,minHR,maxHR,sdHR,modeHR,nNN,meanNN,SDSD,CVNN,SDNN,...,min_scl,mean_scl,sd_scl,nSCR,aucSCR,meanAmpSCR,maxAmpSCR,meanRespSCR,sumAmpSCR,sumRespSCR
2ea4_Baseline,63.430749,57.034221,79.575597,4.685361,22.541376,62.000000,950.677419,41.894327,64.816310,0.068179,...,-1.132743,0.022668,0.891539,10.000000,-302.978934,0.666899,1.475682,1.160000,6.668994,11.600000
2ea4_Breathing,61.712623,45.871560,84.033613,11.009485,38.162054,59.666667,1002.893855,106.620821,173.316789,0.172817,...,-0.968230,-0.006435,0.951185,7.333333,120.038332,0.651699,1.748524,1.126095,4.779126,7.882667
2ea4_Counting1,70.973286,58.479532,82.872928,5.448858,24.393396,69.000000,850.550725,42.532503,67.603176,0.079482,...,-2.159093,0.012778,0.827482,7.000000,-92.589522,0.860715,2.091642,2.433429,6.025006,17.034000
2ea4_Counting2,64.301095,56.285178,79.787234,5.497250,23.502056,63.000000,939.587302,58.386027,76.102820,0.080996,...,-1.167276,-0.011765,0.937448,6.000000,315.591734,0.543249,1.502306,1.810000,3.259493,10.860000
2ea4_Counting3,66.253079,55.762082,80.645161,5.443328,24.883080,65.000000,911.661538,42.884190,74.003384,0.081174,...,-1.494708,-0.015553,0.904099,12.000000,558.708830,0.721123,1.743292,1.282167,8.653481,15.386000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
y9z6_Relax,58.768320,50.933786,75.000000,4.024016,24.066214,58.000000,1025.475862,73.275208,66.397765,0.064748,...,-1.017887,-0.000668,0.990734,2.400000,122.464218,0.197116,0.600114,13.550667,0.473079,32.521600
y9z6_Speaking,79.574294,61.601643,176.470588,27.277891,114.868946,73.000000,810.684932,171.499251,168.914413,0.208360,...,-2.730086,0.022081,0.680286,5.000000,205.591732,0.939958,1.721442,2.791200,4.699788,13.956000
y9z6_Stroop,64.712598,58.027079,73.710074,3.844910,15.682994,63.000000,930.412698,45.644284,54.586653,0.058669,...,-1.291715,-0.029019,0.932817,5.000000,608.317194,0.320072,0.772637,1.639200,1.600359,8.196000
y9z6_Video1,60.061811,53.191489,75.757576,3.685925,22.566086,59.310345,1002.581395,59.116008,59.104060,0.058952,...,-1.833577,0.002666,0.974272,3.103448,5.575997,0.310237,1.061803,3.314667,0.962805,10.286897


In [4]:
# Keep only the entries present in both the feature table and the label table
# (inner join on the index), so that `x` and `labels` share the same rows in
# the same order.
idx = X.merge(labels, left_index=True, right_index=True).index.tolist()
x, labels = X.loc[idx], labels.loc[idx]

### Classification of binary stress

In [5]:
# Target for the binary task: the 'binary-stress' column of the aligned labels.
y = labels.loc[:, 'binary-stress']
# Class balance of the target.
y.value_counts()

binary-stress
1    367
0    332
Name: count, dtype: int64

In [36]:
# Dimensionality-reduction strategy passed to the evaluation helper.
# Accepted values: 'PCA', 'RFE', 'L1' or None.
feature_selector = "RFE"

# Number of random splits passed to the evaluation helper.
n_splits = 10

# Classifiers to compare; a fixed seed is set on every estimator that takes one.
list_classif = [
    RandomForestClassifier(random_state=0, max_depth=5),
    KNeighborsClassifier(n_neighbors=3),
    SVC(kernel='rbf', gamma='auto', random_state=0),
    # NOTE(review): hidden_layer_sizes=[] requests no hidden layer at all --
    # confirm this is the intended configuration for the "MLP" baseline.
    MLPClassifier(hidden_layer_sizes=[], max_iter=5000, random_state=0),
]

In [None]:
# Run every classifier in `list_classif` on the binary-stress target over
# `n_splits` random splits, then summarise the collected scores with avg_res.
res, conf = make_nclassif_random_splits(
    x,
    y,
    n_splits=n_splits,
    list_classifiers=list_classif,
    feature_selector=feature_selector,
    random_seed=11,
)
avg_res(res)

Split  1/10
Split  2/10


In [8]:
# Spread of each metric across the random splits, per classifier.
# The caption is derived from `n_splits` so it stays correct if the number of
# splits is changed in the configuration cell.
print(f'Standard Deviations over {n_splits} splits:')
# Select the metric columns *before* aggregating: calling .std() on the whole
# frame would raise on any non-numeric column of `res` with recent pandas.
res.groupby(['classifier'])[['f1-score', 'accuracy', 'time']].std()

Standard Deviations over 10 splits:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.030089,0.028773,0.015239
MLPClassifier,0.054729,0.053026,0.023975
RandomForestClassifier,0.043404,0.042589,0.025723
SVC,0.039236,0.037766,0.015864


In [9]:
# Optional: uncomment to write the binary-stress results held in `res` to CSV.
# NOTE(review): the 'Results/' directory presumably has to exist beforehand -- confirm.
#res.to_csv('Results/phys_stress_classif.csv', sep=",", index=True)

### Classification of 3-class stress

In [10]:
# Target for the 3-class task: the 'affect3-class' column of the aligned labels.
y = labels.loc[:, 'affect3-class']
# Class balance of the target.
y.value_counts()

affect3-class
0    253
2    244
1    202
Name: count, dtype: int64

In [11]:
# Same evaluation as for the binary task, now with the 3-class target in `y`:
# run every classifier over `n_splits` random splits and summarise with avg_res.
res, conf = make_nclassif_random_splits(
    x,
    y,
    n_splits=n_splits,
    list_classifiers=list_classif,
    feature_selector=feature_selector,
    random_seed=11,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.509183,0.502529,0.93217
MLPClassifier,0.527986,0.518086,1.153585
RandomForestClassifier,0.522961,0.538875,1.081498
SVC,0.562983,0.561838,0.95786


In [12]:
# Spread of each metric across the random splits, per classifier (3-class task).
# The caption is derived from `n_splits` so it stays correct if the number of
# splits is changed in the configuration cell.
print(f'Standard Deviations over {n_splits} splits:')
# Select the metric columns *before* aggregating: calling .std() on the whole
# frame would raise on any non-numeric column of `res` with recent pandas.
res.groupby(['classifier'])[['f1-score', 'accuracy', 'time']].std()

Standard Deviations over 10 splits:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.04108,0.040353,0.077299
MLPClassifier,0.036788,0.037332,0.041693
RandomForestClassifier,0.041421,0.034154,0.052393
SVC,0.031955,0.029095,0.045219


In [None]:
# Optional: uncomment to write the 3-class results held in `res` to CSV.
# NOTE(review): the 'Results/' directory presumably has to exist beforehand -- confirm.
#res.to_csv('Results/phys_3stress_classif.csv', sep=",", index=True)