# Classification of Stress from Video data

In [1]:
import numpy as np
import scipy.io
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
import random
import time 

import os
from pathlib import Path

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, LeaveOneOut, GroupKFold
from sklearn.svm import LinearSVC, SVR, SVC
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.metrics import balanced_accuracy_score, f1_score

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle

from make_classification import *

#### Import labels

In [2]:
# Read the label table (first CSV column used as the row index) and discard
# every row that contains a missing value, then preview the first rows.
labels = (
    pd.read_csv('../../Dataset/labels.csv', sep=",", header=0, index_col=0)
    .dropna()
)
labels.head()

Unnamed: 0_level_0,binary-stress,affect3-class
subject/task,Unnamed: 1_level_1,Unnamed: 2_level_1
2ea4_Breathing,0,0
2ea4_Counting1,1,2
2ea4_Counting2,1,2
2ea4_Counting3,1,2
2ea4_Math,1,2


# Classical approaches

Several models are tested: Random Forests, K-nearest neighbors, SVM, and Multi-Layer Perceptron. All models are fitted 10 times on random splits, and the average scores over the 10 repetitions are reported.

The dimensionality of the feature matrix can be reduced using PCA or feature selection. Feature selection is performed using an L1 penalty or Recursive Feature Elimination (RFE). The optimal number of features is determined using RFECV.

#### Import and prepare dataset
We use handcrafted features for the classical approaches: the mean and standard deviation of each action unit are computed for each task.

In [3]:
# Load the feature table; the first CSV column becomes the row index.
X = pd.read_csv(
    '../Feature Extraction/Features/video11tasks_aus_gaze_mean_std.csv',
    sep=",",
    header=0,
    index_col=0,
)
X

Unnamed: 0_level_0,mean_AU01_r,mean_AU02_r,mean_AU04_r,mean_AU05_r,mean_AU06_r,mean_AU07_r,mean_AU09_r,mean_AU10_r,mean_AU12_r,mean_AU14_r,...,std_AU26_c,std_AU45_c,std_gaze_0_x,std_gaze_0_y,std_gaze_0_z,std_gaze_1_x,std_gaze_1_y,std_gaze_1_z,std_gaze_angle_x,std_gaze_angle_y
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2ea4_Breathing,0.053544,0.030378,0.975433,0.034078,0.154789,0.000000,0.017178,0.314189,0.000511,0.011533,...,0.000000,0.128090,0.036566,0.029348,0.004029,0.031988,0.030634,0.004312,0.030840,0.029467
2ea4_Counting1,0.121233,0.055100,0.877633,0.097667,0.776300,0.000500,0.025833,0.261833,0.219033,0.103733,...,0.225073,0.225073,0.033509,0.088476,0.012939,0.044910,0.090585,0.011769,0.035252,0.090543
2ea4_Counting2,0.239933,0.121933,0.699067,0.043600,0.361467,0.000600,0.040133,0.466533,0.054867,0.180700,...,0.261118,0.494913,0.091534,0.144721,0.015324,0.076520,0.156368,0.016675,0.082406,0.152070
2ea4_Counting3,0.243400,0.175633,0.921500,0.052467,0.513733,0.004333,0.044033,0.220967,0.029333,0.068033,...,0.161376,0.225073,0.038788,0.058931,0.008472,0.048979,0.064065,0.008068,0.040383,0.061490
2ea4_Math,0.093867,0.044367,0.977233,0.030967,0.665600,0.000200,0.030333,0.475333,0.068633,0.116800,...,0.140234,0.286660,0.030214,0.036247,0.002941,0.036639,0.033594,0.003048,0.028685,0.033834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
y9z6_Relax,0.316987,0.092360,0.544120,0.058907,0.096773,0.140613,0.092387,0.588133,0.293547,1.767467,...,0.408054,0.489951,0.116735,0.082581,0.048241,0.097324,0.062890,0.042341,0.107584,0.073130
y9z6_Speaking,0.280267,0.174800,0.398467,0.078300,1.244167,0.275267,0.088267,1.178500,1.602867,1.791633,...,0.452960,0.498951,0.206403,0.099980,0.084383,0.185432,0.104782,0.087148,0.203716,0.101155
y9z6_Stroop,0.288200,0.148133,0.507933,0.067533,0.790300,0.155100,0.067333,1.107633,1.476133,1.829333,...,0.435647,0.469778,0.132046,0.074969,0.023335,0.101070,0.065041,0.038047,0.118659,0.063418
y9z6_Video1,0.113943,0.092494,0.479989,0.070552,0.378057,0.121609,0.171460,0.740391,1.118161,2.107299,...,0.273926,0.480543,0.085846,0.053932,0.014006,0.057236,0.052596,0.025581,0.061491,0.050582


In [4]:
# Align features and labels: merging the two tables on their indexes (inner
# join by default) leaves only the ids present in both, and each table is
# then restricted to those ids.
idx = list(
    X.merge(labels, left_index=True, right_index=True).index
)
labels = labels.loc[idx]
x = X.loc[idx]

### Classification of binary stress

In [10]:
# Target for the binary task: the 'binary-stress' column of the label table.
y = labels['binary-stress']
# Display the number of samples in each class.
y.value_counts()

1    317
0    283
Name: binary-stress, dtype: int64

In [13]:
# Passed as `feature_selector` to make_nclassif_random_splits; presumably
# None disables feature selection there -- TODO confirm in make_classification.
feature_selector = None

# Candidate models: one 3-nearest-neighbours classifier, one RBF-kernel SVC,
# and MLPs with 2, 3 or 4 hidden layers of 64, 128 or 256 units each.
# (RandomForestClassifier(max_depth=5, random_state=0) is currently disabled.)
list_classif = [
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf'),
]
list_classif += [
    MLPClassifier(max_iter=500, random_state=0, hidden_layer_sizes=(width,) * depth)
    for depth in (2, 3, 4)
    for width in (64, 128, 256)
]

# Passed as `n_splits` to make_nclassif_random_splits.
n_splits = 10

In [14]:
# Run every classifier in list_classif on the current target `y`, then show
# the summary produced by avg_res (presumably per-classifier means over the
# splits -- confirm in make_classification).
res, conf = make_nclassif_random_splits(
    x,
    y,
    n_splits=n_splits,
    feature_selector=feature_selector,
    list_classifiers=list_classif,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.701995,0.701887,1.048699
MLPClassifier_2_128,0.6754,0.676371,2.44388
MLPClassifier_2_256,0.694357,0.696828,3.100957
MLPClassifier_2_64,0.683503,0.683776,2.228803
MLPClassifier_3_128,0.654709,0.654876,2.095852
MLPClassifier_3_256,0.697854,0.698221,2.827694
MLPClassifier_3_64,0.670366,0.671002,1.906241
MLPClassifier_4_128,0.67484,0.674602,1.95416
MLPClassifier_4_256,0.69903,0.699456,3.315197
MLPClassifier_4_64,0.693373,0.694269,1.760813


In [19]:
# Per-classifier standard deviation of the scores stored in `res`.
# NOTE(review): `res` is rebound by the 3-class run further down in this
# notebook. This cell must be executed right after the binary-stress run
# (fresh kernel, top to bottom); otherwise it reports the 3-class spread.
# The metric columns are selected *before* aggregating so that any
# non-numeric column in `res` cannot make `.std()` fail on recent pandas.
print(f'Standard Deviations over {n_splits} splits:')
res.groupby(['classifier'])[['f1-score', 'accuracy', 'time']].std()

Standard Deviations over 10 splits:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.03692,0.038242,0.355539
MLPClassifier_2_128,0.028984,0.033862,1.562009
MLPClassifier_2_256,0.02495,0.029663,1.493921
MLPClassifier_2_64,0.033094,0.034804,2.702989
MLPClassifier_3_128,0.044244,0.047898,0.914902
MLPClassifier_3_256,0.027574,0.031482,1.393645
MLPClassifier_3_64,0.054857,0.055784,0.75755
MLPClassifier_4_128,0.044856,0.050016,0.807491
MLPClassifier_4_256,0.031708,0.032985,1.528057
MLPClassifier_4_64,0.030742,0.031572,0.801113


In [None]:
#res.to_csv('Results/video_stress_classif.csv', sep=",", index=True)

### Classification of 3-class stress

In [16]:
# Target for the 3-class task: the 'affect3-class' column of the label table.
# This rebinds `y`, which previously held the binary-stress target.
y = labels['affect3-class']
# Display the number of samples in each class.
y.value_counts()

0    212
2    211
1    177
Name: affect3-class, dtype: int64

In [17]:
# Same evaluation as for the binary task, now on the 3-class target `y`.
# This rebinds `res` and `conf`, replacing the binary-stress results.
res, conf = make_nclassif_random_splits(
    x,
    y,
    n_splits=n_splits,
    feature_selector=feature_selector,
    list_classifiers=list_classif,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.533919,0.531247,1.414495
MLPClassifier_2_128,0.538359,0.535384,3.98055
MLPClassifier_2_256,0.548568,0.54619,5.034363
MLPClassifier_2_64,0.544287,0.543195,4.036625
MLPClassifier_3_128,0.529345,0.524568,3.275688
MLPClassifier_3_256,0.545142,0.543727,4.340021
MLPClassifier_3_64,0.524689,0.523034,2.76615
MLPClassifier_4_128,0.539787,0.535443,2.919008
MLPClassifier_4_256,0.550895,0.551012,4.160852
MLPClassifier_4_64,0.525459,0.523884,2.660571


In [18]:
# Per-classifier standard deviation of the scores stored in `res`
# (the 3-class results when the notebook is run top to bottom).
# The metric columns are selected *before* aggregating so that any
# non-numeric column in `res` cannot make `.std()` fail on recent pandas.
print(f'Standard Deviations over {n_splits} splits:')
res.groupby(['classifier'])[['f1-score', 'accuracy', 'time']].std()

Standard Deviations over 10 splits:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.03692,0.038242,0.355539
MLPClassifier_2_128,0.028984,0.033862,1.562009
MLPClassifier_2_256,0.02495,0.029663,1.493921
MLPClassifier_2_64,0.033094,0.034804,2.702989
MLPClassifier_3_128,0.044244,0.047898,0.914902
MLPClassifier_3_256,0.027574,0.031482,1.393645
MLPClassifier_3_64,0.054857,0.055784,0.75755
MLPClassifier_4_128,0.044856,0.050016,0.807491
MLPClassifier_4_256,0.031708,0.032985,1.528057
MLPClassifier_4_64,0.030742,0.031572,0.801113


In [None]:
#res.to_csv('Results/video_3stress_classif.csv', sep=",", index=True)

# Deep Learning approaches

Action Units (AUs) are used directly (without aggregation) in the deep learning approaches. The AUs are used as input for two models: a Transformer network and an LSTM network. 
Both models are fitted 10 times on random splits, and the average scores over 10 repetitions are reported.