# Multimodal Classification of Stress from Video, Audio, and Physiological data

In [1]:
import numpy as np
import scipy.io
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
import random
import time 

import os
from pathlib import Path

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier, BernoulliRBM

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, LeaveOneOut, GroupKFold
from sklearn.svm import LinearSVC, SVR, SVC
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.metrics import balanced_accuracy_score, f1_score

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle, resample
from imblearn.over_sampling import SMOTE

from make_classification import *

# Import and prepare dataset

In [2]:
# Self-assessment labels indexed by subject/task; rows with a missing value are dropped
labels = pd.read_csv(
    '../../Dataset/labels.csv',
    sep=",",
    header=0,
    index_col=0,
).dropna()
labels.head()

Unnamed: 0_level_0,binary-stress,affect3-class
subject/task,Unnamed: 1_level_1,Unnamed: 2_level_1
2ea4_Breathing,0,0
2ea4_Counting1,1,2
2ea4_Counting2,1,2
2ea4_Counting3,1,2
2ea4_Math,1,2


In [18]:
# Per-modality feature tables; the first CSV column is used as the row index
x_phys = pd.read_csv('../Feature Extraction/Features/all_physiological_features.csv', sep=",", header=0, index_col=0)
x_video = pd.read_csv('../Feature Extraction/Features/video11tasks_aus_gaze_mean_std.csv', sep=",", header=0, index_col=0)
# The audio file is read without a header row, so its feature columns get integer labels
x_audio = pd.read_csv('../Feature Extraction/Features/HCfeatures.csv', sep=",", header=None, index_col=0)
# Keep only the part of each audio row label before the first '.' (presumably
# dropping a file extension) so the labels can be matched with the other tables.
# Direct index assignment replaces set_axis(..., inplace=True): the `inplace`
# argument of set_axis was removed in pandas 2.0.
x_audio.index = [name.split('.')[0] for name in x_audio.index]

In [7]:
#### For W2V features
# This cell only applies when x_audio was loaded from a W2V feature file, where
# column 1 holds one stringified Python list of 512 values per row.
# The hand-crafted audio features are already numeric: calling literal_eval on
# them raises ValueError, so in that case the table is left untouched.
from ast import literal_eval

if pd.api.types.is_numeric_dtype(x_audio[1]):
    print('x_audio is already numeric: W2V parsing skipped')
else:
    # Parse column 1 row by row; a comprehension is used so that no loop
    # variable overwrites the notebook-level `idx` list.
    new_x = pd.DataFrame(
        data=[literal_eval(cell) for cell in x_audio[1]],
        index=list(x_audio.index),
        columns=range(1, 513),
    )
    x_audio = new_x

#### Merge all modalities

In [19]:
# Inner-join the three modality tables on their row index (merge's default `how`)
X = (
    x_phys
    .merge(x_video, how='inner', left_index=True, right_index=True)
    .merge(x_audio, how='inner', left_index=True, right_index=True)
)

In [20]:
# Display the merged feature table (physiological + video + audio columns)
X

Unnamed: 0,meanHR,minHR,maxHR,sdHR,modeHR,nNN,meanNN,SDSD,CVNN,SDNN,...,131,132,133,134,135,136,137,138,139,140
2ea4_Counting1,70.973331,58.365759,82.872928,5.449003,24.507169,69.0,850.550725,42.532503,67.609179,0.079489,...,0.049624,0.056959,0.057458,0.039472,0.091337,0.075305,0.066131,0.076576,0.091099,0.062747
2ea4_Counting2,64.301154,56.285178,79.787234,5.497478,23.502056,63.0,939.587302,58.359502,76.110328,0.081004,...,0.075684,0.065511,0.060782,0.073867,0.053671,0.058367,0.091469,0.070384,0.069309,0.065220
2ea4_Counting3,66.253133,55.762082,80.645161,5.443261,24.883080,65.0,911.661538,42.853573,74.014193,0.081186,...,0.055145,0.072327,0.056967,0.043665,0.074496,0.071977,0.063752,0.063494,0.054192,0.049670
2ea4_Math,62.736057,51.635112,82.191781,7.167148,30.556669,61.0,968.065574,59.700689,103.206918,0.106611,...,0.049281,0.049601,0.047094,0.056301,0.052114,0.050279,0.056169,0.047845,0.040968,0.064372
2ea4_Reading,74.131040,55.248619,88.757396,7.404303,33.508778,73.0,818.109589,40.365451,88.435343,0.108097,...,0.053782,0.036691,0.049414,0.047562,0.044296,0.043580,0.043016,0.038888,0.058302,0.045172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
y9z6_Counting3,68.109985,60.120240,138.248848,10.826527,78.128607,66.0,894.909091,81.524121,91.725666,0.102497,...,0.072428,0.067444,0.072337,0.060692,0.062226,0.059057,0.071853,0.067436,0.065640,0.073445
y9z6_Math,69.066423,57.803468,166.666667,17.355915,108.863198,66.0,899.757576,137.616141,128.964414,0.143332,...,0.075368,0.079783,0.079030,0.077964,0.073870,0.078506,0.070326,0.077253,0.078388,0.080431
y9z6_Reading,70.045812,58.939096,133.333333,10.996977,74.394237,68.0,870.500000,94.581071,92.515420,0.106278,...,0.076081,0.081821,0.079626,0.060204,0.060455,0.060389,0.069209,0.080747,0.082700,0.093829
y9z6_Speaking,75.767263,61.601643,173.410405,22.408291,111.808762,71.0,833.521127,138.003990,143.843994,0.172574,...,0.084952,0.080655,0.086512,0.063754,0.080221,0.082238,0.074026,0.076112,0.080017,0.082434


#### List of subject_task that have all 3 modalities and corresponding labels

In [10]:
# Row labels present both in the merged feature table and in the label table
idx = X.merge(labels, left_index=True, right_index=True).index.tolist()
print('There are {0:2d} tasks with all 3 modalities'.format(len(idx)))

There are 370 tasks with all 3 modalities


In [11]:
# Restrict the labels to the tasks kept in idx, then select the binary target
labels = labels.loc[idx, :]
y = labels.loc[:, 'binary-stress']
# Class balance of the binary target
y.value_counts()

1    255
0    115
Name: binary-stress, dtype: int64

#### Balancing the dataset

As the audio component of the dataset is composed of verbal tasks only, the subset of the data featuring all modalities shows strong imbalance in the answers to the self assessments, thus limiting the subsequent analyses and affecting the classification models built on StressID.

In order to balance the data, in each split the training set is resampled using downsampling or SMOTE.

# Classification of Binary Stress

# Unimodal baselines

Several models are tested: Random Forests, K nearest neighbors, SVM, and Multi Layer Perceptron. All models are fitted 10 times on random splits, and the average scores over 10 repetitions are reported.

The dimensionality of the feature matrix can be reduced using PCA or feature selection. Feature selection is performed using an L1 penalty or Recursive Feature Elimination (RFE). The optimal number of features is determined using RFECV.

#### Physiological features only

In [7]:
# Physiological features for the tasks listed in idx (all columns kept)
x = x_phys.loc[idx, :]

In [9]:
# No feature selection for this unimodal baseline
feature_selector = None
n_splits = 10

# Candidate models; hidden_layer_sizes=[] gives the MLP no hidden layer
list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf'),
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
]

# Run every candidate over n_splits splits, asking for SMOTE resampling
res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.470424,0.521094,3.676411
MLPClassifier,0.659739,0.583025,4.568843
RandomForestClassifier,0.66206,0.57249,3.817336
SVC,0.648414,0.542934,3.428785


In [10]:
# Per-classifier standard deviation of the scores stored in res
print('Standard deviation:')
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Standard deviation:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.023457,0.046514,1.081279
MLPClassifier,0.054152,0.072987,0.627508
RandomForestClassifier,0.04877,0.0453,0.660402
SVC,0.043025,0.053736,0.506762


#### Videos features only

In [11]:
# Video features for the tasks listed in idx (all columns kept)
x = x_video.loc[idx, :]

In [12]:
# Video-only baseline: same candidate models, no feature selection
n_splits = 10
feature_selector = None

list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf'),
    # An empty hidden_layer_sizes list means the MLP has no hidden layer
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
]

res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.637153,0.635802,0.924695
MLPClassifier,0.609006,0.569675,1.698219
RandomForestClassifier,0.629569,0.580793,1.217254
SVC,0.67735,0.621625,0.813084


In [13]:
# Per-classifier standard deviation of the video-only scores
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.033166,0.035324,0.275938
MLPClassifier,0.031767,0.032929,0.423278
RandomForestClassifier,0.035083,0.039725,0.270579
SVC,0.039666,0.043879,0.170485


#### Audio features only

In [12]:
# Audio features for the tasks listed in idx (all columns kept)
x = x_audio.loc[idx, :]

In [15]:
# Audio-only baseline: same candidate models, no feature selection
feature_selector = None
n_splits = 10

list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf'),
    # An empty hidden_layer_sizes list means the MLP has no hidden layer
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
]

res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.592505,0.626429,2.765081
MLPClassifier,0.636574,0.581915,3.767431
RandomForestClassifier,0.677638,0.62193,3.049634
SVC,0.666637,0.609842,2.727086


In [16]:
# Per-classifier standard deviation of the audio-only scores
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.061516,0.050459,0.565011
MLPClassifier,0.060768,0.074888,0.479263
RandomForestClassifier,0.048648,0.042891,0.155151
SVC,0.036819,0.043496,0.264714


# Multimodal baselines

In [13]:
# Restrict the merged table to the labelled tasks, in the same order as y,
# exactly like the unimodal cells do. `.loc[idx]` also returns a new frame, so
# relabelling the columns below no longer mutates X through an alias.
x = X.loc[idx]
# Column labels mix strings and integers (the audio columns come from a
# header-less CSV); convert all of them to strings.
# NOTE(review): presumably needed because scikit-learn rejects mixed-type
# feature names -- confirm.
x.columns = x.columns.astype(str)

### Feature level fusion + ML

Several models are tested: Random Forests, SVM, Multi Layer Perceptron and Deep Belief Neural network (Jaratrotkamjorn et al., 2019). All models are fitted 10 times on random splits, and the average scores over 10 repetitions are reported.



In [18]:
# Hyper-parameters shared by the two stacked RBM layers
rbm_params = dict(learning_rate=0.1, batch_size=32, n_iter=20, verbose=0)

rbm1, rbm2 = BernoulliRBM(**rbm_params), BernoulliRBM(**rbm_params)

# "Deep belief" classifier: two RBM feature extractors followed by a logistic regression.
# NOTE(review): scikit-learn describes BernoulliRBM inputs as binary or within [0, 1];
# check what preprocessing make_nclassif_random_splits_resample applies before this pipeline.
DBClassifier = Pipeline(steps=[
    ('rbm1', rbm1),
    ('rbm2', rbm2),
    ('logreg', LogisticRegression()),
])

In [19]:
# Feature-level fusion: PCA is requested as the dimensionality reduction step
feature_selector = 'PCA'
n_splits = 10

list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    SVC(gamma='auto', kernel='rbf', random_state=0),
    # An empty hidden_layer_sizes list means the MLP has no hidden layer
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
    # Appears as "Pipeline" in the result tables
    DBClassifier,
]

res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MLPClassifier,0.663725,0.615927,25.344947
Pipeline,0.589698,0.529249,25.575035
RandomForestClassifier,0.652899,0.582047,25.345179
SVC,0.646521,0.566594,25.062297


In [20]:
# Per-classifier standard deviation of the feature-level fusion scores
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MLPClassifier,0.043797,0.038318,2.093238
Pipeline,0.06597,0.053314,1.897425
RandomForestClassifier,0.076018,0.071647,3.199154
SVC,0.096795,0.058674,2.612903


### SVMs + Decision level fusion (Xu et al., 2018, Rao et al., 2019)

Several decision rules are tested: sum rule, product rule, maximum rule and average rule. All models are fitted 10 times on random splits, and the average scores over 10 repetitions are reported.

In [14]:
def multimodal_decision_fusion(X, y, n_splits=10, feature_selector=False, classifier=None, impute=True, scale=True, verbose=True, random_state=None):
    """Score decision-level fusion of per-modality classifiers on random splits.

    For each of ``n_splits`` random 80/20 train/test splits of ``y``, one
    pipeline (optional imputation, optional scaling, optional PCA, classifier)
    is fitted per modality on a SMOTE-oversampled training set. The class
    probabilities predicted on the test rows are fused across modalities with
    four rules (sum, product, average, maximum) and each fused prediction is
    scored against ``y_test``.

    Parameters
    ----------
    X : sequence of pandas.DataFrame
        One feature table per modality. Rows are selected with ``.loc`` using
        the index labels of ``y``, so every table must contain those labels.
    y : pandas.Series
        Class labels; its index defines the samples that are split.
    n_splits : int, default 10
        Number of random train/test splits.
    feature_selector : bool, default False
        When truthy, a ``PCA(n_components=0.95, svd_solver='full')`` step is
        inserted before the classifier.
    classifier : estimator or None, default None
        Estimator exposing ``fit`` and ``predict_proba``. The same object is
        refitted for every modality and split. Defaults to an RBF ``SVC`` with
        ``probability=True``.
    impute : bool, default True
        Add an ``IterativeImputer`` step to each pipeline.
    scale : bool, default True
        Add a ``StandardScaler`` step to each pipeline.
    verbose : bool, default True
        Print a progress line per split.
    random_state : int or None, default None
        When given, split ``s`` uses seed ``random_state + s`` for both the
        train/test split and SMOTE. ``None`` keeps the unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per (split, fusion rule) with columns ``n`` (split number),
        ``f1-score`` (weighted F1), ``accuracy`` (balanced accuracy) and
        ``fusion method`` ('Sum', 'Product', 'Average' or 'Maximum').

    Raises
    ------
    ValueError
        If ``X`` contains no modality.
    """
    # PCA is not part of the notebook's import cell; import it where it is used.
    from sklearn.decomposition import PCA

    if len(X) == 0:
        raise ValueError('X must contain at least one modality')

    # Default classifier tested: SVM. Compare against None explicitly instead of
    # truth-testing an estimator object.
    if classifier is None:
        classifier = SVC(gamma='auto', kernel='rbf', random_state=0, probability=True)

    # Fusion rules, in the order their rows are emitted for each split
    fusion_rules = (
        ('Sum', np.sum),
        ('Product', np.prod),
        ('Average', np.mean),
        ('Maximum', np.max),
    )

    # Rows are collected in a list and turned into a DataFrame once at the end
    # (DataFrame.append was removed in pandas 2.0 and was quadratic anyway).
    records = []

    for s in range(n_splits):

        split_seed = None if random_state is None else random_state + s

        # Create the random split on the labels; the same row labels are then
        # used to slice every modality table.
        y_train, y_test = train_test_split(y, test_size=0.2, random_state=split_seed)
        train_idx = list(y_train.index)
        test_idx = list(y_test.index)

        preds = []
        classes = None

        for x in X:
            x_train = x.loc[train_idx]
            x_test = x.loc[test_idx]

            # NOTE(review): oversampling runs before the imputer, so SMOTE sees
            # any missing value as-is -- confirm the inputs are NaN-free.
            oversample = SMOTE(random_state=split_seed)
            x_train, y_t = oversample.fit_resample(x_train, y_train)

            # Assemble the pipeline for this modality from the enabled steps
            steps = []
            if impute:
                steps.append(('impute', IterativeImputer()))
            if scale:
                steps.append(('scale', StandardScaler()))
            if feature_selector:
                steps.append(('pca', PCA(n_components=0.95, svd_solver='full')))
            steps.append(('classification', classifier))
            clf = Pipeline(steps)

            clf.fit(x_train, y_t)

            # Keep the class probabilities; they are fused after the loop
            preds.append(clf.predict_proba(x_test))
            # predict_proba columns follow classes_, which maps argmax positions
            # back to the actual labels.
            classes = np.asarray(clf.named_steps['classification'].classes_)

        # For each split, fuse the decisions
        if verbose:
            print('Split {0:2d}/{1:2d}'.format(s+1, n_splits))

        stacked = np.stack(preds)  # (n_modalities, n_test_samples, n_classes)
        for rule_name, reduce_fn in fusion_rules:
            fused_scores = reduce_fn(stacked, axis=0)
            y_fused = classes[np.argmax(fused_scores, axis=1)]
            records.append({
                'n': int(s),
                'f1-score': f1_score(y_test, y_fused, average='weighted'),
                'accuracy': balanced_accuracy_score(y_test, y_fused),
                'fusion method': rule_name,
            })

    return pd.DataFrame(records, columns=['n', 'f1-score', 'accuracy', 'fusion method'])

In [21]:
# One table per modality, each restricted to the tasks listed in idx
X_ = [modality.loc[idx] for modality in (x_phys, x_video, x_audio)]
res = multimodal_decision_fusion(
    X_, y,
    n_splits=10,
    feature_selector=False,
    classifier=None,
    impute=True,
    scale=True,
    verbose=True,
)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


In [22]:
# Mean score of each fusion rule over the splits
print('Average scores:')
res.groupby(['fusion method']).mean().loc[:, ['f1-score', 'accuracy']]

Average scores:


Unnamed: 0_level_0,f1-score,accuracy
fusion method,Unnamed: 1_level_1,Unnamed: 2_level_1
Average,0.68075,0.620311
Maximum,0.658458,0.590629
Product,0.677721,0.616664
Sum,0.68075,0.620311


In [23]:
# Standard deviation of each fusion rule's scores over the splits
print('Standard deviation:')
res.groupby(['fusion method']).std().loc[:, ['f1-score', 'accuracy']]

Standard deviation:


Unnamed: 0_level_0,f1-score,accuracy
fusion method,Unnamed: 1_level_1,Unnamed: 2_level_1
Average,0.058085,0.06508
Maximum,0.060912,0.058727
Product,0.062523,0.064117
Sum,0.058085,0.06508


# Classification of 3-class Stress

In [43]:
# Same tasks as before, now with the 3-class affect target
labels = labels.loc[idx, :]
y = labels.loc[:, 'affect3-class']
# Class balance of the 3-class target
y.value_counts()

2    181
1    121
0     68
Name: affect3-class, dtype: int64

#### Physiological features only

In [44]:
# Physiological features for the tasks listed in idx
x = x_phys.loc[idx, :]

# No feature selection for this unimodal baseline
feature_selector = None
n_splits = 10

list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf'),
    # An empty hidden_layer_sizes list means the MLP has no hidden layer
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
]

res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.38341,0.41234,3.97214
MLPClassifier,0.490522,0.456036,5.552881
RandomForestClassifier,0.509238,0.481506,4.645801
SVC,0.503339,0.459486,4.387486


In [46]:
# Per-classifier standard deviation of the 3-class physiological scores
print('Standard deviation:')
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Standard deviation:


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.053244,0.049996,0.870104
MLPClassifier,0.041509,0.044314,1.468419
RandomForestClassifier,0.0594,0.062424,0.786586
SVC,0.051752,0.054886,0.887611


#### Videos features only

In [60]:
# Video features for the tasks listed in idx
x = x_video.loc[idx, :]

# No feature selection for this unimodal baseline
n_splits = 10
feature_selector = None

list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf'),
    # An empty hidden_layer_sizes list means the MLP has no hidden layer
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
]

res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.518888,0.549537,0.735935
MLPClassifier,0.52411,0.526601,2.036188
RandomForestClassifier,0.549462,0.540889,1.115707
SVC,0.583186,0.563164,0.770009


In [48]:
# Per-classifier standard deviation of the 3-class video scores
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.044863,0.03559,0.032522
MLPClassifier,0.056727,0.057039,0.380924
RandomForestClassifier,0.055365,0.046585,0.013551
SVC,0.050242,0.043462,0.017297


#### Audio features only

In [49]:
# Audio features for the tasks listed in idx
x = x_audio.loc[idx, :]

# No feature selection for this unimodal baseline
feature_selector = None
n_splits = 10

list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    KNeighborsClassifier(n_neighbors=3),
    SVC(gamma='auto', kernel='rbf'),
    # An empty hidden_layer_sizes list means the MLP has no hidden layer
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
]

res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.404606,0.464644,2.881928
MLPClassifier,0.517395,0.492113,5.211042
RandomForestClassifier,0.561543,0.533102,3.777326
SVC,0.562356,0.543625,3.228422


In [51]:
# Per-classifier standard deviation of the 3-class audio scores
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNeighborsClassifier,0.068242,0.062189,0.267287
MLPClassifier,0.065084,0.058234,1.95275
RandomForestClassifier,0.035638,0.041833,1.148762
SVC,0.060419,0.060718,1.11924


#### Feature level fusion + ML

In [61]:
# Restrict the merged table to the labelled tasks, in the same order as y,
# exactly like the unimodal cells do. `.loc[idx]` also returns a new frame, so
# relabelling the columns below does not mutate X through an alias.
x = X.loc[idx]
# Column labels mix strings and integers (the audio columns come from a
# header-less CSV); convert all of them to strings.
# NOTE(review): presumably needed because scikit-learn rejects mixed-type
# feature names -- confirm.
x.columns = x.columns.astype(str)

In [62]:
# Feature-level fusion on the 3-class target: PCA is requested as the reduction step
feature_selector = 'PCA'
n_splits = 10

list_classif = [
    RandomForestClassifier(max_depth=5, random_state=0),
    SVC(gamma='auto', kernel='rbf', random_state=0),
    # An empty hidden_layer_sizes list means the MLP has no hidden layer
    MLPClassifier(max_iter=5000, random_state=0, hidden_layer_sizes=[]),
    # Appears as "Pipeline" in the result tables
    DBClassifier,
]

res, conf = make_nclassif_random_splits_resample(
    x, y,
    n_splits=n_splits,
    resamp='SMOTE',
    feature_selector=feature_selector,
    list_classifiers=list_classif,
    verbose=True,
)
avg_res(res)

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10


Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MLPClassifier,0.516237,0.510915,33.006635
Pipeline,0.305324,0.329981,32.343335
RandomForestClassifier,0.493972,0.463853,32.084492
SVC,0.555028,0.513555,31.954127


In [63]:
# Per-classifier standard deviation of the 3-class feature-level fusion scores
res.groupby(['classifier']).std().loc[:, ['f1-score', 'accuracy', 'time']]

Unnamed: 0_level_0,f1-score,accuracy,time
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MLPClassifier,0.071693,0.075963,2.4888
Pipeline,0.093418,0.047609,2.848335
RandomForestClassifier,0.074187,0.073674,2.880778
SVC,0.062542,0.05305,2.295514


#### SVMs + Decision level fusion (Xu et al., 2018, Rao et al., 2019)

In [59]:
# One table per modality, each restricted to the tasks listed in idx
X_ = [modality.loc[idx] for modality in (x_phys, x_video, x_audio)]
res = multimodal_decision_fusion(
    X_, y,
    n_splits=10,
    feature_selector=False,
    classifier=None,
    impute=True,
    scale=True,
    verbose=True,
)

# Mean score of each fusion rule over the splits
print('Average scores:')
res.groupby(['fusion method']).mean().loc[:, ['f1-score', 'accuracy']]

Split  1/10
Split  2/10
Split  3/10
Split  4/10
Split  5/10
Split  6/10
Split  7/10
Split  8/10
Split  9/10
Split 10/10
Average scores:


Unnamed: 0_level_0,f1-score,accuracy
fusion method,Unnamed: 1_level_1,Unnamed: 2_level_1
Average,0.627157,0.585002
Maximum,0.614504,0.576901
Product,0.609547,0.563009
Sum,0.627157,0.585002


In [55]:
# Standard deviation of each fusion rule's scores over the splits
print('Standard deviation:')
res.groupby(['fusion method']).std().loc[:, ['f1-score', 'accuracy']]

Standard deviation:


Unnamed: 0_level_0,f1-score,accuracy
fusion method,Unnamed: 1_level_1,Unnamed: 2_level_1
Average,0.058009,0.074931
Maximum,0.064421,0.073418
Product,0.059427,0.07217
Sum,0.058009,0.074931
