In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')


from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier

In [2]:
df = pd.read_csv('data/combined_subjects7.csv', index_col=0)
subject_id_list = df['subject'].unique()


In [3]:
feats_all =   ['BVP_mean', 'BVP_std', 'BVP_min', 'BVP_max',
           'EDA_phasic_mean', 'EDA_phasic_std', 'EDA_phasic_min', 'EDA_phasic_max', 'EDA_smna_mean',
           'EDA_smna_std', 'EDA_smna_min', 'EDA_smna_max', 'EDA_tonic_mean',
           'EDA_tonic_std', 'EDA_tonic_min', 'EDA_tonic_max', 'Resp_mean',
           'Resp_std', 'Resp_min', 'Resp_max', 'TEMP_mean', 'TEMP_std', 'TEMP_min',
           'TEMP_max', 'TEMP_slope', 'BVP_peak_freq','BVP_peaks_cnt','Resp_peak_cnt','subject', 'label']

feats_without_EDA =   ['BVP_mean', 'BVP_std', 'BVP_min', 'BVP_max','Resp_mean','Resp_std', 'Resp_min', 'Resp_max', 'TEMP_mean', 'TEMP_std', 'TEMP_min',
           'TEMP_max', 'TEMP_slope', 'BVP_peak_freq','BVP_peaks_cnt','Resp_peak_cnt','subject', 'label']

feats_EDA = ['EDA_phasic_mean', 'EDA_phasic_std', 'EDA_phasic_min', 'EDA_phasic_max', 'EDA_smna_mean',
           'EDA_smna_std', 'EDA_smna_min', 'EDA_smna_max', 'EDA_tonic_mean',
           'EDA_tonic_std', 'EDA_tonic_min', 'EDA_tonic_max','subject', 'label']
df=df[feats_all]

In [4]:
subjects = df['subject'].unique()

In [5]:
x=df.drop(columns=['label'])
y=df['label']

In [6]:
def get_binary_label(label):
    if label == 0 or label == 1:
        return 0
    else:
        return 1

In [7]:
y = y.apply(get_binary_label)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state = 42)

In [9]:
params={'n_estimators':[50,100,130,145,150,160,200,500]}

model = AdaBoostClassifier()
clf = GridSearchCV(model,param_grid=params,cv=3,verbose=10,n_jobs=-1)

In [10]:
adb=clf.fit(X_train,y_train)

Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done   4 out of  24 | elapsed:    3.5s remaining:   18.0s
[Parallel(n_jobs=-1)]: Done   7 out of  24 | elapsed:    4.0s remaining:    9.8s
[Parallel(n_jobs=-1)]: Done  10 out of  24 | elapsed:    4.1s remaining:    5.8s
[Parallel(n_jobs=-1)]: Done  13 out of  24 | elapsed:    4.6s remaining:    3.8s
[Parallel(n_jobs=-1)]: Done  16 out of  24 | elapsed:    4.8s remaining:    2.4s
[Parallel(n_jobs=-1)]: Done  19 out of  24 | elapsed:    5.5s remaining:    1.4s
[Parallel(n_jobs=-1)]: Done  22 out of  24 | elapsed:    6.9s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    7.1s finished


In [11]:
best_ntrees=adb.best_params_['n_estimators']
best_ntrees

150

In [12]:
preds={}
clr={}
for i in subjects:
    x_train=x[x['subject']!=i]
    x_test=x[x['subject']==i]
    y_train=y[x['subject']!=i]
    y_test=y[x['subject']==i]
    
    clf = AdaBoostClassifier(n_estimators=best_ntrees)
    
    best=clf.fit(x_train, y_train)
    
    y_predicted = best.predict(x_test)
    
    print("Subject ",i)
     
    preds[i]=y_predicted
    print(classification_report(y_predicted,y_test))
    a=classification_report(y_predicted,y_test,target_names=['Non-Stress','Stress'],output_dict=True)
    clr[i]=a
    

Subject  2
              precision    recall  f1-score   support

           0       1.00      0.89      0.94       183
           1       0.70      1.00      0.82        46

    accuracy                           0.91       229
   macro avg       0.85      0.95      0.88       229
weighted avg       0.94      0.91      0.92       229

Subject  3
              precision    recall  f1-score   support

           0       0.72      0.94      0.82       126
           1       0.88      0.58      0.70       106

    accuracy                           0.77       232
   macro avg       0.80      0.76      0.76       232
weighted avg       0.80      0.77      0.76       232

Subject  4
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       165
           1       1.00      1.00      1.00        68

    accuracy                           1.00       233
   macro avg       1.00      1.00      1.00       233
weighted avg       1.00      1.00      1.

In [13]:
acc=0
nonstress_f1=0
stress_f1=0
for i in subjects:
    acc=acc+clr[i]['accuracy']
    nonstress_f1=nonstress_f1+clr[i]['Non-Stress']['f1-score']
    stress_f1=stress_f1+clr[i]['Stress']['f1-score']

print("Average Accuracy : ",acc/len(subjects))
print("F1 score for Non Stress : ",nonstress_f1/len(subjects))
print("F1 score for Stress : ",stress_f1/len(subjects))

Average Accuracy :  0.8707929284681686
F1 score for Non Stress :  0.8737592241505355
F1 score for Stress :  0.8323164169219973
