In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')


from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [16]:
df = pd.read_csv('data/combined_subjects7.csv', index_col=0)
subject_id_list = df['subject'].unique()


In [17]:
feats_all =   ['BVP_mean', 'BVP_std', 'BVP_min', 'BVP_max',
           'EDA_phasic_mean', 'EDA_phasic_std', 'EDA_phasic_min', 'EDA_phasic_max', 'EDA_smna_mean',
           'EDA_smna_std', 'EDA_smna_min', 'EDA_smna_max', 'EDA_tonic_mean',
           'EDA_tonic_std', 'EDA_tonic_min', 'EDA_tonic_max', 'Resp_mean',
           'Resp_std', 'Resp_min', 'Resp_max', 'TEMP_mean', 'TEMP_std', 'TEMP_min',
           'TEMP_max', 'TEMP_slope', 'BVP_peak_freq','BVP_peaks_cnt','Resp_peak_cnt','subject', 'label']

feats_without_EDA =   ['BVP_mean', 'BVP_std', 'BVP_min', 'BVP_max','Resp_mean','Resp_std', 'Resp_min', 'Resp_max', 'TEMP_mean', 'TEMP_std', 'TEMP_min',
           'TEMP_max', 'TEMP_slope', 'BVP_peak_freq','BVP_peaks_cnt','Resp_peak_cnt','subject', 'label']

feats_EDA = ['EDA_phasic_mean', 'EDA_phasic_std', 'EDA_phasic_min', 'EDA_phasic_max', 'EDA_smna_mean',
           'EDA_smna_std', 'EDA_smna_min', 'EDA_smna_max', 'EDA_tonic_mean',
           'EDA_tonic_std', 'EDA_tonic_min', 'EDA_tonic_max','subject', 'label']
df=df[feats_all]

In [18]:
subjects = df['subject'].unique()

In [19]:
x=df.drop(columns=['label'])
y=df['label']

In [20]:
def get_binary_label(label):
    if label == 0 or label == 1:
        return 0
    else:
        return 1

In [21]:
y = y.apply(get_binary_label)

In [22]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state = 42)

In [23]:
params={'n_estimators':[10,20,40,50,70,100,150,200,500,700,1000,1500]}
model = RandomForestClassifier()
clf = GridSearchCV(model,param_grid=params,cv=3,n_jobs=-1)

In [24]:
rf=clf.fit(X_train,y_train)

In [25]:
best_ntrees=rf.best_params_['n_estimators']
best_ntrees

700

In [26]:
preds={}
clr={}
for i in subjects:
    x_train=x[x['subject']!=i]
    x_test=x[x['subject']==i]
    y_train=y[x['subject']!=i]
    y_test=y[x['subject']==i]
    
    clf = RandomForestClassifier(n_estimators=100)
    
    best=clf.fit(x_train, y_train)
    
    y_predicted = best.predict(x_test)
    
    print("Subject ",i)
     
    preds[i]=y_predicted
    print(classification_report(y_predicted,y_test))
    a=classification_report(y_predicted,y_test,target_names=['Non-Stress','Stress'],output_dict=True)
    clr[i]=a

Subject  2
              precision    recall  f1-score   support

           0       1.00      0.92      0.96       178
           1       0.77      1.00      0.87        51

    accuracy                           0.93       229
   macro avg       0.89      0.96      0.91       229
weighted avg       0.95      0.93      0.94       229

Subject  3
              precision    recall  f1-score   support

           0       0.80      0.85      0.83       154
           1       0.67      0.59      0.63        78

    accuracy                           0.76       232
   macro avg       0.74      0.72      0.73       232
weighted avg       0.76      0.76      0.76       232

Subject  4
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       165
           1       1.00      1.00      1.00        68

    accuracy                           1.00       233
   macro avg       1.00      1.00      1.00       233
weighted avg       1.00      1.00      1.

In [28]:
acc=0
nonstress_f1=0
stress_f1=0
for i in subjects:
    acc=acc+clr[i]['accuracy']
    nonstress_f1=nonstress_f1+clr[i]['Non-Stress']['f1-score']
    stress_f1=stress_f1+clr[i]['Stress']['f1-score']

print("Average Accuracy : ",acc/len(subjects))
print("F1 score for Non Stress : ",nonstress_f1/len(subjects))
print("F1 score for Stress : ",stress_f1/len(subjects))

Average Accuracy :  0.8721357744690577
F1 score for Non Stress :  0.8813965174757189
F1 score for Stress :  0.8204521830186986
