In [1]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split, GridSearchCV, PredefinedSplit
from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC

In [2]:
# 14, 6, 17, 3, 2, 13, 9, 10, 15, 8, 7, 11, 4, 5, 16
SUBJECT_ID_TEST = [6, 17, 3, 2, 13, 9, 10, 15, 8, 7, 11, 4, 5]
SUBJECT_ID_TEST = ['S'+str(x) for x in SUBJECT_ID_TEST]
SUBJECT_ID_TEST = 'S9'

subject_id_test = SUBJECT_ID_TEST
# # 'GM1', 'EK1', 'NM1', 'RY1', 'KSG1', 'AD1', 'NM3', 'SJ1', 'BK1', 'RY2', 'GM2', 'MT1', 'NM2'
# SUBJECT_ID_TEST = 'AD1' # SJ1

MODEL_NAME = 'SCL_1024_LARS_SGD'
SAVE_MODEL_DIR = 'Output'
NAME_DATASET = 'WESAD'

##### READ DATASET #####
if NAME_DATASET == 'WESAD':
    DATA_DIR = '/home/nvtu/PhD_Work/StressDetection/DATA/MyDataset/WESAD'
    data_group = np.load(f'{DATA_DIR}/{NAME_DATASET}_WRIST_groups_1.npy')
    data_gt = np.load(f'{DATA_DIR}/{NAME_DATASET}_WRIST_ground_truth_1.npy')
    data_ft = np.load(f'{DATA_DIR}/{NAME_DATASET}_WRIST_stats_feats_1.npy')
else:
    DATA_DIR = '/home/nvtu/PhD_Work/StressDetection/DATA/MyDataset/AffectiveROAD_Data/Database'
    NAME_DATASET = 'AffectiveROAD'
    data_group = np.load(f'{DATA_DIR}/{NAME_DATASET}_groups_1.npy')
    data_gt = np.load(f'{DATA_DIR}/{NAME_DATASET}_ground_truth_1.npy')
    data_ft = np.load(f'{DATA_DIR}/{NAME_DATASET}_stats_feats_1.npy')
    indices = np.where(data_gt >= 0)[0]
    data_ft = data_ft[indices]
    data_group = data_group[indices]
    data_gt = data_gt[indices]
    
# data_ft_con = np.load(f'{SAVE_MODEL_DIR}/{NAME_DATASET}/EmbedFt/EmbedFt_{MODEL_NAME}_{subject_id_test}.npy')

# Create dataframe for dataset
column_values = [f'f{x}' for x in range(data_ft.shape[1])]
data_full = pd.DataFrame(data = data_ft,  
                         columns = column_values)
data_full['subject_id'] = data_group
data_full['label'] = data_gt
list_subject_id = np.unique(data_full['subject_id']).tolist()

In [3]:
data_full

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f52,f53,f54,f55,f56,f57,f58,f59,subject_id,label
0,71.001875,11.594887,853.642086,154.521049,2.339071,0.333258,109.0,78.417266,94.244604,1125.00,...,-0.002344,0.002462,-0.018732,0.003601,21.886141,0.890333,0.000010,0.003222,S14,0
1,71.275764,11.993190,852.068345,156.208126,2.194700,0.313355,110.0,79.136691,94.964029,1125.00,...,-0.001870,0.002883,-0.009692,0.002393,21.886140,0.801697,0.000006,0.002370,S14,0
2,70.995376,11.993262,854.428957,158.174694,2.008055,0.290720,110.0,79.136691,94.964029,1125.00,...,-0.001849,0.002447,-0.009703,0.002347,21.886139,0.813464,0.000006,0.002419,S14,0
3,70.929045,12.084673,854.991007,159.511131,1.910564,0.295406,109.0,78.417266,94.244604,1125.00,...,-0.001749,0.003175,-0.009677,0.002320,21.886140,0.762883,0.000005,0.002215,S14,0
4,71.047971,12.048325,853.866906,159.098395,1.924567,0.293828,109.0,78.417266,94.964029,1125.00,...,-0.001919,0.001426,-0.009659,0.002218,21.886143,0.726163,0.000005,0.002130,S14,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35953,66.946620,12.147755,906.846374,169.934462,0.147691,-0.270101,114.0,87.022901,96.183206,968.75,...,-0.001389,0.001393,-0.007822,0.001640,21.886119,0.537796,0.000002,0.001557,S16,0
35954,66.850106,12.281809,906.129808,170.393058,0.133094,-0.258618,113.0,86.923077,96.153846,968.75,...,-0.001420,0.001235,-0.007863,0.001588,21.886119,0.516027,0.000002,0.001504,S16,0
35955,66.560432,12.319285,909.855769,171.456485,0.095429,-0.301767,114.0,87.692308,96.153846,968.75,...,-0.001361,0.001216,-0.007829,0.001570,21.886118,0.519307,0.000002,0.001503,S16,0
35956,66.629416,12.310309,909.735577,171.354097,0.100256,-0.301718,113.0,86.923077,96.153846,968.75,...,-0.001382,0.001223,-0.007867,0.001576,21.886118,0.511775,0.000002,0.001492,S16,0


In [17]:
data_test = data_full[data_full.subject_id == subject_id_test]
data_train = data_full[data_full.subject_id != subject_id_test]

# split test sets
X_test = data_test.iloc[:,:-2].to_numpy()
y_test = data_test.iloc[:,-1].to_numpy()

# split into train - validate
X_train = data_train.iloc[:,:-2].to_numpy()
y_train = data_train.iloc[:,-1].to_numpy()

# Scaler Data
# s_scaler = StandardScaler()
# r_scaler = RobustScaler()
# X_train_hr = X_train[:,0:25]
# X_train_eda = X_train[:,25:]
# scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

In [18]:
# clf = RandomForestClassifier(n_estimators = 1000, random_state = 0, 
#                              n_jobs = -1, max_features='sqrt', 
#                              oob_score=True, bootstrap=True, class_weight = 'balanced')

clf = SVC(kernel='rbf', random_state=0, 
          class_weight='balanced', C=10)

In [19]:
clf.fit(X_train, y_train)

RandomForestClassifier(class_weight='balanced', max_features='sqrt',
                       n_estimators=1000, n_jobs=-1, oob_score=True,
                       random_state=0)

In [20]:
# Prediction
Y_pred_test = clf.predict(X_test)
Y_pred_train = clf.predict(X_train)
acc_test = accuracy_score(y_test, Y_pred_test)
acc_train = accuracy_score(y_train, Y_pred_train)

f1_test = f1_score(y_test, Y_pred_test)
f1_train = f1_score(y_train, Y_pred_train)

bacc_test = balanced_accuracy_score(y_test, Y_pred_test)
bacc_train = balanced_accuracy_score(y_train, Y_pred_train)

In [8]:
bacc_train, bacc_test # SCL

(1.0, 0.7619047619047619)

In [9]:
f1_train, f1_test # SCL

(1.0, 0.6875000000000001)

In [8]:
# SCL_128
bacc_train, bacc_test, f1_train, f1_test

(1.0, 0.7711604749138261, 1.0, 0.7028360049321825)

In [9]:
# Original
bacc_train, bacc_test, f1_train, f1_test

(1.0, 0.8085714285714285, 1.0, 0.7632508833922261)

In [11]:
# Original - S17
bacc_train, bacc_test, f1_train, f1_test

(1.0, 0.8739635157545605, 1.0, 0.855787476280835)

In [8]:
# Original - Robust Scaler
bacc_train, bacc_test, f1_train, f1_test

(1.0, 0.8095238095238095, 1.0, 0.7647058823529412)

In [10]:
# SCL_1024
bacc_train, bacc_test, f1_train, f1_test

(1.0, 0.7961904761904761, 1.0, 0.7440191387559809)

In [21]:
# SCL_1024_LARS_SGD
bacc_train, bacc_test, f1_train, f1_test

(1.0, 0.7685714285714286, 1.0, 0.6988847583643123)