In [1]:
import configparser
import os
import os.path as osp
from collections import Counter

import numpy as np
import pandas as pd
from sklearn.model_selection import LeaveOneGroupOut
from tqdm import tqdm

from classifiers import BinaryClassifier

# Load heart statistical features and ground-truth from datasets

In [2]:
def get_dataset_folder_path(dataset_name: str) -> str:
    """Return the folder configured for a dataset in ``config.ini``.

    The path is read from the ``DATA_PATH`` section of the ``config.ini`` file
    located in the current working directory.

    Args:
        dataset_name: One of 'AffectiveROAD', 'WESAD_CHEST', 'WESAD_WRIST',
            'RESAMPLED_WESAD_CHEST' or 'DCU_NVT_EXP1'.

    Returns:
        The folder path stored in the config file for that dataset.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
            (Previously ``None`` was returned, which only failed later when
            the caller tried to join it with a file name.)
        KeyError: If the ``DATA_PATH`` section or the required option is
            missing, e.g. because ``config.ini`` could not be found.
    """
    # Option name inside the DATA_PATH section for every supported dataset;
    # all WESAD variants share one folder.
    config_keys = {
        'AffectiveROAD': 'affectiveROAD_dataset_path',
        'WESAD_CHEST': 'wesad_dataset_path',
        'WESAD_WRIST': 'wesad_dataset_path',
        'RESAMPLED_WESAD_CHEST': 'wesad_dataset_path',
        'DCU_NVT_EXP1': 'dcu_nvt_dataset_path',
    }
    if dataset_name not in config_keys:
        supported = ', '.join(sorted(config_keys))
        raise ValueError(f'Unknown dataset name {dataset_name!r}; expected one of: {supported}')

    # Read dataset path from config.ini file
    config_path = osp.join(os.getcwd(), 'config.ini')
    parser = configparser.ConfigParser()
    parser.read(config_path)  # silently yields an empty parser if the file is missing
    return parser['DATA_PATH'][config_keys[dataset_name]]

In [13]:
def load_dataset(dataset_name: str):
    """Load the feature array, ground-truth and group array of a dataset.

    The three ``.npy`` files are read from the folder returned by
    ``get_dataset_folder_path``. NaN entries of the feature array are set to 0
    and the distinct ground-truth values are printed. For 'AffectiveROAD',
    only samples whose ground-truth is >= 0 are kept.

    Args:
        dataset_name: Name of the dataset; also the prefix of the file names.

    Returns:
        A ``(dataset, ground_truth, groups)`` tuple of NumPy arrays.
    """
    folder = get_dataset_folder_path(dataset_name)

    # Resolve the three file paths up front: features, ground-truth, groups
    feats_path, labels_path, groups_path = (
        osp.join(folder, file_name)
        for file_name in (
            f'{dataset_name}_heart_stats_feats_1_60.npy',
            f'{dataset_name}_ground_truth_1_60.npy',
            f'{dataset_name}_groups_1_60.npy',
        )
    )

    dataset = np.load(feats_path)
    ground_truth = np.load(labels_path)
    groups = np.load(groups_path)  # corresponding user_id labels
    print(set(ground_truth))

    # Zero out missing feature values
    nan_mask = np.isnan(dataset)
    dataset[nan_mask] = 0

    # Every dataset except AffectiveROAD is returned unfiltered
    if dataset_name != 'AffectiveROAD':
        return dataset, ground_truth, groups

    # AffectiveROAD: drop samples with a negative ground-truth value
    keep = np.where(ground_truth >= 0)[0]
    return dataset[keep], ground_truth[keep], groups[keep]

# Define functions to get output folder path and save results

In [14]:
def get_output_folder_path(dataset_name: str) -> str:
    """Return (and create if needed) the result folder of a dataset.

    The base folder is the ``result_path`` option in the ``DATA_PATH`` section
    of ``config.ini`` in the current working directory; the dataset name is
    appended as a sub-folder.

    Args:
        dataset_name: Name of the dataset, used as the sub-folder name.

    Returns:
        Path of the dataset's output folder, which exists on return.

    Raises:
        KeyError: If the ``DATA_PATH`` section or ``result_path`` option is
            missing from the config file.
    """
    config_path = osp.join(os.getcwd(), 'config.ini')
    parser = configparser.ConfigParser()
    parser.read(config_path)
    # Get output_folder_path for a specific dataset
    output_folder_path = osp.join(parser['DATA_PATH']['result_path'], dataset_name)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_folder_path, exist_ok=True)
    return output_folder_path

In [15]:
def dump_result_to_csv(results, dataset_name: str, detection_strategy: str, detector_type: str):
    """Write a results mapping to a CSV file in the dataset's output folder.

    The file is stored as
    ``<output folder>/<detector_type>/<dataset_name>-<detection_strategy>-heart_feat_selection.csv``.

    Args:
        results: Mapping accepted by ``pandas.DataFrame.from_dict``.
        dataset_name: Name of the dataset the results belong to.
        detection_strategy: Name of the classifier strategy, used in the file name.
        detector_type: Sub-folder name (the notebook uses 'General' and 'Personal').
    """
    output_folder_path = osp.join(get_output_folder_path(dataset_name), detector_type)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_folder_path, exist_ok=True)
    # Get output_file_path
    output_file_path = osp.join(output_folder_path, f'{dataset_name}-{detection_strategy}-heart_feat_selection.csv')
    # Generate DataFrame to save to csv format
    df = pd.DataFrame.from_dict(results)
    df.to_csv(output_file_path, index=False)

## Load dataset, ground-truth, and group labels

In [16]:
# -- Uncomment the dataset that you want to load -- #
# Keep exactly one assignment active. The chosen name selects the input files
# in load_dataset and the result sub-folder in get_output_folder_path.
dataset_name = 'AffectiveROAD'
# dataset_name = 'WESAD_CHEST'
# dataset_name = 'WESAD_WRIST'
# dataset_name = 'DCU_NVT_EXP1'

In [17]:
dataset, ground_truth, groups = load_dataset(dataset_name) # Load features, ground-truths and per-sample group ids (also prints the distinct ground-truth values)

{0, 1, -1}


# Define stress detection strategies

In [8]:
# -- Uncomment the detection strategy that you want to use -- #
# Keep exactly one assignment active. The value is passed to BinaryClassifier
# and is also used in the name of the CSV file written by dump_result_to_csv.
# detection_strategy = 'logistic_regression'
detection_strategy = 'random_forest'
# detection_strategy = 'svm'
# detection_strategy = 'mlp'
# detection_strategy = 'knn'

In [53]:
# Metric name handed to BinaryClassifier through its `scoring` argument
SCORING = 'balanced_accuracy'

# Build General Cross-population Stress Detector

In [54]:
# Build one classifier on the samples of all users.
# NOTE(review): logo_validation=True presumably enables leave-one-group-out
# validation over `groups` — confirm in classifiers.BinaryClassifier.
clf = BinaryClassifier(
    dataset,
    ground_truth,
    detection_strategy,
    logo_validation=True,
    groups=groups,
    scoring=SCORING,
)
# Run the classifier; returns the prediction results and the feature importances
results, feature_importances = clf.exec_classifier()

15it [02:57, 11.85s/it]


In [56]:
# Count how often each feature index ranks among the TOP_K most important
# features across the importance vectors returned by the general detector.
TOP_K = 20  # number of highest-ranked features kept per importance vector

# argsort is ascending, so the last TOP_K entries reversed are the top features
# in descending order of importance.
top_feature_indices = np.array([np.argsort(x)[-TOP_K:][::-1] for x in feature_importances])

# .tolist() turns NumPy integer scalars into plain ints so the printed dict
# shows bare numbers regardless of the NumPy scalar repr.
feature_counts = dict(Counter(top_feature_indices.ravel().tolist()))
print(feature_counts)          # feature index -> number of vectors in which it is in the top TOP_K
print(sorted(feature_counts))  # the selected feature indices in ascending order
print(len(feature_counts))     # how many distinct features were selected at least once

{1: 15, 6: 15, 21: 15, 22: 15, 5: 15, 0: 15, 2: 15, 10: 15, 3: 15, 7: 15, 23: 15, 16: 15, 17: 15, 8: 15, 9: 15, 24: 15, 4: 15, 18: 15, 20: 15, 14: 3, 11: 8, 12: 4}
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 17, 18, 20, 21, 22, 23, 24]
22


In [21]:
# Save results
# detector_type = 'General'
# dump_result_to_csv(results, dataset_name, detection_strategy, detector_type)

# Build Person-specific Stress Detector

In [35]:
# Train and evaluate one classifier per user, using only that user's samples.
# LeaveOneGroupOut is used solely to enumerate the sample indices of each group
# (the "test" side of every split); the training side is ignored.
logo = LeaveOneGroupOut()
balanced_accs = []
test_groups = []
# Number of splits equals the number of distinct groups; passing it as `total`
# lets tqdm show real progress instead of a bare iteration counter.
n_users = logo.get_n_splits(groups=groups)
for _, test_index in tqdm(logo.split(dataset, ground_truth, groups), total=n_users):
    user_id = groups[test_index][0]  # all samples of a split share the same group id
    user_dataset, user_ground_truth = dataset[test_index], ground_truth[test_index] # Get personal statistical features and ground-truths of a user

    # Re-initialize classifier when iterating a new user
    clf = BinaryClassifier(user_dataset, user_ground_truth, detection_strategy, cross_validation = True, scoring = SCORING)
    balanced_acc = clf.exec_classifier()

    if balanced_acc == -1:
        print(user_id)
        continue # Ignore this user as it only contains one class

    # Save user_id and his/her corresponding predicted results
    balanced_accs.append(balanced_acc)
    test_groups.append(user_id)
# NOTE(review): the key 'balanced_accurary_score' is misspelt but becomes a CSV
# column header; kept unchanged because downstream readers may depend on it.
results = { 'groups': test_groups, 'balanced_accurary_score': balanced_accs }



[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 89 90
 91 92 93 94]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39
  40  41  42  95  96  97  98  99 100 101]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60
  61  62  63 102 103 104 105 106 107 108]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81
  82  83  84 109 110 111 112 113 114 115]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, '



[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 56 57
 58 59 60 61]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 62 63 64
 65 66 67 68]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[43 44 45 46 47 48 49 50 51 52 53 54 55 69 70 71 72 73 74 87 88 89 90 91
 92 93 94]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 75  76  77  78  79  80  95  96  97  98  99 100 101 102 103 104 105 106
 107 108 109 110 111 112 113 114 115]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 81  82  83  84  85  86 116 117 118 119 12



[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 90 91
 92 93 94 95]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39
  40  41  42  43  96  97  98  99 100 101]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61
  62  63  64 102 103 104 105 106 107 108]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82
  83  84  85 109 110 111 112 113 114]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_es



[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 56 57
 58 59 60 61]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 62 63 64
 65 66 67 68]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[43 44 45 46 47 48 49 50 51 52 53 54 55 69 70 71 72 73 74 87 88 89 90 91
 92 93 94]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 75  76  77  78  79  80  95  96  97  98  99 100 101 102 103 104 105 106
 107 108 109 110 111 112 113 114 115]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 81  82  83  84  85  86 116 117 118 119 12



[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 89 90
 91 92 93 94]
random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
[ 22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39
  40  41  42  95  96  97  98  99 100 101]


4it [05:08, 77.12s/it]


KeyboardInterrupt: 

In [None]:
# Save results
# NOTE(review): `results` is reassigned by the person-specific cell only after
# its loop has finished. If that cell is interrupted, `results` still holds the
# general detector's output and would be written under 'Personal' — make sure
# the person-specific cell ran to completion before executing this one.
detector_type = 'Personal'
dump_result_to_csv(results, dataset_name, detection_strategy, detector_type)