In [1]:
import os
import os.path as osp
import numpy as np
import configparser
from classifiers import BinaryClassifier
import pandas as pd
from sklearn.model_selection import LeaveOneGroupOut
from tqdm import tqdm
from icecream import ic

# Load EDA statistical features and ground truth from datasets

In [2]:
def get_dataset_folder_path(dataset_name: str) -> str:
    """Return the folder configured for ``dataset_name``.

    The folder is read from the ``[DATA_PATH]`` section of the ``config.ini``
    file located in the current working directory.

    Args:
        dataset_name: One of 'AffectiveROAD', 'WESAD_CHEST', 'WESAD_WRIST',
            'RESAMPLED_WESAD_CHEST' or 'DCU_NVT_EXP1'.

    Returns:
        The dataset folder path stored in config.ini.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset name.
        KeyError: If config.ini has no ``[DATA_PATH]`` section or lacks the
            option for this dataset (a missing config.ini ends up here too,
            because ``ConfigParser.read`` silently skips unreadable files).
    """
    # config.ini option that stores the folder of each supported dataset;
    # all WESAD variants share the same folder.
    config_options = {
        'AffectiveROAD': 'affectiveROAD_dataset_path',
        'WESAD_CHEST': 'wesad_dataset_path',
        'WESAD_WRIST': 'wesad_dataset_path',
        'RESAMPLED_WESAD_CHEST': 'wesad_dataset_path',
        'DCU_NVT_EXP1': 'dcu_nvt_dataset_path',
    }
    if dataset_name not in config_options:
        # Fail here with a clear message instead of returning None and
        # crashing later when the path is joined with a file name.
        raise ValueError(
            f'Unknown dataset name {dataset_name!r}; '
            f'expected one of {sorted(config_options)}'
        )

    # Read dataset path from config.ini file
    config_path = osp.join(os.getcwd(), 'config.ini')
    parser = configparser.ConfigParser()
    parser.read(config_path)
    return parser['DATA_PATH'][config_options[dataset_name]]

In [3]:
def load_dataset(dataset_name: str):
    """Load the saved features, ground-truth and group labels of a dataset.

    Three ``.npy`` files are read from the folder returned by
    ``get_dataset_folder_path(dataset_name)``, each named after the dataset:
    ``<dataset_name>_stats_feats.npy``, ``<dataset_name>_ground_truth.npy``
    and ``<dataset_name>_groups.npy``.

    Args:
        dataset_name: Name of the dataset whose files should be loaded.

    Returns:
        A ``(dataset, ground_truth, groups)`` tuple, in that order.
    """
    folder = get_dataset_folder_path(dataset_name)
    # Order matters: features, then ground-truth, then user_id (group) labels
    file_names = (
        f'{dataset_name}_stats_feats.npy',
        f'{dataset_name}_ground_truth.npy',
        f'{dataset_name}_groups.npy',
    )
    features, labels, user_ids = (
        np.load(osp.join(folder, file_name)) for file_name in file_names
    )
    return features, labels, user_ids

# Define functions to get output folder path and save results

In [4]:
def get_output_folder_path(dataset_name: str) -> str:
    """Return (and create if needed) the result folder of ``dataset_name``.

    The root result folder is the ``result_path`` option of the
    ``[DATA_PATH]`` section in the ``config.ini`` file located in the current
    working directory; the dataset name is appended as a sub-folder.

    Args:
        dataset_name: Name of the dataset, used as the sub-folder name.

    Returns:
        Path of the dataset's output folder, which exists on return.

    Raises:
        KeyError: If config.ini lacks ``[DATA_PATH]`` or ``result_path``.
    """
    config_path = osp.join(os.getcwd(), 'config.ini')
    parser = configparser.ConfigParser()
    parser.read(config_path)
    # Get output_folder_path for a specific dataset
    output_folder_path = osp.join(parser['DATA_PATH']['result_path'], dataset_name)
    # exist_ok=True avoids the check-then-create race of a separate
    # osp.exists() test and is a no-op when the folder is already there.
    os.makedirs(output_folder_path, exist_ok=True)
    return output_folder_path

In [5]:
def dump_result_to_csv(results, dataset_name: str, detection_strategy: str, detector_type: str):
    """Write ``results`` to ``<output>/<detector_type>/<dataset>-<strategy>.csv``.

    Args:
        results: Mapping accepted by ``pandas.DataFrame.from_dict``.
        dataset_name: Dataset name; selects the output folder and is the
            first part of the file name.
        detection_strategy: Strategy name; second part of the file name.
        detector_type: Name of the sub-folder the file is written to.
    """
    output_folder_path = osp.join(get_output_folder_path(dataset_name), detector_type)
    # exist_ok=True creates the folder only when missing, without the
    # check-then-create race of a separate osp.exists() test.
    os.makedirs(output_folder_path, exist_ok=True)
    # Get output_file_path
    output_file_path = osp.join(output_folder_path, f'{dataset_name}-{detection_strategy}.csv')
    # Generate DataFrame to save to csv format
    df = pd.DataFrame.from_dict(results)
    df.to_csv(output_file_path, index=False)

## Load the dataset, ground truth, and group labels

In [6]:
# -- Select the dataset to load: keep exactly one assignment uncommented -- #
dataset_name = 'AffectiveROAD'
# dataset_name = 'WESAD_CHEST'
# dataset_name = 'WESAD_WRIST'
# dataset_name = 'DCU_NVT_EXP1'

In [7]:
dataset, ground_truth, groups = load_dataset(dataset_name) # Load dataset, ground-truths and group labels
# Fail fast on misaligned files: the three arrays are later indexed with the same sample indices
assert len(dataset) == len(ground_truth) == len(groups), (
    f'Mismatched sample counts: dataset={len(dataset)}, '
    f'ground_truth={len(ground_truth)}, groups={len(groups)}'
)

# Define stress detection strategies

In [8]:
# -- Select the detection strategy: keep exactly one assignment uncommented -- #
# detection_strategy = 'logistic_regression'
detection_strategy = 'random_forest'
# detection_strategy = 'svm'
# detection_strategy = 'mlp'
# detection_strategy = 'knn'

In [9]:
# Scoring name passed to BinaryClassifier through its `scoring` argument
SCORING = 'balanced_accuracy'

# Build General Cross-population Stress Detector

In [10]:
# NOTE: the general detector (`logo_validation = True` over all groups) is currently disabled;
# uncomment the two lines below to run it.
# clf = BinaryClassifier(dataset, ground_truth, detection_strategy, logo_validation = True, groups = groups, scoring = SCORING)
# results = clf.exec_classifier() # Build classifier and return prediction results

In [11]:
# Save results of the general detector
# (disabled: needs `results` from the commented-out general-detector cell)
# detector_type = 'General'
# dump_result_to_csv(results, dataset_name, detection_strategy, detector_type)

# Build Person-specific Stress Detector

In [12]:
logo = LeaveOneGroupOut()
balanced_accs = []
test_groups = []
# LeaveOneGroupOut holds out one group per split, so each test fold is one user's samples;
# the training indices are ignored because every detector is fitted on a single user only.
n_users = logo.get_n_splits(groups=groups) # Known total lets tqdm render a real progress bar with ETA
for _, test_index in tqdm(logo.split(dataset, ground_truth, groups), total=n_users, desc='users'):
    user_id = groups[test_index][0] # All samples of a test fold share the same group label
    user_dataset, user_ground_truth = dataset[test_index], ground_truth[test_index] # Get personal statistical features and ground-truths of a user

    # Re-initialize classifier when iterating a new user
    clf = BinaryClassifier(user_dataset, user_ground_truth, detection_strategy, cross_validation = True, scoring = SCORING)
    balanced_acc = clf.exec_classifier()

    # A score of -1 is treated as "this user's data only contains one class"
    if balanced_acc == -1:
        print(f'Skipping user {user_id}: only one class in ground-truth')
        continue # Ignore this user as it only contains one class

    # Save user_id and his/her corresponding predicted results
    balanced_accs.append(balanced_acc)
    test_groups.append(user_id)
# NOTE(review): 'balanced_accurary_score' is misspelled, but it becomes the CSV column header,
# so it is kept unchanged here - confirm no downstream reader depends on it before renaming.
results = { 'groups': test_groups, 'balanced_accurary_score': balanced_accs }

0it [00:00, ?it/s]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


1it [00:12, 12.58s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


2it [00:25, 12.85s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


3it [00:38, 13.02s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


4it [00:51, 12.88s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


5it [01:04, 12.89s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


6it [01:17, 12.96s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


7it [01:31, 13.23s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


8it [01:44, 13.17s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


9it [01:57, 13.18s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


10it [02:10, 13.15s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


11it [02:23, 13.07s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


12it [02:36, 12.94s/it]

random_forest best grid search score: 1.0 with params - {'class_weight': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}


13it [02:49, 13.01s/it]


In [13]:
# Persist the per-user scores in the 'Personal' sub-folder of the dataset's output folder
detector_type = 'Personal'
dump_result_to_csv(
    results,
    dataset_name=dataset_name,
    detection_strategy=detection_strategy,
    detector_type=detector_type,
)