In [1]:
import numpy as np
import os
import os.path as osp
from DatasetLoader.dataset_loader import DatasetLoader
from DatasetLoader.signal_processing import *
from collections import defaultdict
from Classifiers.classifier_strategy import TryMLClassifierStrategy
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from collections import defaultdict
from typing import Tuple
from tqdm import tqdm

In [2]:
dataset_loader = DatasetLoader()
wesad_gsr_data, ground_truth = dataset_loader.load_wesad_gsr_dataset()

In [3]:
single_signal_data = select_single_signal(wesad_gsr_data, 0)

In [4]:
interval_gsr_data, interval_ground_truth, interval_group = dataset_loader.divide_into_intervals(single_signal_data, ground_truth, 60, sampling_rate = 700)

## General cross-population stress detection model


### One-minute interval split

In [5]:
flatten_agg_data = dataset_loader.flatten(interval_gsr_data)
flatten_agg_ground_truth = dataset_loader.flatten(interval_ground_truth)
flatten_agg_group = dataset_loader.flatten(interval_group)
print(f"Number of subjects: {len(list(set(flatten_agg_group)))}")
print(f"Number of samples: {len(flatten_agg_data)}")

Number of subjects: 15
Number of samples: 788


In [6]:
def extract_features(preprocessed_data: np.array, sampling_rate: int = 5) -> np.array:
    # Extract features from the data
    processed_features = [extract_gsr_features(microsiemens, sampling_rate=sampling_rate) for microsiemens in preprocessed_data]

    # Extract statistic features
    statistic_features = np.array([statistics_gsr_signal_features(feat) for feat in processed_features])
    return statistic_features

In [7]:
preprocessed_ground_truth = flatten_agg_ground_truth
preprocessed_groups = flatten_agg_group
statistic_features = extract_features(flatten_agg_data, sampling_rate = 700)

### Binary Classification

In [8]:
binary_ground_truth = preprocessed_ground_truth.flatten()

In [9]:
X = statistic_features
y = binary_ground_truth

In [10]:
try_ml_strategies = TryMLClassifierStrategy(X, y, groups=preprocessed_groups)
try_ml_strategies.try_different_strategies(group_validation = True)

Try Logistic Regression...
Groups: [['S11', 'S10', 'S5'], ['S15', 'S4', 'S8', 'S9'], ['S7', 'S14', 'S2', 'S17'], ['S13', 'S3', 'S6', 'S16']]
ROC AUC Score: [0.8256549232158987, 0.8199893955461295, 0.7206630988121654, 0.8907708722325953]
Mean accuracy of train set: 0.8709954149785981
Mean accuracy of test set: 0.846426959336592
--------------------------------------
Try Random Forests...
Groups: [['S11', 'S10', 'S5'], ['S15', 'S4', 'S8', 'S9'], ['S7', 'S14', 'S2', 'S17'], ['S13', 'S3', 'S6', 'S16']]
ROC AUC Score: [0.7070912375790425, 0.8727465535524921, 0.8062916068398382, 0.9389170445452121]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.8499699076163824
15 features remain
------After ensemble feature selection------
Groups: [['S11', 'S10', 'S5'], ['S15', 'S4', 'S8', 'S9'], ['S7', 'S14', 'S2', 'S17'], ['S13', 'S3', 'S6', 'S16']]
ROC AUC Score: [0.7367886178861788, 0.8800371155885471, 0.814776138885263, 0.9429847959455855]
Mean accuracy of train set: 1.0
Mean accuracy of 

## Person-specific stress detection model

### Binary classification


In [11]:
# Prepare personal-specific stress detection model
person_specific_dataset = defaultdict(dict)
person_specific_ground_truth = defaultdict(dict)
for participant_id, data in single_signal_data.items():
    person_specific_data, person_specific_gt = dataset_loader.prepare_person_specific_dataset(data, ground_truth)
    person_specific_dataset[participant_id] = person_specific_data
    person_specific_ground_truth[participant_id] = person_specific_gt

In [13]:
for participant_id, data in person_specific_dataset.items():
    print(f"Processing {participant_id}")
    person_specific_gt = person_specific_ground_truth[participant_id].flatten()
    if len(list(set(person_specific_gt))) < 2: # If the data contains non-stress value, then continue
        continue
    person_specific_data, person_specific_gt = dataset_loader.divide_person_specific_data_into_intervals(data, person_specific_gt, num_samples=60, sampling_rate=700)
    person_specific_statistic_features = extract_features(person_specific_data, sampling_rate = 700)
    try_ml_strategies = TryMLClassifierStrategy(person_specific_statistic_features, person_specific_gt)
    try_ml_strategies.try_different_strategies(cross_validation = True)
    print("*****************************************")

OC AUC Score: [1.0, 1.0, 0.9807692307692307]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.9226579520697168
10 features remain
------After ensemble feature selection------
ROC AUC Score: [1.0, 1.0, 1.0]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.8627450980392157
--------------------------------------
Try SVM Classifier...
ROC AUC Score: [0.5, 0.5, 0.5]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.7549019607843137
--------------------------------------
Try MLPClassifier...
ROC AUC Score: [1.0, 0.8307692307692308, 0.7884615384615384]
Mean accuracy of train set: 0.8497354497354497
Mean accuracy of test set: 0.8485838779956426
--------------------------------------
Try 10-nearest-neighbors...
ROC AUC Score: [1.0, 1.0, 0.9423076923076923]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.9237472766884531
--------------------------------------
Try decision trees...
ROC AUC Score: [0.5, 1.0, 0.7307692307692308]
Mean accuracy of train

In [14]:
for participant_id, data in person_specific_dataset.items():
    print(f"Processing {participant_id}")
    person_specific_gt = person_specific_ground_truth[participant_id].flatten()
    dataset_loader.class_percentage_analysis(person_specific_gt)

Processing S10
0: 0.8
1: 0.2
Processing S11
0: 0.8
1: 0.2
Processing S13
0: 0.8
1: 0.2
Processing S14
0: 0.8
1: 0.2
Processing S15
0: 0.8
1: 0.2
Processing S16
0: 0.8
1: 0.2
Processing S17
0: 0.8
1: 0.2
Processing S2
0: 0.8
1: 0.2
Processing S3
0: 0.8
1: 0.2
Processing S4
0: 0.8
1: 0.2
Processing S5
0: 0.8
1: 0.2
Processing S6
0: 0.8
1: 0.2
Processing S7
0: 0.8
1: 0.2
Processing S8
0: 0.8
1: 0.2
Processing S9
0: 0.8
1: 0.2


# Downsampling the high-frequency EDA signal to low-frequency EDA signal & Classify

In [15]:
downsampled_wesad_signal_data = resampling_data_signal(single_signal_data, sampling_rate = 700, desired_sampling_rate = 5)

In [16]:
interval_gsr_data, interval_ground_truth, interval_group = dataset_loader.divide_into_intervals(downsampled_wesad_signal_data, ground_truth, 60)

## General cross-population stress detection model


### One-minute interval split

In [17]:
flatten_agg_data = dataset_loader.flatten(interval_gsr_data)
flatten_agg_ground_truth = dataset_loader.flatten(interval_ground_truth)
flatten_agg_group = dataset_loader.flatten(interval_group)
print(f"Number of subjects: {len(list(set(flatten_agg_group)))}")
print(f"Number of samples: {len(flatten_agg_data)}")

Number of subjects: 15
Number of samples: 788


In [18]:
preprocessed_ground_truth = flatten_agg_ground_truth
preprocessed_groups = flatten_agg_group
statistic_features = extract_features(flatten_agg_data)

### Binary Classification

In [19]:
binary_ground_truth = preprocessed_ground_truth.flatten()

In [20]:
X = statistic_features
y = binary_ground_truth

In [21]:
try_ml_strategies = TryMLClassifierStrategy(X, y, groups=preprocessed_groups)
try_ml_strategies.try_different_strategies(group_validation = True)

Try Logistic Regression...
Groups: [['S11', 'S10', 'S5'], ['S15', 'S4', 'S8', 'S9'], ['S7', 'S14', 'S2', 'S17'], ['S13', 'S3', 'S6', 'S16']]
ROC AUC Score: [0.7687443541102078, 0.8825556733828208, 0.8077274507244485, 0.9325153374233128]
Mean accuracy of train set: 0.8722609112509967
Mean accuracy of test set: 0.8512622680199642
--------------------------------------
Try Random Forests...
Groups: [['S11', 'S10', 'S5'], ['S15', 'S4', 'S8', 'S9'], ['S7', 'S14', 'S2', 'S17'], ['S13', 'S3', 'S6', 'S16']]
ROC AUC Score: [0.7034778681120144, 0.8514713679745494, 0.7720271505025453, 0.9277140570818886]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.8432145973854017
9 features remain
------After ensemble feature selection------
Groups: [['S11', 'S10', 'S5'], ['S15', 'S4', 'S8', 'S9'], ['S7', 'S14', 'S2', 'S17'], ['S13', 'S3', 'S6', 'S16']]
ROC AUC Score: [0.7554200542005419, 0.8203207847295864, 0.6770656572249053, 0.9057081888503601]
Mean accuracy of train set: 1.0
Mean accuracy of

## Person-specific stress detection model

### Binary classification


In [22]:
# Prepare personal-specific stress detection model
person_specific_dataset = defaultdict(dict)
person_specific_ground_truth = defaultdict(dict)
for participant_id, data in single_signal_data.items():
    person_specific_data, person_specific_gt = dataset_loader.prepare_person_specific_dataset(data, ground_truth)
    person_specific_dataset[participant_id] = person_specific_data
    person_specific_ground_truth[participant_id] = person_specific_gt

In [24]:
for participant_id, data in person_specific_dataset.items():
    print(f"Processing {participant_id}")
    person_specific_gt = person_specific_ground_truth[participant_id].flatten()
    if len(list(set(person_specific_gt))) < 2: # If the data contains non-stress value, then continue
        continue
    person_specific_data, person_specific_gt = dataset_loader.divide_person_specific_data_into_intervals(data, person_specific_gt, num_samples=60)
    person_specific_statistic_features = extract_features(person_specific_data)
    try_ml_strategies = TryMLClassifierStrategy(person_specific_statistic_features, person_specific_gt)
    try_ml_strategies.try_different_strategies(cross_validation = True)
    print("*****************************************")

Processing S10
Try Logistic Regression...
ROC AUC Score: [1.0, 0.9998660750678359, 0.9151421935323499]
Mean accuracy of train set: 0.9428989751098097
Mean accuracy of test set: 0.8103459217999623
8 features remain
------After recursive feature elimination------
ROC AUC Score: [1.0, 1.0, 0.8565431940528069]
Mean accuracy of train set: 0.9305584605731019
Mean accuracy of test set: 0.833906496176095
--------------------------------------
Try Random Forests...
ROC AUC Score: [0.9990751415916492, 1.0, 0.7783221862304149]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.835021789637687
4 features remain
------After ensemble feature selection------
ROC AUC Score: [0.8492907801418439, 1.0, 0.7456188389923329]
Mean accuracy of train set: 0.9999302795788886
Mean accuracy of test set: 0.8340459078587941
--------------------------------------
Try Extra Trees Classifier...
ROC AUC Score: [1.0, 1.0, 0.8700755633753582]
Mean accuracy of train set: 1.0
Mean accuracy of test set: 0.83697355

ValueError: The length of the input vector x must be greater than padlen, which is 9.