In [1]:
import configparser
import math
import os
import os.path as osp
import pickle
from collections import defaultdict
from typing import List, Dict

import pandas as pd
from tqdm import tqdm

# Initialize data paths


In [2]:
# Get dataset path from config.ini, expected one directory above the current working directory
config_path = osp.join(osp.abspath(os.pardir), 'config.ini')
parser = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open, so fail loudly here
# instead of hitting an opaque KeyError on the section lookup below
if not parser.read(config_path):
    raise FileNotFoundError('Cannot read configuration file: {}'.format(config_path))
wesad_dataset_path = parser['DATA_PATH']['wesad_dataset_path']
wesad_chest_dataset_path = osp.join(wesad_dataset_path, 'WESAD_CHEST')
wesad_wrist_dataset_path = osp.join(wesad_dataset_path, 'WESAD_WRIST')

# List of user ids: each user is a sub-directory of the chest dataset folder.
# os.listdir() returns entries in arbitrary order and also lists stray files,
# so keep directories only and sort them to make every run reproducible
user_ids = sorted(
    entry for entry in os.listdir(wesad_chest_dataset_path)
    if osp.isdir(osp.join(wesad_chest_dataset_path, entry))
)

# Objective ground-truth file path
ground_truth_path = osp.join(wesad_dataset_path, 'WESAD-Ground-Truth.csv')

# Transform dataset into a predefined format

In [3]:
# Read the ground-truth CSV into a DataFrame and preview its first five rows
ground_truth = pd.read_csv(filepath_or_buffer=ground_truth_path)
ground_truth.head(n=5)

Unnamed: 0,INSTANCE,LABEL
0,S10_baseline_1.csv,0
1,S10_amusement_2.csv,0
2,S10_meditation_3.csv,0
3,S10_stress_4.csv,1
4,S10_meditation_5.csv,0


## Define functions to load the data, map each signal to its ground truth, and check the integrity of the data

In [4]:
def _task_order_key(file_name: str):
    """Sort key that orders task files by the numeric index ending their name.

    For ``'S10_stress_4.csv'`` the index is ``4``. Names whose last
    ``_``-separated part is not an integer keep plain string ordering and are
    placed after all numbered files.
    """
    suffix = file_name.split('_')[-1]
    stem = osp.splitext(suffix)[0]
    try:
        return (0, int(stem), '')
    except ValueError:
        return (1, 0, suffix)


def load_dataset(dataset_path: str, user_ids: List[str]) -> Dict[str, Dict[str, List[float]]]:
    """Load every signal file of every user into nested dictionaries.

    Args:
        dataset_path: Directory that contains one sub-directory per user id.
        user_ids: Names of the user sub-directories to read.

    Returns:
        A mapping ``user_id -> task_id -> samples`` where ``task_id`` is the file
        name and ``samples`` holds the file's lines (header excluded) parsed as
        floats. Tasks are inserted in ascending order of their numeric file index.

    Raises:
        ValueError: If a non-header line cannot be parsed as a float.
        OSError: If a user directory or one of its files cannot be read.
    """
    eda = defaultdict(dict)
    for user_id in tqdm(user_ids):
        user_data_path = osp.join(dataset_path, user_id)
        # Compare the trailing index as a number so that task 10 follows task 9
        # instead of landing between task 1 and task 2
        file_names = sorted(os.listdir(user_data_path), key=_task_order_key)
        for file_name in file_names:
            data_file_path = osp.join(user_data_path, file_name)
            # The context manager closes the handle even when parsing fails
            with open(data_file_path, 'r') as data_file:
                next(data_file, None)  # The first row is the header, skip it
                eda_signal = [float(line) for line in data_file]
            # Task id is also its file name --> This is important to retrieve the ground-truth
            eda[user_id][file_name] = eda_signal
    return eda

In [5]:
def map_data_groundtruth(dataset: Dict[str, Dict[str, List[float]]], ground_truth: pd.DataFrame) -> Dict[str, Dict[str, List[int]]]:
    """Expand each task's ground-truth label to one label per signal sample.

    Args:
        dataset: Mapping ``user_id -> task_id -> samples``.
        ground_truth: Frame with an ``INSTANCE`` column holding the task ids and
            the label in a ``LABEL`` column. The caller's frame is not modified.

    Returns:
        A mapping ``user_id -> task_id -> labels`` in which ``labels`` repeats the
        task's label once for every sample of the matching signal.

    Raises:
        KeyError: If ``ground_truth`` has no ``INSTANCE`` column or a task id has
            no row in it.
    """
    ground_truth = ground_truth.set_index('INSTANCE') # Set index of the ground-truth file to task_id for retrieval
    # Only the label may be repeated per sample. Any other column (for example an
    # 'Unnamed: 0' index saved into the CSV) would otherwise be interleaved with
    # the labels and multiply the length of the result. Frames without a LABEL
    # column keep using all remaining columns, as before.
    if 'LABEL' in ground_truth.columns:
        ground_truth = ground_truth[['LABEL']]
    gt = defaultdict(dict)
    for user_id, data in tqdm(dataset.items()):
        for task_id, eda_signal in data.items():
            task_ground_truth = ground_truth.loc[task_id].values # Get task ground-truth
            # Duplicate ground-truth to label each eda signal
            gt[user_id][task_id] = task_ground_truth.tolist() * len(eda_signal)
    return gt

In [6]:
def _is_missing(value) -> bool:
    """Return True when a sample is None or a floating-point NaN."""
    return value is None or (isinstance(value, float) and math.isnan(value))


def data_assertion(dataset: Dict[str, Dict[str, List[float]]], ground_truth: Dict[str, Dict[str, List[int]]]):
    """Print every integrity problem found between the signals and their labels.

    Two checks run for each task: the signal must have as many samples as it has
    labels, and it must not contain missing samples. Problems are reported with
    ``print``; nothing is raised for them and nothing is returned.

    Args:
        dataset: Mapping ``user_id -> task_id -> samples``.
        ground_truth: Mapping ``user_id -> task_id -> labels``.

    Raises:
        KeyError: If a task of ``dataset`` has no entry in ``ground_truth``.
    """
    for user_id, data in tqdm(dataset.items()):
        for task_id, eda_signal in data.items():
            len_eda_signal = len(eda_signal)
            len_gt = len(ground_truth[user_id][task_id])
            # Assert the length of the ground-truth == the length of eda signal
            if len_eda_signal != len_gt:
                print(user_id, task_id, 'Length not equal')
                print(len_eda_signal, len_gt)
            # Samples parsed with float() are never None; a missing reading shows
            # up as NaN instead, so both forms have to be checked
            if any(_is_missing(elem) for elem in eda_signal):
                print(user_id, task_id, 'Has None value')

In [7]:
def dump_dataset_pickle(eda: Dict[str, Dict[str, List[float]]], ground_truth: Dict[str, Dict[str, List[int]]], file_path: str):
    """Pickle the signals and their labels together into a single file.

    The file stores one dictionary with the keys ``'eda'`` and ``'ground_truth'``.
    An existing file at ``file_path`` is overwritten.

    Args:
        eda: Mapping ``user_id -> task_id -> samples``.
        ground_truth: Mapping ``user_id -> task_id -> labels``.
        file_path: Destination path of the pickle file.
    """
    data = { 'eda': eda, 'ground_truth': ground_truth }
    # Close the handle deterministically so the pickle is fully flushed to disk
    # before anything tries to read it back
    with open(file_path, 'wb') as output_file:
        pickle.dump(data, output_file)

## Load WESAD_CHEST data

In [8]:
# Parse every chest signal file, then build the per-sample labels for each task
print("Load EDA_CHEST data...")
eda_wesad_chest = load_dataset(dataset_path=wesad_chest_dataset_path, user_ids=user_ids)
print("Map ground-truth to each data signal")
gt_wesad_chest = map_data_groundtruth(dataset=eda_wesad_chest, ground_truth=ground_truth)

0%|          | 0/15 [00:00<?, ?it/s]Load EDA_CHEST data...
100%|██████████| 15/15 [00:28<00:00,  1.89s/it]
100%|██████████| 15/15 [00:00<00:00, 116.84it/s]Map ground-truth to each data signal



In [9]:
# Print every chest task whose label count differs from its sample count or that holds missing values
data_assertion(dataset=eda_wesad_chest, ground_truth=gt_wesad_chest)

100%|██████████| 15/15 [00:01<00:00,  9.33it/s]


In [10]:
# Write the chest signals and labels as one pickle inside the dataset folder
output_file_path = osp.join(wesad_dataset_path, 'wesad_chest_dataset.pkl')
dump_dataset_pickle(eda=eda_wesad_chest, ground_truth=gt_wesad_chest, file_path=output_file_path)

## Load WESAD_WRIST data

In [8]:
# Parse every wrist signal file, then build the per-sample labels for each task
print("Load EDA_WRIST data...")
eda_wesad_wrist = load_dataset(dataset_path=wesad_wrist_dataset_path, user_ids=user_ids)
print("Map ground-truth to each data signal")
gt_wesad_wrist = map_data_groundtruth(dataset=eda_wesad_wrist, ground_truth=ground_truth)

13%|█▎        | 2/15 [00:00<00:00, 17.91it/s]Load EDA_WRIST data...
100%|██████████| 15/15 [00:00<00:00, 17.35it/s]
100%|██████████| 15/15 [00:00<00:00, 1156.90it/s]Map ground-truth to each data signal



In [10]:
# Print every wrist task whose label count differs from its sample count or that holds missing values
data_assertion(dataset=eda_wesad_wrist, ground_truth=gt_wesad_wrist)

100%|██████████| 15/15 [00:00<00:00, 939.89it/s]


In [12]:
# Write the wrist signals and labels as one pickle inside the dataset folder
output_file_path = osp.join(wesad_dataset_path, 'wesad_wrist_dataset.pkl')
dump_dataset_pickle(eda=eda_wesad_wrist, ground_truth=gt_wesad_wrist, file_path=output_file_path)