# Imports

In [31]:
import pandas as pd
import glob
import os
from tqdm import tqdm

# Configs

# Import Data

Datasets used:
- [WESAD: Multimodal Dataset for Wearable Stress and Affect Detection](https://ubicomp.eti.uni-siegen.de/home/datasets/icmi18/): Only the per-subject pickle files (S2.pkl, S3.pkl, ...) are imported and combined into a single DataFrame
- Self-Generated Dataset (ECGPrivate):
    - Device: Bioplux Chest (and Finger) Band
    - Dataframes are modeled following the already implemented pydantic models

In [125]:
def dict_to_dataframe_WESAD(dict):
    """
    Flatten one unpickled WESAD record into a three-column DataFrame.

    Args:
        dict: Mapping read from a WESAD pickle. The values under
            ``dict["signal"]["chest"]["ECG"]`` and ``dict["label"]`` must expose
            ``.flatten()`` (e.g. numpy arrays); ``dict["subject"]`` is copied
            onto every row. NOTE(review): the parameter name shadows the builtin
            ``dict``; it is kept so existing callers keep working.

    Returns:
        A pandas DataFrame with the columns ``ECG``, ``label`` and ``subject``.
    """
    chest_ecg = dict["signal"]["chest"]["ECG"].flatten()
    labels = dict["label"].flatten()
    subject_id = dict["subject"]

    # Start from the ECG samples, then attach the labels and broadcast the
    # subject identifier over all rows.
    return pd.DataFrame({"ECG": chest_ecg}).assign(label=labels, subject=subject_id)

In [139]:
from typing import Dict
def combine_pickles_to_df(pattern, wesad_ecg=False):
    """
    Load every pickle file matching ``pattern`` and stack the results into a single DataFrame.

    Files are processed in sorted path order so that the row order of the result
    is reproducible (``glob.glob`` returns matches in arbitrary order).

    Args:
        pattern (str): A glob pattern that matches the pickle files to load.
        wesad_ecg (bool): If True, any unpickled ``dict`` is converted to a
            DataFrame with ``dict_to_dataframe_WESAD`` (used for the WESAD dataset).

    Returns:
        A pandas DataFrame that combines the loaded DataFrames row-wise, with a
        fresh 0..n-1 index.

    Raises:
        ValueError: If no file matches ``pattern``, or if a file cannot be
            unpickled because a class or attribute it references is unavailable.
        TypeError: If a loaded object is neither a pandas DataFrame nor a Series
            (after the optional WESAD conversion).
    """
    file_paths = sorted(glob.glob(pattern))
    if not file_paths:
        # Fail with a clear message instead of pandas' "No objects to concatenate".
        raise ValueError(f"No files match the pattern {pattern!r}")

    dfs = []
    for file_path in tqdm(file_paths):
        # SECURITY: unpickling can execute arbitrary code -- only load files
        # from a trusted source.
        with open(file_path, 'rb') as f:
            try:
                loaded = pd.read_pickle(f)
            except AttributeError as exc:
                # Raised when the pickle references a class/attribute that does
                # not exist in this environment; nothing has been loaded yet, so
                # report the file rather than a (non-existent) object type.
                raise ValueError(
                    f"Could not unpickle {file_path!r}: {exc}"
                ) from exc

        if wesad_ecg and isinstance(loaded, dict):
            loaded = dict_to_dataframe_WESAD(loaded)

        # Reject unsupported payloads right away instead of after all (possibly
        # slow) files have been read; pd.concat would raise TypeError as well.
        if not isinstance(loaded, (pd.DataFrame, pd.Series)):
            raise TypeError(
                f"Expected a pandas DataFrame, but got {type(loaded)} from {file_path!r}"
            )
        dfs.append(loaded)

    return pd.concat(dfs, ignore_index=True)

### WESAD Data

In [137]:
%%time
# Load WESAD: this pattern matches only the pickle of subject S15. With
# wesad_ecg=True a dict payload is converted to an ECG/label/subject DataFrame.
df = combine_pickles_to_df('../data/S15.pkl', wesad_ecg=True)

100%|██████████| 1/1 [00:09<00:00,  9.46s/it]

CPU times: total: 6.55 s
Wall time: 9.56 s





In [141]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns of df.
df.describe()

Unnamed: 0,ECG,label
count,3676400.0,3676400.0
mean,0.001106015,1.464774
std,0.288565,1.626752
min,-1.257111,0.0
25%,-0.1009369,0.0
50%,-0.01826477,1.0
75%,0.02069092,2.0
max,1.499496,7.0


### Self-generated dataset

# Preprocessing

In [None]:
#ecgs = np.array(ecg_batch.ecgs)

# Detrend signals
#ecgs -= np.mean(ecgs, axis=1, keepdims=True)