In [1]:
from pathlib import Path
from IPython.display import display
import math
import os
import io
import sys
from tqdm import tqdm
import contextlib

from braindecode.preprocessing import create_fixed_length_windows
from braindecode.datasets.base import EEGWindowsDataset, BaseConcatDataset, BaseDataset
from braindecode.models import EEGNeX
from eegdash import EEGChallengeDataset

import pandas as pd
import numpy as np
import pickle

# Sampling rate used to convert durations in seconds into sample counts
# (e.g. 4 * SFREQ samples for a 4-second window).
SFREQ = 100

# Compute device name. NOTE(review): not referenced by the cells shown here;
# presumably consumed by later model code -- confirm.
device = "cuda"

# Let pandas render up to 50 columns when displaying wide DataFrames.
pd.set_option("display.max_columns", 50)

In [2]:
# Source data is stored two levels above the working directory; the folder
# (and any missing parents) is created on first run.
DATA_DIR = Path("../../data")
DATA_DIR.mkdir(exist_ok=True, parents=True)

# Location of derived artefacts, kept as a plain string.
data_home = "../../artefacts/data"

In [3]:
def prepare_full_dataset(data_dir, release_list):
    """Load the contrastChangeDetection recordings of several releases and filter them.

    One ``EEGChallengeDataset`` (full release, ``mini=False``) is built per
    entry of ``release_list``. The per-release datasets are merged into a
    single ``BaseConcatDataset`` and a recording is kept only if all of the
    following hold:

    * its subject is not in the hard-coded exclusion list,
    * it has at least ``4 * SFREQ`` samples,
    * it has exactly 129 channels,
    * its ``externalizing`` description value is not missing.

    Anything the dataset constructor writes to stderr is captured in an
    in-memory buffer and discarded.

    Parameters
    ----------
    data_dir : path-like
        Forwarded to ``EEGChallengeDataset`` as ``cache_dir`` (the notebook
        passes a ``pathlib.Path``).
    release_list : iterable of str
        Release identifiers, e.g. ``["R11"]``.

    Returns
    -------
    BaseConcatDataset
        The recordings that passed every filter.
    """
    # Subjects to drop. A frozenset gives O(1) membership tests in the filter.
    # NOTE(review): the reason these subjects are excluded is not visible here.
    excluded_subjects = frozenset((
        "NDARWV769JM7", "NDARME789TD2", "NDARUA442ZVF", "NDARJP304NK1",
        "NDARTY128YLU", "NDARDW550GU6", "NDARLD243KRE", "NDARUJ292JXV",
        "NDARBA381JGH",
    ))
    # Minimum recording length: 4 seconds' worth of samples, matching the
    # window size used later in the notebook.
    min_samples = 4 * SFREQ

    stderr_sink = io.StringIO()
    release_datasets = []

    for release in tqdm(release_list, file=sys.stdout):
        # Silence the constructor's stderr output so the progress bar stays readable.
        with contextlib.redirect_stderr(stderr_sink):
            release_ds = EEGChallengeDataset(
                release=release,
                task="contrastChangeDetection",
                mini=False,
                description_fields=[
                    "subject", "session", "run", "task",
                    "age", "gender", "sex", "p_factor",
                ],
                cache_dir=data_dir,
            )
        release_datasets.append(release_ds)

    def _is_usable(recording):
        """Return True if ``recording`` passes every filter (checked in order)."""
        return (
            recording.description["subject"] not in excluded_subjects
            and recording.raw.n_times >= min_samples
            and len(recording.raw.ch_names) == 129
            # pd.isna also treats None / pd.NA as missing, where math.isnan
            # would raise TypeError.
            and not pd.isna(recording.description["externalizing"])
        )

    print("Merging step")
    merged = BaseConcatDataset(release_datasets)

    print("Filtration step")
    return BaseConcatDataset(
        [recording for recording in merged.datasets if _is_usable(recording)]
    )

In [4]:
# Releases to load: "R<idx>" for every idx in the half-open range [11, 12).
release_list = [f"R{idx}" for idx in range(11, 12)]
print(f"Release list: {release_list}")

run_level_datasets = prepare_full_dataset(
    data_dir=DATA_DIR,
    release_list=release_list,
)
# Kept as the cell's last expression so the description table is displayed.
run_level_datasets.description

Release list: ['R11']
100%|██████████| 1/1 [00:05<00:00,  5.02s/it]
Merging step
Filtration step


Unnamed: 0,subject,run,task,age,sex,p_factor,release_number,ehq_total,commercial_use,full_pheno,attention,internalizing,externalizing,restingstate,despicableme,funwithfractals,thepresent,diaryofawimpykid,contrastchangedetection_1,contrastchangedetection_2,contrastchangedetection_3,surroundsupp_1,surroundsupp_2,seqlearning6target,seqlearning8target,symbolsearch
0,NDARAB678VYW,2,contrastChangeDetection,20.1817,M,0.887,R11,100.00,Yes,Yes,0.032,0.101,-0.825,available,available,available,available,available,available,available,available,available,available,unavailable,unavailable,available
1,NDARAB678VYW,1,contrastChangeDetection,20.1817,M,0.887,R11,100.00,Yes,Yes,0.032,0.101,-0.825,available,available,available,available,available,available,available,available,available,available,unavailable,unavailable,available
2,NDARAB678VYW,3,contrastChangeDetection,20.1817,M,0.887,R11,100.00,Yes,Yes,0.032,0.101,-0.825,available,available,available,available,available,available,available,available,available,available,unavailable,unavailable,available
3,NDARAB683CYD,1,contrastChangeDetection,7.2728,M,-1.487,R11,33.35,Yes,No,0.118,-0.648,-0.450,unavailable,caution,caution,caution,caution,caution,unavailable,unavailable,caution,unavailable,unavailable,unavailable,caution
4,NDARAC296UCB,1,contrastChangeDetection,22.0002,M,0.015,R11,80.04,Yes,No,-1.159,0.098,-0.668,caution,caution,unavailable,unavailable,caution,caution,caution,unavailable,caution,unavailable,unavailable,unavailable,caution
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,NDARZV406UFK,1,contrastChangeDetection,12.0888,M,-0.390,R11,93.34,Yes,Yes,-0.201,1.023,0.228,caution,caution,caution,caution,caution,caution,caution,caution,caution,caution,unavailable,unavailable,caution
754,NDARZV406UFK,2,contrastChangeDetection,12.0888,M,-0.390,R11,93.34,Yes,Yes,-0.201,1.023,0.228,caution,caution,caution,caution,caution,caution,caution,caution,caution,caution,unavailable,unavailable,caution
755,NDARZW623WYG,1,contrastChangeDetection,11.9490,M,0.610,R11,93.34,Yes,Yes,-0.353,0.018,-0.309,unavailable,caution,caution,caution,caution,caution,caution,caution,caution,caution,unavailable,unavailable,caution
756,NDARZW623WYG,2,contrastChangeDetection,11.9490,M,0.610,R11,93.34,Yes,Yes,-0.353,0.018,-0.309,unavailable,caution,caution,caution,caution,caution,caution,caution,caution,caution,unavailable,unavailable,caution


In [5]:
# Cut every recording into windows of 4 seconds (4 * SFREQ samples) that start
# every 2 seconds (2 * SFREQ samples), i.e. consecutive windows overlap by half.
# NOTE(review): drop_last_window=True presumably leaves a tail that does not
# fill a whole window un-windowed -- confirm against the braindecode docs.
windows_ds = create_fixed_length_windows(
    run_level_datasets,
    drop_last_window=True,
    window_stride_samples=SFREQ * 2,
    window_size_samples=SFREQ * 4,
)

In [6]:
# Persist the windowed dataset under the artefacts directory (`data_home`).
# `pickle` and `Path` are already imported in the first cell.
windows_path = Path(data_home) / "windows_ds_ch2_release_11.pkl"

# Opening a file for writing does not create missing parent directories, so
# make sure the target folder exists (no-op when it is already there).
windows_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE: pickle files can execute arbitrary code when loaded; only load this
# file back from a trusted location.
with windows_path.open("wb") as f:
    pickle.dump(windows_ds, f)