In [2]:
import mne
import pandas
import dn3
from dn3.configuratron import ExperimentConfig
import os
from tqdm import tqdm
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Map each TUH referential channel label ('EEG <electrode>-REF') to the bare
# electrode name. The insertion order follows the electrode tuple below.
mapping = {
    'EEG {}-REF'.format(electrode): electrode
    for electrode in (
        'FP1', 'FP2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2',
        'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'A1', 'A2', 'FZ', 'CZ', 'PZ',
    )
}

In [4]:
# Collect the unique recording stems (file name without extension) found in
# the TUH artifact folder. Each stem is expected to appear once per extension
# (the conversion cell reads both '<stem>.edf' and '<stem>.csv'), hence the
# set. os.path.splitext is used instead of slicing off four characters so an
# extension of any length is handled, and the result is sorted so the
# processing order is the same on every kernel start (a plain set of strings
# has no stable iteration order across interpreter sessions).
names = sorted({
    os.path.splitext(entry)[0]
    for entry in os.listdir('data/datasets/tuh_eeg_artifact/v3.0.0/edf/01_tcp_ar')
})
# Stems ending in "_seiz" are excluded from the conversion.
names = [name for name in names if not name.endswith("_seiz")]

In [5]:
# Convert TUH artifact recordings into cleaned EDF files with MNE annotations.
#
# For each recording stem in `names` the cell:
#   1. loads '<stem>.edf', renames channels via `mapping` and drops the rest,
#   2. average-references, resamples to 256 Hz, band-pass filters 0.1-80 Hz
#      and anonymizes the recording,
#   3. loads '<stem>.csv', removes duplicate and overlapping events and
#      attaches the remaining ones as annotations,
#   4. exports the result to '<output_root>/<group>/<stem>.edf'.

input_dir = 'data/datasets/tuh_eeg_artifact/v3.0.0/edf/01_tcp_ar'
# Relative like the input path and like the folder the dataset loader scans,
# instead of a hard-coded absolute home directory.
output_root = 'data/datasets/artifact'

# The original cell stopped after the first recording (`if index == 0: break`).
# That limit is kept but made explicit; set to None to convert every recording.
max_recordings = 1
selected_names = names if max_recordings is None else names[:max_recordings]

wanted_channels = set(mapping.values())

# Wrapping the list (not an enumerate object) lets tqdm know the total.
for name in tqdm(selected_names):
    raw = mne.io.read_raw_edf(os.path.join(input_dir, '{}.edf'.format(name)), preload=True)

    mne.datasets.eegbci.standardize(raw)  # Set channel names
    raw = raw.rename_channels(mapping)

    # Drop everything that is not one of the target electrodes. This is done
    # after renaming so the comparison is made on the final channel names.
    drop = [channel for channel in raw.ch_names if channel not in wanted_channels]
    raw = raw.drop_channels(drop)

    assert len(raw.ch_names) == len(mapping), \
        '{}: expected {} channels, found {}'.format(name, len(mapping), len(raw.ch_names))

    # Re-reference only after the unwanted channels are gone, so that channels
    # which are discarded anyway do not contribute to the average.
    raw = raw.set_eeg_reference(ref_channels='average')
    raw = raw.resample(256)
    raw = raw.filter(0.1, 80)
    raw = raw.anonymize()

    # The first six lines of the annotation file are skipped before the
    # column header (presumably a metadata preamble -- verify against the data).
    csv = pandas.read_csv(os.path.join(input_dir, '{}.csv'.format(name)), skiprows=6)
    csv = csv.drop(columns=["channel", "confidence"])
    # With the channel column removed, the same event may be listed several times.
    csv = csv.drop_duplicates(subset=['start_time', 'stop_time', 'label'])
    csv = csv.sort_values(['start_time', 'stop_time', 'label'])
    csv = csv.reset_index(drop=True)

    # Greedy pass over the time-sorted events: keep an event only if it starts
    # strictly after the previously kept event has stopped. Works on plain
    # arrays and copes with an empty annotation file.
    starts = csv.start_time.to_numpy()
    stops = csv.stop_time.to_numpy()
    booleans = np.zeros(len(csv), dtype=bool)
    last_stop = None
    for i in range(len(csv)):
        if last_stop is None or last_stop < starts[i]:
            booleans[i] = True
            last_stop = stops[i]

    csv = csv[booleans].reset_index(drop=True)

    onset = csv.start_time.values
    duration = csv.stop_time.values - csv.start_time.values
    description = csv.label.values

    print(description)

    annotations = mne.Annotations(onset, duration, description)
    raw = raw.set_annotations(annotations)

    # NOTE(review): the output folder is the second '_'-separated token of the
    # stem. If stems look like '<subject>_<session>_<token>' this groups files
    # by session rather than by subject -- confirm this is intended.
    dirname = os.path.join(output_root, name.split('_')[1])
    filename = os.path.join(dirname, name + '.edf')

    os.makedirs(dirname, exist_ok=True)

    mne.export.export_raw(filename, raw, overwrite=True)

    # Read the exported file back as a sanity check that it can be loaded.
    raw = mne.io.read_raw_edf(filename)

0it [00:00, ?it/s]

['eyem' 'elec' 'eyem' 'eyem' 'eyem' 'eyem' 'eyem' 'eyem' 'eyem' 'eyem'
 'eyem' 'eyem' 'eyem' 'eyem' 'musc' 'eyem' 'eyem' 'eyem' 'eyem' 'eyem'
 'eyem' 'eyem' 'eyem' 'musc' 'musc' 'eyem' 'eyem' 'eyem' 'eyem' 'elec'
 'eyem' 'elec' 'musc' 'musc' 'elec' 'musc' 'elec' 'eyem' 'eyem' 'eyem'
 'elec' 'eyem' 'elec' 'eyem' 'eyem' 'eyem_musc' 'musc' 'eyem_musc' 'eyem'
 'eyem' 'elec' 'musc' 'elec' 'elec' 'elec' 'eyem' 'eyem' 'eyem' 'eyem'
 'eyem' 'elec' 'elec' 'eyem']


0it [00:20, ?it/s]


In [108]:
# Parse the downstream experiment configuration and pull out the two dataset
# configurations used in this notebook.
experiment = ExperimentConfig('configs/downstream.yml')
ds_config, ds_config_2 = (
    experiment.datasets[dataset_key] for dataset_key in ('artifact', 'mmidb')
)

Adding additional configuration entries: dict_keys(['train_params', 'lr', 'folds'])
Configuratron found 2 datasets.


In [109]:
# Build the dataset described by the 'artifact' configuration entry.
dataset = ds_config.auto_construct_dataset()

Scanning data/datasets/artifact. If there are a lot of files, this may take a while...: 100%|██████████| 4/4 [00:00<00:00, 91.41it/s, extension=.gdf]


Creating dataset of 11 Preloaded Epoched recordings from 6 people.


Loading TUH artifact: 100%|██████████| 6/6 [00:01<00:00,  3.22person/s]

>> TUH artifact | DSID: None | 6 people | 540 trials | 90 channels | 1536 samples/trial | 256.0Hz | 0 transforms
Constructed 2 channel maps
Used by 4 recordings:
EEG (original(new)): FP1(FP1) FP2(FP2) F3(F3) F4(F4) C3(C3) C4(C4) P3(P3) P4(P4) O1(O1) O2(O2) F7(F7) F8(F8) T3(T3) T4(T4) T5(T5) T6(T6) FZ(FZ) CZ(CZ) PZ(PZ) 
EOG (original(new)): 
REF (original(new)): A1(A1) A2(A2) 
EXTRA (original(new)): 
Heuristically Assigned: 
--------------------
Excluded []
Used by 7 recordings:
EEG (original(new)): FP1(FP1) FP2(FP2) F3(F3) F4(F4) C3(C3) C4(C4) P3(P3) P4(P4) O1(O1) O2(O2) F7(F7) F8(F8) T3(T3) T4(T4) T5(T5) T6(T6) FZ(FZ) CZ(CZ) PZ(PZ) 
EOG (original(new)): 
REF (original(new)): A1(A1) A2(A2) 
EXTRA (original(new)): 
Heuristically Assigned: 
--------------------
Excluded []





In [51]:
from matplotlib import pyplot as plt

In [112]:
# Create the fold iterator for the 'artifact' dataset from its configuration.
# NOTE(review): `utils` is not imported in any cell visible in this notebook,
# so this line fails on a fresh kernel unless it is imported elsewhere -- confirm.
gen = utils.get_lmoso_iterator("artifact", ds_config)

Scanning data/datasets/artifact. If there are a lot of files, this may take a while...: 100%|██████████| 4/4 [00:00<00:00, 118.50it/s, extension=.gdf]


Creating dataset of 11 Preloaded Epoched recordings from 6 people.


Loading TUH artifact: 100%|██████████| 6/6 [00:01<00:00,  3.67person/s]

>> TUH artifact | DSID: None | 6 people | 540 trials | 90 channels | 1536 samples/trial | 256.0Hz | 0 transforms
Constructed 2 channel maps
Used by 8 recordings:
EEG (original(new)): FP1(FP1) FP2(FP2) F3(F3) F4(F4) C3(C3) C4(C4) P3(P3) P4(P4) O1(O1) O2(O2) F7(F7) F8(F8) T3(T3) T4(T4) T5(T5) T6(T6) FZ(FZ) CZ(CZ) PZ(PZ) 
EOG (original(new)): 
REF (original(new)): A1(A1) A2(A2) 
EXTRA (original(new)): 
Heuristically Assigned: 
--------------------
Excluded []
Used by 14 recordings:
EEG (original(new)): FP1(FP1) FP2(FP2) F3(F3) F4(F4) C3(C3) C4(C4) P3(P3) P4(P4) O1(O1) O2(O2) F7(F7) F8(F8) T3(T3) T4(T4) T5(T5) T6(T6) FZ(FZ) CZ(CZ) PZ(PZ) 
EOG (original(new)): 
REF (original(new)): A1(A1) A2(A2) 
EXTRA (original(new)): 
Heuristically Assigned: 
--------------------
Excluded []



