In [2]:
import os

import mne
import numpy as np
import scipy as sp
from tqdm.notebook import trange

%matplotlib notebook

In [3]:
BANDS = [
    ("delta", 1, 4),
    ("theta", 4, 8),
    ("alpha", 8, 12),
    ("beta", 12, 25),
    ("gamma", 25, 40),
]

### ISRUS-Sleep

In [122]:
CHANNELS_ISRUC = ["F3-A2", "C3-A2", "O1-A2", "F4-A1", "C4-A1", "O2-A1"]

In [124]:
for subset in ["s1", "s3"]:
    print(subset)

    for recording_n in range(1, 11):
        print(f" {recording_n}")

        raw_edf = mne.io.read_raw_edf(
            f"datasets/isruc-sleep/{subset}/{recording_n}/{recording_n}.edf",
            preload=True,
            include=CHANNELS_ISRUC,
            verbose=False
        )

        data = []
        channel_names = []
        for band_name, low, high in BANDS:
            raw_band = raw_edf.copy().filter(low, high, verbose=False)
            data.append(raw_band.get_data())
            channel_names.extend(
                [f"{channel_name[:-3]}-{band_name}" for channel_name in CHANNELS_ISRUC]
            )
        data_combined = np.vstack(data)
        info = mne.create_info(ch_names=channel_names, sfreq=raw_edf.info['sfreq'], ch_types="eeg")
        raw_combined = mne.io.RawArray(data_combined, info, verbose=False)

        with open(
            f"datasets/isruc-sleep/{subset}/{recording_n}/{recording_n}_1.txt",
            mode="r",
            encoding="utf-8"
        ) as file:
            stages_1 = list(
                map(int, file.read().strip().split("\n"))
            )
        with open(
            f"datasets/isruc-sleep/{subset}/{recording_n}/{recording_n}_2.txt",
            mode="r",
            encoding="utf-8"
        ) as file:
            stages_2 = list(
                map(int, file.read().strip().split("\n"))
            )

        annotations_onsets = []
        annotations_descriptions = []
        for i, (stage_1, stage_2) in enumerate(zip(stages_1, stages_2)):
            if stage_1 == stage_2:
                annotations_onsets.append(i * 30)
                annotations_descriptions.append(
                    str(stage_1)
                )
            else:
                annotations_onsets.extend([i * 30, i* 30])
                annotations_descriptions.extend(
                    [f"{stage_1}_annot1", f"{stage_2}_annot2"]
                )

        annotations = mne.Annotations(
            onset=annotations_onsets,
            duration=30,
            description=annotations_descriptions
        )
        raw_combined.set_annotations(annotations)

        raw_combined.save(f"preprocessed/isruc-sleep/{subset}_{recording_n}_eeg.fif", overwrite=True, verbose=False)

s1
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
s3
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10


### Sleep-EDF Database Expanded

In [4]:
CHANNELS_SLEEPEDF = ["EEG Fpz-Cz", "EEG Pz-Oz"]
SUBSET_SLEEPEDF = [11, 12, 13, 14, 15, 30, 33, 71, 1, 2, 3, 4, 5, 20, 22, 60]
MAPPING_SLEEPEDF = {
    "Sleep stage W": "0",
    "Sleep stage 1": "1",
    "Sleep stage 2": "2",
    "Sleep stage 3": "3",
    "Sleep stage 4": "3",
    "Sleep stage R": "5",
}
PATH_SLEEPEDF = "datasets/sleep_edf_database_expanded/sleep-cassette"
DIR_SLEEPEDF = os.listdir(PATH_SLEEPEDF)

In [5]:
for recording_new_n, recording_original_n in enumerate(SUBSET_SLEEPEDF):
    print(f"{recording_new_n} ({recording_original_n})")

    filename_rec, filename_hypnogram = None, None
    for filename in DIR_SLEEPEDF:
        if f"SC4{recording_original_n:02d}1" in filename:
            if "PSG" in filename:
                filename_rec = filename
            if "Hypnogram" in filename:
                filename_hypnogram = filename

    raw_edf = mne.io.read_raw_edf(
        f"{PATH_SLEEPEDF}/SC4001E0-PSG.edf",
        preload=True,
        include=CHANNELS_SLEEPEDF,
        verbose=False
    )

    data = []
    channel_names = []
    for band_name, low, high in BANDS:
        raw_band = raw_edf.copy().filter(low, high, verbose=False)
        data.append(raw_band.get_data())
        channel_names.extend(
            [f"{channel_name[4:]}-{band_name}" for channel_name in CHANNELS_SLEEPEDF]
        )
    data_combined = np.vstack(data)
    info = mne.create_info(ch_names=channel_names, sfreq=raw_edf.info['sfreq'], ch_types="eeg")
    raw_combined = mne.io.RawArray(data_combined, info, verbose=False)

    rec_len = raw_combined.n_times / raw_combined.info['sfreq']

    annotations = mne.read_annotations(f"{PATH_SLEEPEDF}/{filename_hypnogram}")
    new_onsets = []
    new_durations = []
    new_descriptions = []
    for onset, duration, description in zip(annotations.onset, annotations.duration, annotations.description):
        if description in MAPPING_SLEEPEDF.keys() and onset < rec_len:
            new_descriptions.append(MAPPING_SLEEPEDF[description])
            new_onsets.append(onset)
            new_durations.append(duration)

    new_annotations = mne.Annotations(
        onset=new_onsets,
        duration=new_durations,
        description=new_descriptions
    )
    raw_combined.set_annotations(new_annotations, verbose="ERROR")

    raw_combined.crop(
        tmin=max(0, new_durations[0] - 3600),
        tmax=min(rec_len, new_onsets[-1] + 3600),
        include_tmax=False
    )

    print(raw_combined.n_times / raw_combined.info['sfreq'])

    raw_combined.save(f"preprocessed/sleep_edf_database_expanded/{recording_new_n}_eeg.fif", overwrite=True, verbose=False)

0 (11)
31470.0
1 (12)
35160.0
2 (13)
34440.0
3 (14)
33720.0
4 (15)
32280.0
5 (30)
31530.0
6 (33)
33240.0
7 (71)
45990.0
8 (1)
36690.0
9 (2)
34350.0
10 (3)
32160.0
11 (4)
40680.0
12 (5)
23760.0
13 (20)
34290.0
14 (22)
36570.0
15 (60)
44070.0


### EEGMAT

In [120]:
CHANNELS_EEGMAT = ["EEG F3", "EEG C3", "EEG O1", "EEG F4", "EEG C4", "EEG O2"]

In [121]:
for subject in trange(0, 36):
    raw_edf_1 = mne.io.read_raw_edf(
        f"datasets/eegmat/Subject{subject:02d}_1.edf",
        preload=True,
        include=CHANNELS_EEGMAT,
        verbose=False
    )
    raw_edf_1_len = raw_edf_1.times[-1] // 30 * 30
    raw_edf_1 = raw_edf_1.crop(tmax=raw_edf_1_len)

    raw_edf_2 = mne.io.read_raw_edf(
        f"datasets/eegmat/Subject{subject:02d}_2.edf",
        preload=True,
        include=CHANNELS_EEGMAT,
        verbose=False
    )
    raw_edf_2_len = raw_edf_2.times[-1] // 30 * 30
    raw_edf_2 = raw_edf_2.crop(tmax=raw_edf_2_len)

    raw_inp_combined = mne.concatenate_raws([raw_edf_1, raw_edf_2])

    data = []
    channel_names = []
    for band_name, low, high in BANDS:
        raw_band = raw_inp_combined.copy().filter(low, high, verbose=False)
        data.append(raw_band.get_data())
        channel_names.extend(
            [f"{channel_name[4:]}-{band_name}" for channel_name in CHANNELS_EEGMAT]
        )
    data_combined = np.vstack(data)
    info = mne.create_info(ch_names=channel_names, sfreq=raw_inp_combined.info['sfreq'], ch_types="eeg")
    raw_combined = mne.io.RawArray(data_combined, info, verbose=False)

    annotations = mne.Annotations(
        onset=[0, raw_edf_1_len],
        duration=[raw_edf_1_len, raw_edf_2_len],
        description=["0", "1"]
    )
    raw_combined.set_annotations(annotations)

    raw_combined.save(f"preprocessed/eegmat/{subject}_eeg.fif", overwrite=True, verbose=False)

  0%|          | 0/36 [00:00<?, ?it/s]

### MNIST

In [70]:
CHANNELS_MNIST = ["F3", "F4", "O1", "O2", "P7", "P8"]

In [75]:
events = {}
with open("datasets/mnist/EP1.01.txt", "r", encoding="utf-8") as file:
    for n, line in enumerate(file):
        if n % 50000 == 0:
            print(f"{n}/910476")

        _, event, _, channel, digit, size, data = line.split("\t")
        if channel not in CHANNELS_MNIST:
            continue
        if int(size) < 160:
            print("skipping")
            continue

        data = np.fromstring(data, sep=",", dtype=float)

        if event not in events:
            events[event] = {"_digit": digit, "_size": size}
        events[event][channel] = data

0/910476
50000/910476
100000/910476
150000/910476
200000/910476
250000/910476
300000/910476
350000/910476
400000/910476
450000/910476
500000/910476
550000/910476
600000/910476
650000/910476
700000/910476
750000/910476
800000/910476
skipping
skipping
skipping
skipping
skipping
skipping
850000/910476
900000/910476


In [76]:
for event_n, (event, data) in enumerate(events.items()):
    if event_n % 10000 == 0:
        print(f"{event_n}")

    digit = data["_digit"]
    size = data["_size"]

    sfreq = int(size) / 2

    ch_array = np.array(
        [(data[ch] - data[ch].mean()) * 0.000001 for ch in CHANNELS_MNIST],
        dtype=float
    )
    info = mne.create_info(ch_names=CHANNELS_MNIST, sfreq=sfreq, ch_types="eeg")
    raw = mne.io.RawArray(ch_array, info, verbose=False)

    data = []
    channel_names = []
    for band_name, low, high in BANDS:
        raw_band = raw.copy().filter(low, high, filter_length='auto', verbose="ERROR")
        data.append(raw_band.get_data())
        channel_names.extend(
            [f"{channel_name}-{band_name}" for channel_name in CHANNELS_MNIST]
        )
    data_combined = np.vstack(data)
    info = mne.create_info(ch_names=channel_names, sfreq=raw.info['sfreq'], ch_types="eeg")
    raw_combined = mne.io.RawArray(data_combined, info, verbose=False)

    raw_combined.set_annotations(
        mne.Annotations(onset=[0], duration=[2], description=[digit])
    )

    raw_combined.save(f"preprocessed/mnist/{event_n}_eeg.fif", overwrite=True, verbose=False)

0
10000
20000
30000
40000
50000
60000


### SPIS

In [1]:
CHANNELS_SPIS = ["F3", "C3", "O1", "F4", "C4", "O2"]
CHANNELS_FULL_SPIS = [
    "Fp1", "AF7", "AF3", "F1", "F3", "F5", "F7", "FT7", "FC5", "FC3", "FC1", "C1", "C3", "C5", "T7", "TP7",
    "CP5", "CP3", "CP1", "P1", "P3", "P5", "P7", "P9", "PO7", "PO3", "O1", "Iz", "Oz", "POz", "Pz", "CPz",
    "Fpz", "Fp2", "AF8", "AF4", "Afz", "Fz", "F2", "F4", "F6", "F8", "FT8", "FC6", "FC4", "FC2", "FCz", "Cz",
    "C2", "C4", "C6", "T8", "TP8", "CP6", "CP4", "CP2", "P2", "P4", "P6", "P8", "P10", "PO8", "PO4", "O2"
]
CHANNELS_NUMS_SPIS = [
    CHANNELS_FULL_SPIS.index(x) for x in CHANNELS_SPIS
]

In [2]:
CHANNELS_NUMS_SPIS

[4, 12, 26, 39, 49, 63]

In [8]:
for subject_new_n, subject_n in enumerate(trange(2, 12)):
    eo = sp.io.loadmat(f"datasets/spis/S{subject_n:02d}_restingPre_EO.mat")["dataRest"][
        CHANNELS_NUMS_SPIS
    ]
    ec = sp.io.loadmat(f"datasets/spis/S{subject_n:02d}_restingPre_EC.mat")["dataRest"][
        CHANNELS_NUMS_SPIS
    ]
    data = np.concatenate((eo, ec), axis=1) * 0.000000001

    info = mne.create_info(ch_names=CHANNELS_SPIS, sfreq=256, ch_types="eeg")
    raw = mne.io.RawArray(data, info, verbose=False)

    data = []
    channel_names = []
    for band_name, low, high in BANDS:
        raw_band = raw.copy().filter(low, high, filter_length='auto', verbose="ERROR")
        data.append(raw_band.get_data())
        channel_names.extend(
            [f"{channel_name}-{band_name}" for channel_name in CHANNELS_SPIS]
        )
    data_combined = np.vstack(data)
    info = mne.create_info(ch_names=channel_names, sfreq=256, ch_types="eeg")
    raw_combined = mne.io.RawArray(data_combined, info, verbose=False)

    raw_combined.set_annotations(
        mne.Annotations(onset=[0, 150], duration=[150, 150], description=["0", "1"])
    )

    raw_combined.save(f"preprocessed/spis/{subject_new_n}_eeg.fif", overwrite=True, verbose=False)

  0%|          | 0/10 [00:00<?, ?it/s]