In this notebook, I show that braindecode dataset's create_from_mne_raw does not actually work with any edf file.

This is because in their example, the data loaded has annotations, which is necessary for the creation of the database. If there are no annotations in the mne rawEDF object after it is read, either add them yourself or use another function than create_windows_from_events , such as create_fixed_length_windows. This is because events are created using annotations, hence without annotations this function returns an error.   

# Step 1: Load the files, you want a list of rawEDF (TUSZ)
## TUSZ data

In [1]:
import mne            
from braindecode.datasets import create_from_mne_raw
from braindecode.datasets.base import BaseDataset, BaseConcatDataset
import glob
import os

TUSZ_path = ('/rds/general/user/nm2318/home/projects/scott_data_tuh/live/tuh_eeg_seizure/v2.0.0')

debug = True 
file_paths = glob.glob(os.path.join(TUSZ_path, '**/*.edf'), recursive=True)
# If debug is true and the number of edf files is bigger than 5, select 
# the first 5 file_paths
if (debug and len(file_paths)>100):
        file_paths = file_paths[0:5]

# Load each of the files
parts = [mne.io.read_raw_edf(path, preload=True, stim_channel='auto', verbose='WARNING') for path in file_paths]
attrs = [part.info for part in parts]

base_datasets = [BaseDataset(raw) for raw in parts]
base_datasets[0].raw
base_datasets[0]

2023-11-15 12:25:09.703596: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-15 12:25:09.740423: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-15 12:25:09.740485: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-15 12:25:09.740506: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-15 12:25:09.748103: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-15 12:25:09.748744: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

<braindecode.datasets.base.BaseDataset at 0x1541fd38a290>

In [2]:
base_datasets

[<braindecode.datasets.base.BaseDataset at 0x1541fd38a290>,
 <braindecode.datasets.base.BaseDataset at 0x1541f82d44d0>,
 <braindecode.datasets.base.BaseDataset at 0x1541f82d5550>,
 <braindecode.datasets.base.BaseDataset at 0x1541f82d42d0>,
 <braindecode.datasets.base.BaseDataset at 0x1541f723fa50>]

## Create a concatenated version of the basedataset.
What you need to feed the class is a list of all the basedatasets you want to concatenate, even if it only contains 1 values

In [3]:
base_concat_datasets = BaseConcatDataset(base_datasets)
base_concat_datasets.datasets[0].raw #sanity check
base_concat_datasets.datasets[0].raw.annotations

<Annotations | 0 segments>

## Load them as windows dataset 
This fails when using tusz data, I added a some checkpoints in the braindecode library file to show what differs between the examples and using the TUSZ

In [4]:
windows_dataset = create_from_mne_raw(
    parts,
    trial_start_offset_samples=0,
    trial_stop_offset_samples=0,
    window_size_samples=500,
    window_stride_samples=500,
    drop_last_window=True)


checkpoint 1, in windowers.py
<braindecode.datasets.base.BaseConcatDataset object at 0x1542ba950d50>

checkpoint 2, in windowers.py, _create_... function
<braindecode.datasets.base.BaseDataset object at 0x1542bc140750>
<RawEDF | aaaaaexe_s001_t001.edf, 41 x 356500 (1426.0 s), ~111.6 MB, data loaded>
<Annotations | 0 segments>
[]
checkpoint 3, infer_mapping is True
checkpoint 4, mapping:  {}
checkpoint 5, event, eventid:  [] {}


IndexError: index -1 is out of bounds for axis 0 with size 0

# Step 2: do it for their data - look how it works and how the checkpoints differ

In [5]:
import mne
from braindecode.datasets import (create_from_mne_raw, create_from_mne_epochs)
from braindecode.datasets.base import BaseDataset, BaseConcatDataset


subject_id = 12
event_codes = [5, 6, 9, 10, 13, 14]
# event_codes = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

# This will download the files if you don't have them yet,
# and then return the paths to the files.
physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes, update_path=False)

# Load each of the files
parts_test = [mne.io.read_raw_edf(path, preload=True, stim_channel='auto', verbose='WARNING') for path in physionet_paths]

base_datasets_test = [BaseDataset(raw) for raw in [parts_test]]
base_concat_datasets_test = BaseConcatDataset(base_datasets_test)
len(parts_test)

Using default location ~/mne_data for EEGBCI...


6

In [6]:
windows_dataset_test = create_from_mne_raw(
    parts_test,
    trial_start_offset_samples=0,
    trial_stop_offset_samples=0,
    window_size_samples=500,
    window_stride_samples=500,
    drop_last_window=True)

checkpoint 1, in windowers.py
<braindecode.datasets.base.BaseConcatDataset object at 0x1541ec02eb10>

checkpoint 2, in windowers.py, _create_... function
<braindecode.datasets.base.BaseDataset object at 0x1541ec02db50>
<RawEDF | S012R05.edf, 64 x 19680 (123.0 s), ~9.7 MB, data loaded>
<Annotations | 30 segments: T0 (15), T1 (7), T2 (8)>
['T0' 'T1' 'T0' 'T2' 'T0' 'T1' 'T0' 'T2' 'T0' 'T2' 'T0' 'T1' 'T0' 'T2'
 'T0' 'T1' 'T0' 'T1' 'T0' 'T2' 'T0' 'T2' 'T0' 'T1' 'T0' 'T2' 'T0' 'T1'
 'T0' 'T2']
checkpoint 3, infer_mapping is True
checkpoint 4, mapping:  {'T0': 0, 'T1': 1, 'T2': 2}
Used Annotations descriptions: ['T0', 'T1', 'T2']
checkpoint 5, event, eventid:  [[    0     0     0]
 [  656     0     1]
 [ 1312     0     0]
 [ 1968     0     2]
 [ 2624     0     0]
 [ 3280     0     1]
 [ 3936     0     0]
 [ 4592     0     2]
 [ 5248     0     0]
 [ 5904     0     2]
 [ 6560     0     0]
 [ 7216     0     1]
 [ 7872     0     0]
 [ 8528     0     2]
 [ 9184     0     0]
 [ 9840     0     1]
 [

In [7]:
print(base_concat_datasets_test)
print(base_concat_datasets_test.datasets[0])
print(base_concat_datasets_test.datasets[0].raw)

<braindecode.datasets.base.BaseConcatDataset object at 0x1541ebf0e4d0>
<braindecode.datasets.base.BaseDataset object at 0x1541ebebc290>
[<RawEDF | S012R05.edf, 64 x 19680 (123.0 s), ~9.7 MB, data loaded>, <RawEDF | S012R06.edf, 64 x 19680 (123.0 s), ~9.7 MB, data loaded>, <RawEDF | S012R09.edf, 64 x 19680 (123.0 s), ~9.7 MB, data loaded>, <RawEDF | S012R10.edf, 64 x 19680 (123.0 s), ~9.7 MB, data loaded>, <RawEDF | S012R13.edf, 64 x 19680 (123.0 s), ~9.7 MB, data loaded>, <RawEDF | S012R14.edf, 64 x 19680 (123.0 s), ~9.7 MB, data loaded>]


In [8]:
parts_test[0].annotations 

<Annotations | 30 segments: T0 (15), T1 (7), T2 (8)>

In [9]:
parts_test[0].annotations.description

array(['T0', 'T1', 'T0', 'T2', 'T0', 'T1', 'T0', 'T2', 'T0', 'T2', 'T0',
       'T1', 'T0', 'T2', 'T0', 'T1', 'T0', 'T1', 'T0', 'T2', 'T0', 'T2',
       'T0', 'T1', 'T0', 'T2', 'T0', 'T1', 'T0', 'T2'], dtype='<U2')