In [1]:
import os
import re
import mne
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
root = '/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/'

## Preictal Class

Training set for this patient will consist of 3 preictal files of length 900 s (15 mins). Test set will contain another 3 preictal files of the same length.

In [3]:
# training_set_names = ['chb01_03.edf' 'chb01_04.edf' 'chb01_15.edf']
# test_set_names = ['chb01_16.edf' 'chb01_18.edf' 'chb01_26.edf']

regex = re.compile(r'^(chb01_03\.edf|chb01_04\.edf|chb01_15\.edf)$')
training_set_files = [root+x for x in os.listdir(root) if regex.search(x)]

regex = re.compile(r'^(chb01_16\.edf|chb01_18\.edf|chb01_26\.edf)$')
test_set_files = [root+x for x in os.listdir(root) if regex.search(x)]

In [4]:
training_set_files

['/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01_03.edf',
 '/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01_04.edf',
 '/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01_15.edf']

In [5]:
test_set_files

['/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01_16.edf',
 '/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01_18.edf',
 '/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01_26.edf']

In [6]:
# init dict with train filenames and 15 mins preictal interval
preictal_train_dict = {}
train_preictal_starts = [2096, 567, 832]
train_preictal_ends = [2996, 1467, 1732]
for i, fpath in enumerate(tqdm(training_set_files)):
#     print('File:', fpath.split('/')[-1])
#     print('Crop start (s):', train_preictal_starts[i])
#     print('Crop end (s):', train_preictal_ends[i])
    
    raw = mne.io.read_raw_edf(input_fname=fpath, preload=False, verbose='Error')
    raw_crop = raw.crop(tmin=train_preictal_starts[i], tmax=train_preictal_ends[i], include_tmax=True)
    data = raw_crop.get_data(picks='all', return_times=False, units='uV')
    preictal_train_dict[fpath.split('/')[-1]] = data

100%|██████████| 3/3 [00:01<00:00,  1.87it/s]


In [7]:
preictal_test_dict = {}
test_preictal_starts = [115, 820, 962]
test_preictal_ends = [1015, 1720, 1862]
for i, fpath in enumerate(tqdm(test_set_files)):
    raw = mne.io.read_raw_edf(input_fname=fpath, preload=False, verbose='Error')
    raw_crop = raw.crop(tmin=test_preictal_starts[i], tmax=test_preictal_ends[i], include_tmax=True)
    data = raw_crop.get_data(picks='all', return_times=False, units='uV')
    preictal_test_dict[fpath.split('/')[-1]] = data

100%|██████████| 3/3 [00:00<00:00,  4.11it/s]


## Interictal class

Training set for this patient will consist of 45 mins of interictal data randomly selected from all available data for this patient. Test set will consist of the remaining ~39 hours of interictal data, excluding the 45 mins interval used for training.

In [8]:
# initially, use 1 file with only interictal data as train...
# ...and remaining interictal files as test

In [9]:
# filter out files from preictal class
regex = re.compile(r'^(chb01_03\.edf|chb01_04\.edf|chb01_15\.edf|chb01_16\.edf|chb01_18\.edf|chb01_26\.edf)|\.(seizures)|\.(txt)|\.(html)$')
interictal_files = [root+x for x in os.listdir(root) if not regex.search(x)]

In [10]:
interictal_files_len = len(interictal_files)
print(interictal_files_len)
index = np.random.randint(interictal_files_len)
print(interictal_files.pop(index))
print(len(interictal_files))

36
/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01_42.edf
35
