In [1]:
import os
import re
import mne
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# NOTE(review): `osx` selects the native macOS GUI backend, so this cell only
# works on a Mac - confirm before running elsewhere.
%matplotlib osx

In [3]:
# Directory holding the chb01 recordings and chb01-summary.txt.
# Set the CHBMIT_CHB01_DIR environment variable to use a different copy of the
# data; when it is unset the original external-drive location is used.
root = os.environ.get(
    'CHBMIT_CHB01_DIR',
    '/Volumes/My Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01',
)

In [4]:
!cat /Volumes/My\ Passport/AI_Research/data/physionet.org/files/chbmit/1.0.0/chb01/chb01-summary.txt

Data Sampling Rate: 256 Hz
*************************

Channels in EDF Files:
**********************
Channel 1: FP1-F7
Channel 2: F7-T7
Channel 3: T7-P7
Channel 4: P7-O1
Channel 5: FP1-F3
Channel 6: F3-C3
Channel 7: C3-P3
Channel 8: P3-O1
Channel 9: FP2-F4
Channel 10: F4-C4
Channel 11: C4-P4
Channel 12: P4-O2
Channel 13: FP2-F8
Channel 14: F8-T8
Channel 15: T8-P8
Channel 16: P8-O2
Channel 17: FZ-CZ
Channel 18: CZ-PZ
Channel 19: P7-T7
Channel 20: T7-FT9
Channel 21: FT9-FT10
Channel 22: FT10-T8
Channel 23: T8-P8

File Name: chb01_01.edf
File Start Time: 11:42:54
File End Time: 12:42:54
Number of Seizures in File: 0

File Name: chb01_02.edf
File Start Time: 12:42:57
File End Time: 13:42:57
Number of Seizures in File: 0

File Name: chb01_03.edf
File Start Time: 13:43:04
File End Time: 14:43:04
Number of Seizures in File: 1
Seizure Start Time: 2996 seconds
Seizure End Time: 3036 seconds

File Name: chb01_04.edf
File Start Time: 14:43:12
File End Time: 15:43:12
Number of Seizures in File: 1
S

There are 7 seizures available for this patient. Some files are missing from the sequence. Most files are 1 hour long; however, some are not, especially files containing seizure data.

In [5]:
def parse_summary(fpath):
    """Parse a CHB-MIT patient summary file into a list of per-file records.

    Parameters
    ----------
    fpath : str
        Path to a ``chbNN-summary.txt`` file.

    Returns
    -------
    list of dict
        Records in the order they appear in the summary.

        * A file without seizures yields one dict with the keys
          ``'File Name'``, ``'File Start Time'``, ``'File End Time'`` and
          ``'Number of Seizures in File'`` (``0``).
        * A file with seizures yields one dict *per seizure*, carrying the
          same four keys (the count as an ``int``) plus
          ``'Seizure Start Time'`` and ``'Seizure End Time'``. These two
          values are the raw text after the colon, leading space included
          (e.g. ``' 2996 seconds'``), exactly as callers received before.

    Raises
    ------
    KeyError
        If a record is completed before every field it needs has been seen.
    ValueError
        If the seizure count is not an integer.
    """
    # Some summaries number their seizures ("Seizure 1 Start Time") while
    # others do not ("Seizure Start Time"); accept both spellings.
    seizure_key = re.compile(r'Seizure(?: \d+)? (Start|End) Time')
    base_keys = ('File Name', 'File Start Time', 'File End Time',
                 'Number of Seizures in File')
    seizure_keys = ('Seizure Start Time', 'Seizure End Time')

    with open(fpath) as f:
        lines = f.read().split('\n')

    file_metadata = []
    file_meta = {}  # fields of the file currently being read
    for line in lines:
        # Skip blank lines, asterisk rulers and the channel / sampling-rate
        # header, none of which describe a recording.
        if not line or '*' in line or line.startswith(('Channel', 'Data')):
            continue
        k, _, v = line.partition(':')

        seizure = seizure_key.fullmatch(k)
        if seizure:
            boundary = seizure.group(1)  # 'Start' or 'End'
            # Stored unstripped on purpose: keeps the returned strings
            # identical to what existing callers already parse.
            file_meta['Seizure %s Time' % boundary] = v
            if boundary == 'End':
                # The end time closes one seizure -> emit one record for it.
                file_metadata.append(
                    {key: file_meta[key] for key in base_keys + seizure_keys})
        elif k in ('File Name', 'File Start Time', 'File End Time'):
            file_meta[k] = v.strip()
        elif k == 'Number of Seizures in File':
            n_seizures = int(v)
            if n_seizures == 0:
                # Forget seizure times left over from an earlier file.
                file_meta.pop('Seizure End Time', None)
                file_meta.pop('Seizure Start Time', None)
                file_meta[k] = 0
                file_metadata.append({key: file_meta[key] for key in base_keys})
            elif n_seizures > 0:
                # The record(s) are emitted once the seizure end lines arrive.
                file_meta[k] = n_seizures

    return file_metadata

In [6]:
print('Initialising...', end='')
# Name the summary file explicitly: os.listdir() returns entries in arbitrary
# order, so its first element is not guaranteed to be chb01-summary.txt.
dict_files = parse_summary(os.path.join(root, 'chb01-summary.txt'))
print('[Done]', end='\n\n')

Initialising...[Done]



In [7]:
# Tabulate the parsed per-file records (seizure columns are empty for files
# without a seizure) and keep a spreadsheet copy next to the notebook.
df = pd.DataFrame(dict_files)
df.to_excel('chb_01_summary.xlsx')
df

Unnamed: 0,File Name,File Start Time,File End Time,Number of Seizures in File,Seizure Start Time,Seizure End Time
0,chb01_01.edf,11:42:54,12:42:54,0,,
1,chb01_02.edf,12:42:57,13:42:57,0,,
2,chb01_03.edf,13:43:04,14:43:04,1,2996 seconds,3036 seconds
3,chb01_04.edf,14:43:12,15:43:12,1,1467 seconds,1494 seconds
4,chb01_05.edf,15:43:19,16:43:19,0,,
5,chb01_06.edf,16:43:26,17:43:26,0,,
6,chb01_07.edf,17:43:33,18:43:33,0,,
7,chb01_08.edf,18:43:40,19:43:40,0,,
8,chb01_09.edf,19:43:56,20:43:56,0,,
9,chb01_10.edf,20:44:07,21:44:07,0,,


In [8]:
# Load every EDF listed in the summary (lazily, preload=False) and label it with
# 'Background' / 'Seizure' annotations taken from the summary's seizure times.
print('Index | Filename | Contents')
print('---')
raws = []
i = 0  # position of the next recording inside `raws` (what later cells index by)
for dict_file in dict_files:
    if not dict_file['File Name'].endswith('edf'):
        continue
    fpath = os.path.join(root, dict_file['File Name'])
    raw = mne.io.read_raw_edf(input_fname=fpath, preload=False, verbose='Error')
    print(i, dict_file['File Name'], ' ', end='')
    # Actual recording length in seconds. Not every file is exactly one hour
    # long, so neither 3600 nor 59*60 can be assumed.
    total_secs = raw.n_times / raw.info['sfreq']
    if dict_file['Number of Seizures in File'] > 0:
        # Values look like ' 2996 seconds'; the first token is the offset.
        start_secs = int(dict_file['Seizure Start Time'].split()[0])
        end_secs = int(dict_file['Seizure End Time'].split()[0])
        annot = mne.Annotations(
            onset=[0, start_secs, end_secs],  # in seconds
            duration=[start_secs,
                      end_secs - start_secs,
                      max(total_secs - end_secs, 0)],  # in seconds, too
            description=['Background', 'Seizure', 'Background'])
    else:
        annot = mne.Annotations(onset=[0], duration=[total_secs], description=['Background'])
    raw.set_annotations(annot)
    print(annot)
#     clean_channels(raw)
    raws.append(raw)
    i = i + 1
print('Total files:', len(raws))

Index | Filename | Contents
---
0 chb01_01.edf  <Annotations | 1 segment: Background (1)>
1 chb01_02.edf  <Annotations | 1 segment: Background (1)>
2 chb01_03.edf  <Annotations | 3 segments: Background (2), Seizure (1)>
3 chb01_04.edf  <Annotations | 3 segments: Background (2), Seizure (1)>
4 chb01_05.edf  <Annotations | 1 segment: Background (1)>
5 chb01_06.edf  <Annotations | 1 segment: Background (1)>
6 chb01_07.edf  <Annotations | 1 segment: Background (1)>
7 chb01_08.edf  <Annotations | 1 segment: Background (1)>
8 chb01_09.edf  <Annotations | 1 segment: Background (1)>
9 chb01_10.edf  <Annotations | 1 segment: Background (1)>
10 chb01_11.edf  <Annotations | 1 segment: Background (1)>
11 chb01_12.edf  <Annotations | 1 segment: Background (1)>
12 chb01_13.edf  <Annotations | 1 segment: Background (1)>
13 chb01_14.edf  <Annotations | 1 segment: Background (1)>
14 chb01_15.edf  <Annotations | 3 segments: Background (2), Seizure (1)>
15 chb01_16.edf  <Annotations | 3 segments: Backgro

In [9]:
# mne.concatenate_raws(raws) # updates in-place on raws[0]

This patient has 40 hours and 33 minutes of EEG data available in total.

In [10]:
# Keyword arguments forwarded to each raw.plot() call
plot_kwargs = dict(
    scalings={'eeg': 25e-5},  # manually set scale to 500 uV for 'best' representation
    show_scrollbars=True,
    show=True,
)

In [11]:
# Positions in `raws` of the recordings that carry a 'Seizure' annotation.
# Derived from the annotations instead of being typed in by hand, so the list
# stays correct if the set of loaded files changes.
seizure_raw_indices = [idx for idx, loaded in enumerate(raws)
                       if 'Seizure' in list(loaded.annotations.description)]
for i in seizure_raw_indices:
    raws[i].plot(**plot_kwargs);

Channels marked as bad: none
Channels marked as bad: none
Channels marked as bad: none
Channels marked as bad: none
Channels marked as bad: none
Channels marked as bad: none
Channels marked as bad: none


#### 