In [1]:
import os
import re
import random
import pprint
import mne
from mne.io.edf.edf import RawEDF
import numpy as np
from datetime import datetime
import pandas as pd

In [2]:
def info_dict(content):
    part_info_dict = {}
    
    line_nos=len(content)
    line_no=1

    channels = []
    file_name = []
    file_info_dict={}

    for line in content:
        # if there is Channel in the line...
        if re.findall('Channel \d+', line):
            # split the line into channel number and channel reference
            channel = line.split(': ')
            # get the channel reference and remove any new lines
            channel = channel[-1].replace("\n", "")
            # put into the channel list
            channels.append(channel)

        # if the line is the file name
        elif re.findall('File Name', line):
            # if there is already a file_name
            if file_name:
                # flush the current file info to it
                part_info_dict[file_name] = file_info_dict
        
            # get the file name
            file_name = re.findall('\w+\d+_\d+|\w+\d+\w+_\d+', line)[0]

            file_info_dict = {}
            # put the channel list in the file info dict and remove duplicates
            file_info_dict['Channels'] = list(set(channels))
            # reset the rest of the options
            file_info_dict['Start Time'] = ''
            file_info_dict['End Time'] = ''
            file_info_dict['Seizures Window'] = []

        # if the line is about the file start time
        elif re.findall('File Start Time', line):
            # get the start time
            file_info_dict['Start Time'] = re.findall('\d+:\d+:\d+', line)[0]

        # if the line is about the file end time
        elif re.findall('File End Time', line):
            # get the start time
            file_info_dict['End Time'] = re.findall('\d+:\d+:\d+', line)[0]

        elif re.findall('Seizure Start Time|Seizure End Time|Seizure \d+ Start Time|Seizure \d+ End Time', line):
            file_info_dict['Seizures Window'].append(int(re.findall('\d+', line)[-1]))

        # if last line in the list...
        if line_no == line_nos:
            # flush the file info to it
            part_info_dict[file_name] = file_info_dict

        line_no+=1
    return part_info_dict

def patient_summary(root, patient_id):
    regex = re.compile('^chb\d{2}-summary.txt$')
    summary_fname = [x for x in os.listdir(root + patient_id) if regex.search(x)]
    summary_fpath = root + patient_id + '/' + summary_fname[0]
#         patient_summary = self._parse_summary(summary_fpath)
    content_str = ''
    with open(summary_fpath) as f:
        content_str = f.readlines()
    patient_summary = info_dict(content_str)
    
    return patient_summary, patient_id

def seizure_dataframe(patient_summary, patient_id):
    df = pd.DataFrame({"patient": [], "case": [], "seizure_file": [], "number_of_channels": [], "channels": [], "seizure_number": [], "seizure_start":[], "seizure_end":[]})
    temp_pt = []
    temp_ss = []
    temp_es = []
    temp_file = []
    temp_ns = []
    temp_case = []
    temp_channels = []
    temp_numchannels = []
    
    data = patient_summary
    case = patient_id
    

    for i, key in enumerate(data.keys()):
        
        if len(data[key]['Seizures Window']) == 2:
            temp_ss.append(data[key]['Seizures Window'][0])
            temp_es.append(data[key]['Seizures Window'][1])
            temp_ns.append(1)
            temp_file.append(key)
            temp_case.append(case)
            temp_pt.append(case)
            temp_numchannels.append(len(data[key]['Channels']))
            temp_channels.append(data[key]['Channels'])
            
        if len(data[key]['Seizures Window'])  > 2:
            extras = len(data[key]['Seizures Window'])/2
            for j in range(int(extras)):
                temp_ss.append(data[key]['Seizures Window'][j*2])
                temp_es.append(data[key]['Seizures Window'][j*2+1])
                temp_ns.append(j+1)
                temp_file.append(key)
                temp_case.append(case)
                temp_pt.append(case)
                temp_numchannels.append(len(data[key]['Channels']))
                temp_channels.append(data[key]['Channels'])
            
    for i in range(len(temp_ss)):
        df.loc[len(df)] = [temp_pt[i],temp_case[i], temp_file[i], temp_numchannels[i], temp_channels[i], temp_ns[i], temp_ss[i], temp_es[i]]      

    df['seizure_duration'] = df.apply(lambda x: x['seizure_end'] - x['seizure_start'], axis=1)
    
    return df

In [4]:
rootdir = f'{os.getcwd()}/data/'
cases = ['chb01','chb02','chb03','chb04','chb05','chb06','chb07','chb08','chb09',
         'chb10', 'chb11','chb12','chb13','chb14','chb15','chb16','chb17','chb18',
         'chb19','chb20', 'chb21', 'chb22','chb23','chb24']
x = patient_summary(rootdir,cases[0])
x

({'chb01_01': {'Channels': ['P4-O2',
    'P7-O1',
    'FP1-F7',
    'C3-P3',
    'FT9-FT10',
    'FP2-F8',
    'T8-P8',
    'P7-T7',
    'T7-P7',
    'C4-P4',
    'F8-T8',
    'F7-T7',
    'CZ-PZ',
    'F4-C4',
    'F3-C3',
    'FT10-T8',
    'FP2-F4',
    'T7-FT9',
    'FZ-CZ',
    'P3-O1',
    'FP1-F3',
    'P8-O2'],
   'Start Time': '11:42:54',
   'End Time': '12:42:54',
   'Seizures Window': []},
  'chb01_02': {'Channels': ['P4-O2',
    'P7-O1',
    'FP1-F7',
    'C3-P3',
    'FT9-FT10',
    'FP2-F8',
    'T8-P8',
    'P7-T7',
    'T7-P7',
    'C4-P4',
    'F8-T8',
    'F7-T7',
    'CZ-PZ',
    'F4-C4',
    'F3-C3',
    'FT10-T8',
    'FP2-F4',
    'T7-FT9',
    'FZ-CZ',
    'P3-O1',
    'FP1-F3',
    'P8-O2'],
   'Start Time': '12:42:57',
   'End Time': '13:42:57',
   'Seizures Window': []},
  'chb01_03': {'Channels': ['P4-O2',
    'P7-O1',
    'FP1-F7',
    'C3-P3',
    'FT9-FT10',
    'FP2-F8',
    'T8-P8',
    'P7-T7',
    'T7-P7',
    'C4-P4',
    'F8-T8',
    'F7-T7',
    'CZ

In [6]:
df = pd.DataFrame({"patient": [], "case": [], "seizure_file": [], "number_of_channels": [], "channels": [], "seizure_number": [], "seizure_start":[], "seizure_end":[]})
cases = ['chb01','chb02','chb03','chb04','chb05','chb06','chb07','chb08','chb09',
         'chb10', 'chb11','chb12','chb13','chb14','chb15','chb16','chb17','chb18',
         'chb19','chb20', 'chb21', 'chb22','chb23','chb24']

for case in cases:
    data, patient_id = patient_summary(rootdir, case)
    temp_df = seizure_dataframe(data, patient_id)
    df = pd.concat([df,temp_df])

df = df.reset_index(drop=True)
df['patient'] = df['patient'].replace(['chb21'],'chb01')

df

  element = np.asarray(element)


Unnamed: 0,patient,case,seizure_file,number_of_channels,channels,seizure_number,seizure_start,seizure_end,seizure_duration
0,chb01,chb01,chb01_03,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,2996.0,3036.0,40.0
1,chb01,chb01,chb01_04,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1467.0,1494.0,27.0
2,chb01,chb01,chb01_15,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1732.0,1772.0,40.0
3,chb01,chb01,chb01_16,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1015.0,1066.0,51.0
4,chb01,chb01,chb01_18,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1720.0,1810.0,90.0
...,...,...,...,...,...,...,...,...,...
177,chb24,chb24,chb24_07,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,38.0,60.0,22.0
178,chb24,chb24,chb24_09,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1745.0,1764.0,19.0
179,chb24,chb24,chb24_13,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,3288.0,3304.0,16.0
180,chb24,chb24,chb24_14,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1939.0,1966.0,27.0


In [14]:
listofnumber = list(np.arange(1,25))
listofnumber[20]=1
listofnumber

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 1,
 22,
 23,
 24]

In [15]:
df['patient_number'] = df.loc[:, 'patient']
first_column = df.pop('patient_number')
listofnumber = list(np.arange(1,25))
  
# insert column using insert(position,column_name,first_column) function
df.insert(0, 'patient_number', first_column)

df['patient_number'] = df['patient_number'].replace(['chb01','chb02','chb03','chb04','chb05','chb06','chb07','chb08','chb09',
         'chb10', 'chb11','chb12','chb13','chb14','chb15','chb16','chb17','chb18',
         'chb19','chb20', 'chb21', 'chb22','chb23', 'chb24'], listofnumber)
df.sort_values("patient_number")


Unnamed: 0,patient_number,patient,case,seizure_file,number_of_channels,channels,seizure_number,seizure_start,seizure_end,seizure_duration
0,1,chb01,chb01,chb01_03,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,2996.0,3036.0,40.0
156,1,chb01,chb21,chb21_20,23.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,2627.0,2677.0,50.0
155,1,chb01,chb21,chb21_19,23.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1288.0,1344.0,56.0
157,1,chb01,chb21,chb21_21,23.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,2003.0,2084.0,81.0
6,1,chb01,chb01,chb01_26,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,1862.0,1963.0,101.0
...,...,...,...,...,...,...,...,...,...,...
172,24,chb24,chb24,chb24_03,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",2.0,2883.0,2908.0,25.0
170,24,chb24,chb24,chb24_01,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",2.0,2451.0,2476.0,25.0
169,24,chb24,chb24,chb24_01,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",1.0,480.0,505.0,25.0
174,24,chb24,chb24,chb24_04,22.0,"[P4-O2, P7-O1, FP1-F7, C3-P3, FT9-FT10, FP2-F8...",2.0,1411.0,1438.0,27.0


In [16]:
df.to_excel("output.xlsx")