Average seizure clips of each patient

In [1]:
import numpy as np
import pandas as pd
from ieeg.auth import Session
import re
import matplotlib.pyplot as plt
# from keras.models import model_from_json
# from tensorflow.keras.optimizers import Adam
import os
# import pickle
import gc
from numbers import Number
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import ConvergenceWarning
from scipy.signal import butter, sosfilt, resample

def get_iEEG_data(
    username,
    password_bin_file,
    iEEG_filename,
    start_time_usec,
    stop_time_usec=None,
    select_electrodes=None,
):

    with open(password_bin_file, "r") as f:
        s = Session(username, f.read())
    ds = s.open_dataset(iEEG_filename)
    all_channel_labels = ds.get_channel_labels()

    start_time_usec = int(start_time_usec)
    
    if stop_time_usec is not None:
        stop_time_usec = int(stop_time_usec)
    else:
        stop_time_usec = int(ds.get_time_series_details(all_channel_labels[0]).duration)

    duration = stop_time_usec - start_time_usec

    if select_electrodes is not None:
        if isinstance(select_electrodes[0], Number):
            channel_ids = select_electrodes
            channel_names = [all_channel_labels[e] for e in channel_ids]
        elif isinstance(select_electrodes[0], str):
            channel_ids = [
                i for i, e in enumerate(all_channel_labels) if e in select_electrodes
            ]
            channel_names = select_electrodes
        else:
            print("Electrodes not given as a list of ints or strings")
    else:
        channel_ids = ds.get_channel_indices(all_channel_labels)
        channel_names = all_channel_labels

    try:
        data = ds.get_data(start_time_usec, duration, channel_ids)
    except:
        # clip is probably too big, pull chunks and concatenate
        clip_size = 60 * 1e6
        clip_start = start_time_usec
        data = None
        while clip_start + clip_size < stop_time_usec:
            if data is None:
                data = ds.get_data(clip_start, clip_size, channel_ids)
            else:
                data = np.concatenate(
                    ([data, ds.get_data(clip_start, clip_size, channel_ids)]), axis=0
                )
            clip_start = clip_start + clip_size
        data = np.concatenate(
            ([data, ds.get_data(clip_start, stop_time_usec - clip_start, channel_ids)]),
            axis=0,
        )

    df = pd.DataFrame(data, columns=channel_names)
    fs = ds.get_time_series_details(ds.ch_labels[0]).sample_rate  # get sample rate

    return df, fs, stop_time_usec

In [20]:
def extract_data_and_labels(save_path,clips,idx):
    file_name = clips['ieeg_file_name'].iloc[idx]
    a = clips.iloc[idx]
    #define times
    clip_start = a['clip_start']*1e6
    if a['clip_end'] > 0 : clip_end = a['clip_end']*1e6
    else : clip_end = None
    seizure_on = a['onset_time']*1e6
    if a['offset_time_1']*1e6 > seizure_on:
        seizure_off = a['offset_time_1']*1e6
    elif a['offset_time_2']*1e6 > seizure_on:
        seizure_off = a['offset_time_2']*1e6
    else:
        seizure_off = a['offset_time_3']

    #load from iEEG
    df, fs, clip_end = get_iEEG_data(username='joie1',
                        password_bin_file='ieeglogin.bin',
                        iEEG_filename=file_name,
                        start_time_usec=clip_start,
                        stop_time_usec=clip_end,
                        select_electrodes=None)

    #convert labels
    labels = [0]*int((seizure_on-clip_start)/1e6*fs)+[1]*int((seizure_off-seizure_on)/1e6*fs)+[0]*int((clip_end-seizure_off)/1e6*fs)
    labels = labels+[0]*(df.shape[0]-len(labels))
    id = clips['admission_id'].iloc[idx]+'_{}'.format(idx+1)

    df.to_pickle(save_path+id)
    return df, id, labels

In [3]:
import pandas as pd
import numpy as np
anno = pd.read_csv('seizure_annotations_Aug0824_UEO_EEC.csv')
patient = list(set(anno['admission_id']))
seizure_num = []
for i in patient:
    seizure_num.append(list(anno['admission_id']).count(i))
print('The number of patients is:', len(patient))
print('The average number of clips per patients is:', np.mean(seizure_num))

patient_clip_num = {
    'admission_id': patient,

    'number of clips': seizure_num
}
patient_clip_num = pd.DataFrame(patient_clip_num)
print(patient_clip_num)

The number of patients is: 49
The average number of clips per patients is: 4.469387755102041
   admission_id  number of clips
0       EMU1061                1
1       EMU1954                2
2       EMU1395                1
3       EMU2144                2
4       EMU1653                1
5       EMU1838                4
6       EMU2102                2
7       EMU2091                2
8       EMU1835                4
9       EMU1965                1
10      EMU2138               45
11      EMU1407                1
12      EMU1843                3
13      EMU1670                3
14      EMU2143                2
15      EMU2113               13
16      EMU2147                1
17      EMU1839                2
18      EMU2109                1
19      EMU2141               10
20      EMU1669                1
21      EMU1648                3
22      EMU1680                6
23      EMU1829               10
24      EMU2089                2
25      EMU1359                1
26      EMU1834 

Label the clip with seizures

In [44]:
temp_clips = anno.loc[anno['admission_id'] == patient[6]]
temp_clips

Unnamed: 0,seizure_number,ieeg_file_name,admission_id,clip_start,clip_end,onset_time,onset_annotation,UEO vs EEC,offset_time_1,offset_annotation_1,offset_time_2,offset_annotation_2,offset_time_3,offset_annotation_3
114,115,EMU2102_Event_Day06_1,EMU2102,64386.46875,64874.75,64467.332031,"UEO,L,frontal,temporal,spike,r,r,r,L,temporal,...",UEO,64506.617187,"r,r,l,off",64536.199218,OFF,,
115,116,EMU2102_Event_Day06_1,EMU2102,76206.78125,76457.374999,76362.339843,"UEO,L,spike",UEO,76421.699218,OFF,,,,


In [22]:
save_path = './EEGData/'
ids, labels = [],[]

In [47]:
temp_clips = anno.loc[anno['admission_id'] == patient[6]]
_, x, y = extract_data_and_labels(temp_clips,1)
ids.append(x)
labels.append(y)

In [56]:
label_ = pd.DataFrame({'file_name':ids,'labels':labels})
label_.to_csv('./EEGData/labels.csv')

Unnamed: 0,file_name,labels
0,EMU1061_1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,EMU1954_1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,EMU1954_2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,EMU1395_1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,EMU2144_1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
5,EMU2144_2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
6,EMU1653_1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
7,EMU1838_1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
8,EMU1838_2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
9,EMU1838_3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


For fully extract

In [72]:
ids, labels = [], [] 
for patient_idx in len(patient):
    temp_clips = anno.loc[anno['admission_id'] == patient[patient_idx]]
    try:
        for i in range(seizure_num[patient_idx]):
            _,x,y = extract_data_and_labels(temp_clips,i)
            ids.append(x)
            labels.append(y)
    except:
        _,x,y = extract_data_and_labels(temp_clips,0)
        ids.append(x)
        labels.append(y)
label_ = pd.DataFrame({'file_name':ids,'labels':labels})