In [1]:
import numpy as np
import pandas as pd
from ieeg.auth import Session
import os
import time
from tqdm import tqdm
from numbers import Number
import pyedflib
import torch
import glob
import random

In [3]:
def get_iEEG_data(
    username,
    password_bin_file,
    iEEG_filename,
    start_time_usec,
    stop_time_usec=None,
    select_electrodes=None,
):

    with open(password_bin_file, "r") as f:
        s = Session(username, f.read())
    ds = s.open_dataset(iEEG_filename)
    all_channel_labels = ds.get_channel_labels()

    start_time_usec = int(start_time_usec)
    
    if stop_time_usec is not None:
        stop_time_usec = int(stop_time_usec)
    else:
        stop_time_usec = int(ds.get_time_series_details(all_channel_labels[0]).duration)

    duration = stop_time_usec - start_time_usec

    if select_electrodes is not None:
        if isinstance(select_electrodes[0], Number):
            channel_ids = select_electrodes
            channel_names = [all_channel_labels[e] for e in channel_ids]
        elif isinstance(select_electrodes[0], str):
            channel_ids = [
                i for i, e in enumerate(all_channel_labels) if e in select_electrodes
            ]
            channel_names = select_electrodes
        else:
            print("Electrodes not given as a list of ints or strings")
    else:
        channel_ids = ds.get_channel_indices(all_channel_labels)
        channel_names = all_channel_labels

    try:
        data = ds.get_data(start_time_usec, duration, channel_ids)
    except:
        # clip is probably too big, pull chunks and concatenate
        clip_size = 60 * 1e6
        clip_start = start_time_usec
        data = None
        while clip_start + clip_size < stop_time_usec:
            if data is None:
                data = ds.get_data(clip_start, clip_size, channel_ids)
            else:
                data = np.concatenate(
                    ([data, ds.get_data(clip_start, clip_size, channel_ids)]), axis=0
                )
            clip_start = clip_start + clip_size
        data = np.concatenate(
            ([data, ds.get_data(clip_start, stop_time_usec - clip_start, channel_ids)]),
            axis=0,
        )

    df = pd.DataFrame(data, columns=channel_names)
    fs = ds.get_time_series_details(ds.ch_labels[0]).sample_rate  # get sample rate
    return df

def save_data(save_path,clips,idx,select_channels):
    file_name = clips['ieeg_file_name'].iloc[idx]
    a = clips.iloc[idx]
    #define times
    clip_start = a['clip_start']*1e6
    if a['clip_end']>0: clip_end = a['clip_end']*1e6
    else : clip_end = None
    seizure_on = a['onset_time']*1e6
    if seizure_on < clip_start:
        seizure_on = clip_start
    if a['offset_time_1']*1e6 > seizure_on:
        seizure_off = a['offset_time_1']*1e6
    elif a['offset_time_2']*1e6 > seizure_on:
        seizure_off = a['offset_time_2']*1e6
    else:
        seizure_off = a['offset_time_3']*1e6
    select_channels = ['C3', 'C4', 'Cz', 'F3', 'F4', 'F7', 'F8', 'Fp1', 'Fp2',
                    'Fz', 'O1', 'O2', 'P3', 'P4', 'T3', 'T4', 'T5', 'T6']
    #load from iEEG
    df, fs, clip_end = get_iEEG_data(username='joie1',
                        password_bin_file='ieeglogin.bin',
                        iEEG_filename=file_name,
                        start_time_usec=clip_start,
                        stop_time_usec=clip_end,
                        select_electrodes=select_channels)
    #replace Pz channel
    if 'Pz' in list(df.columns):
        df['Pz'] = (df['P3']+df['Cz']+df['P4']+df['O1']+df['O2'])/5
        
    #convert labels
    labels = [0]*int((seizure_on-clip_start)/1e6*fs)+[1]*int((seizure_off-seizure_on)/1e6*fs)+[0]*int((clip_end-seizure_off)/1e6*fs)
    labels = labels+[0]*(df.shape[0]-len(labels))
    id = clips['admission_id'].iloc[idx]+'_{}'.format(idx+1)
    #df.to_pickle(save_path+id)

    edf_file_name = f"{id}.edf"
    edf_file_path = os.path.join(save_path, edf_file_name)

    # Save the EDF file
    with pyedflib.EdfWriter(edf_file_path, len(select_channels), file_type=pyedflib.FILETYPE_EDFPLUS) as f:
        channel_info = [{'label': ch, 'dimension': 'uV', 'sample_rate': fs, 'physical_min': np.min(df), 'physical_max': np.max(df), 'digital_min': -32768, 'digital_max': 32767} for ch in select_channels]
        f.setSignalHeaders(channel_info)
        f.writeSamples(df.T)

    print(f"Saved EDF file: {edf_file_path}")
    return df, id, labels



In [3]:
anno = pd.read_csv('/home1/k/kuangzy/scalp_eeg/seizure_annotations_UEO_revised.csv')
patient = list(set(anno['admission_id']))
seizure_num = []
for i in patient:
    seizure_num.append(list(anno['admission_id']).count(i))
print('The number of patients is:', len(patient))
print('The average number of clips per patients is:', np.mean(seizure_num))

patient_clip_num = {
    'admission_id': patient,
    'number of clips': seizure_num
}
patient_clip_num = pd.DataFrame(patient_clip_num)
print(patient_clip_num)


The number of patients is: 197
The average number of clips per patients is: 3.030456852791878
    admission_id  number of clips
0        EMU2202                1
1        EMU1699                2
2        EMU1963                3
3        EMU2157                1
4        EMU2163                3
..           ...              ...
192      EMU2219                1
193      EMU1881                2
194      EMU1800                1
195      EMU1877                3
196      EMU1807                2

[197 rows x 2 columns]


In [4]:
select_channels = ['C3', 'C4', 'Cz', 'F3', 'F4', 'F7', 'F8', 'Fp1', 'Fp2',
                    'Fz', 'O1', 'O2', 'P3', 'P4', 'T3', 'T4', 'T5', 'T6']
flag = []
for file in anno['ieeg_file_name']:
    with open('ieeglogin.bin', "r") as f:
        s = Session('joie1', f.read())
    ds = s.open_dataset(file)
    all_channel_labels = ds.get_channel_labels()
    check = True
    for i in select_channels:
        if i not in all_channel_labels:
            check = False
    flag.append(check)

KeyboardInterrupt: 

### transform tests

replicate raw data into 3 channels

In [10]:
import torch
a = get_iEEG_data(username = 'joie1',
                password_bin_file = 'ieeglogin.bin',
                iEEG_filename = 'EMU1061_Event_Day05_1',
                start_time_usec = 52551*1e6,
                stop_time_usec = 52553*1e6,
                select_electrodes = None)

"""a = a.to_numpy().T

a_expanded = np.repeat(a[np.newaxis, :, :], 3, axis=0)

# Step 2: Convert the expanded data into a PyTorch tensor
a_tensor = torch.tensor(a_expanded, dtype=torch.float32)
#a_tensor = a_tensor.permute(1,2,0)
print(a_tensor.shape) #(channel, time, 3)"""

'a = a.to_numpy().T\n\na_expanded = np.repeat(a[np.newaxis, :, :], 3, axis=0)\n\n# Step 2: Convert the expanded data into a PyTorch tensor\na_tensor = torch.tensor(a_expanded, dtype=torch.float32)\n#a_tensor = a_tensor.permute(1,2,0)\nprint(a_tensor.shape) #(channel, time, 3)'

In [None]:
labels = [0]*int((seizure_on-clip_start)/1e6*fs)+[1]*int((seizure_off-seizure_on)/1e6*fs)+[0]*int((clip_end-seizure_off)/1e6*fs)
if len(labels)<df.shape[0]:
    labels = labels+[0]*(df.shape[0]-len(labels))

In [9]:
a = [0,0,0]+[1]*2
a

[0, 0, 0, 1, 1]

### Replace empty clip_end time with the end of the recording

In [62]:
import csv

# Step 1: Read the CSV file
with open('seizure_annotations_UEO_Sep1224.csv', mode='r', newline='') as file:
    reader = csv.reader(file)
    rows = list(reader)  # Convert reader object to list

for row in rows:
    if row[3]:
        continue
    else:
        file_name = row[0]
        with open('/home1/k/kuangzy/scalp_eeg/ieeglogin.bin', "r") as f:
            s = Session('joie1', f.read())
        ds = s.open_dataset(file_name)
        all_channel_labels = ds.get_channel_labels()
        row[3] = int(ds.get_time_series_details(all_channel_labels[0]).duration)/1e6

with open('/home1/k/kuangzy/scalp_eeg/seizure_annotations_UEO_revised.csv', mode='w', newline='') as new_file:
    writer = csv.writer(new_file)
    
    # Write all modified rows to the new CSV
    writer.writerows(rows)

print("Modified rows have been written to 'modified_seizure_annotations.csv'.")


Modified rows have been written to 'modified_seizure_annotations.csv'.


In [11]:
import pyedflib
import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence
x = []
edf_reader = pyedflib.EdfReader('/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files_seizure_end/EMU2085_2.edf')
for chn in range(18):
    x.append(edf_reader.readSignal(chn))
x = np.array(x) # shape of (channel, time)
x = torch.from_numpy(x).float()
edf_reader.close()

In [15]:
from scipy.signal import butter, filtfilt
def bandpass(x, lowcut,highcut,fs):
    b, a = butter(4, [lowcut / (0.5 * fs), highcut / (0.5 * fs)], btype='band')
    return filtfilt(b, a, x)
y = bandpass(x,3,15,256)
print(x.shape)
print(y.shape)

torch.Size([18, 115200])
(18, 115200)


In [3]:
import torch
a = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1])
torch.round(a)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])

In [45]:
import os
folder_path = "/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files_seizure_end"
edf_files = []
print('prepare data...')
for root, dirs, files in os.walk(folder_path):
    for f in files:
        edf_files.append(os.path.join(root, f))


prepare data...


In [31]:
edf_reader.close()

In [42]:
import pickle
id = 'EMU2255_3'
with open('/mnt/sauce/littlab/users/kuangzy/scalp_eeg/eeg_label_seizure_end.pkl', 'rb') as file: 
    labels = pickle.load(file) 
label = labels.loc[labels['id']==id]['labels'].item()
x = []
edf_reader = pyedflib.EdfReader('/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files_seizure_end/'+id+'.edf')
for chn in range(18):
    x.append(edf_reader.readSignal(chn))
x = np.array(x) # shape of (channel, time)
x = torch.from_numpy(x).float()
edf_reader.close()
print(len(label))
print(x.shape)

988960
torch.Size([18, 20736])


warning: EMU2255_Event_Day02_1 has wrong labeling!!
warning: EMU1816_Event_Day02_1 has wrong labeling!!
warning: EMU2100_Event_Day04_1 has wrong labeling!!

In [None]:
x =[]
for chn in range(18):
    x.append(edf_reader.readSignal(chn))
x = np.array(x) # shape of (channel, time)
x = torch.from_numpy(x).float()

In [9]:
labels = pd.read_pickle('eeg_label.pkl')
def seq_len(p):
    return len(p)
labels = pd.read_pickle('eeg_label.pkl')
labels = labels['labels'][:8].to_list()
max_len = len(max(labels,key = seq_len))

In [10]:
pad_labels = []
for i in labels:
    pad_labels.append(i+[0]*(max_len-len(i)))
pad_labels = np.array(pad_labels)
pad_labels = torch.tensor(pad_labels)
pad_labels = pad_labels.type(torch.FloatTensor)
pad_labels = pad_labels.permute(1,0)

In [11]:
temp_labels = pad_labels[10000:20000]
temp_labels = temp_labels.permute(1,0)
targets = [1 if i.mean()>0.2 else 0 for i in temp_labels]
targets = torch.tensor(targets).type(torch.LongTensor)

In [17]:
targets.unsqueeze(1).size()

torch.Size([8, 1])

In [9]:
data_path = '/home1/k/kuangzy/scalp_eeg/edf_files/*'
edf_files = glob.glob(data_path)
# split train and validation
p_train = 0.8
train_edf = random.sample(edf_files,int(p_train*len(edf_files)))
val_edf = [f for f in edf_files if f not in train_edf]

In [11]:
len(val_edf)

120

In [8]:
import torch
tensor = torch.tensor([[0,0,0,1,1,1,1,0,0,0]])
positions = torch.nonzero(tensor == 1, as_tuple=False)
a = positions[:,1].tolist()
a = [i/2 for i in a]
a

[1.5, 2.0, 2.5, 3.0]

In [1]:
import pandas as pd
files = pd.read_csv('seizure_annotations_UEO_revised.csv')

In [None]:
ictal_total = 0
non_ictal_total = 0
for i, row in files.iterrows():
    clip_start, clip_end = row['clip_start'], row['clip_end']
    seizure_on = row['onset_time']
    if row['offset_time_1'] > seizure_on:
        seizure_off = row['offset_time_1']
    elif row['offset_time_2'] > seizure_on:
        seizure_off = row['offset_time_2']
    else: seizure_off = row['offset_time_3']
    seizure = seizure_off-seizure_on
    normal = seizure_on-clip_start
    ictal_total += seizure
    non_ictal_total += normal

In [9]:
ictal_total/(non_ictal_total+ictal_total)

0.11193593119380296

In [37]:
import numpy as np
import pandas as pd
from ieeg.auth import Session
from numbers import Number
import torch
from torch.nn.utils.rnn import pad_sequence
import pyedflib
from scipy.signal import butter, filtfilt

def filtering(data,fs,bandpass):
    nyquist = fs/2
    low = bandpass[0] / nyquist
    high = bandpass[1] / nyquist
    b, a = butter(5, [low, high], btype='band')
    filtered_data = filtfilt(b, a, data)
    return filtered_data

x = []
edf_reader = pyedflib.EdfReader('/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files_seizure_end/EMU2085_2.edf')
for chn in range(18):
    x.append(edf_reader.readSignal(chn))
x = np.array(x) # shape of (channel, time)
x = torch.from_numpy(x).float()
edf_reader.close()

In [2]:
checkpoint = torch.load('onset_detection/checkpoints/best_model.pth')

  checkpoint = torch.load('onset_detection/model_checkpoint.pth')


In [1]:
import pickle
with open('/mnt/sauce/littlab/users/kuangzy/scalp_eeg/test_edf.pkl','rb') as f:
    test = pickle.load(f)
for f in test:
    f_ = f.replace('edf_files_seizure_end','edf_files')
    print(f_)

/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2039_2.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2174_2.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2138_17.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2233_2.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU1501_1.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2138_10.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU1456_1.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2162_5.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU1813_2.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU1881_2.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2321_1.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU2113_12.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU1712_7.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU1992_8.edf
/mnt/sauce/littlab/users/kuangzy/scalp_eeg/edf_files/EMU235