# EDF Files Splitting

By: Adam Li

3/26/19: In order to allow facilitation of preprocessing of edf files into fif+json pairs, certain edf files need to be either split, or they need to be combined into one file in order to facilitate further downstream analysis.

E.g.
- EDF file with multiple seizures -> {edf_file01, edf_file02, ...}
- one seizure split into multiple edf files -> {edf_file}



In [1]:
import pyedflib # pip install pyedflib
from datetime import datetime
import mne
import os
import sys
import numpy as np
from pprint import pprint

sys.path.append('../../')

import eztrack
from eztrack.edp.format.formatter_raw import ConvertEDFiEEG
from eztrack.edp.loaders.dataset.timeseries.ieegrecording import iEEGRecording
from eztrack.edp.utils.utils import writejsonfile

import matplotlib.pyplot as plt
import seaborn as sns
import timeit
# Import magic commands for jupyter notebook 
# - autoreloading a module
# - profiling functions for memory usage and scripts
%load_ext autoreload
%autoreload 2

In [3]:
class SplitEDF():
    def __init__(self, edffile, dataset_ids):
        self.file = edffile
        self.dataset_ids = dataset_ids
            
    def load_file(self):
        # initialize converter
        edfconverter = ConvertEDFiEEG(datatype='ieeg')
        # load in the dataset and create metadata object
        edfconverter.load_file(filepath=self.file)

        # load in info data structure and edf annotated events
        edfconverter.extract_info_and_events(pat_id=patid, autofind_markers=False)

        return edfconverter
    
    def split_datasets(self):
        # go through each datasetid string and find it in events
        for dataset_id in self.dataset_ids:
            pass
    def _update_dict(self, master_dict, appendage_dict):
        TIME_DEPENDENT_KEYS = ['length_of_recording',
                               'events', 
                               'onset', 
                               'termination']

        prevlen = master_dict['length_of_recording']
        # samplerate = master_dict['samplerate']
        samplerate = self.samplerate
        prevsec = self._convert_sec(prevlen, samplerate)

        # print("Lengths of recordings: ", prevlen, samplerate, prevsec)
        for key in appendage_dict.keys():
            if key in TIME_DEPENDENT_KEYS:
                if key == 'length_of_recording':
                    master_dict[key] = appendage_dict[key] + prevlen
                elif key == 'onset' or key == 'termination':
                    master_dict[key] = appendage_dict[key] + prevsec
                elif key == 'events':
                    master_dict[key] = self._concat_events(master_dict[key],
                                                           appendage_dict[key],
                                                           prevsec)
            if key not in master_dict.keys():
                master_dict[key] = appendage_dict[key]

        return master_dict

    def _convert_sec(self, index, samplerate):
        return np.divide(index, samplerate)

    def _concat_events(self, events_list, new_events, recording_length_seconds):
        for event in new_events:
            new_event = event
            new_event[0] = float(new_event[0]) + recording_length_seconds
            events_list = np.concatenate(
                (events_list, np.expand_dims(new_event, axis=0)), axis=0)
            
        return events_list
    
    def save_fif(self, fif_raw, dataset_metadata, datafilepath, replace=False):
        """
        Conversion function for the rawdata + metadata into a .fif file format with accompanying metadata .json
        object.

        rawdata + metadata_dict -> .fif + .json

        :param newfilepath:
        :param dataset_metadata:
        :param replace:
        :return:
        """
        # create a new information structure
        rawdata = fif_raw.get_data(return_times=False)
        assert rawdata.shape[0] == dataset_metadata['number_chans']

        fif_raw.save(datafilepath,
                     overwrite=replace,
                     verbose='ERROR')

        # create a filepath for the json object
        dataset_metadata['filename'] = os.path.basename(datafilepath)
        newmetafilepath = datafilepath.replace('_raw.fif', '.json')

        # save the formatted metadata json object
        writejsonfile(dataset_metadata, newmetafilepath, overwrite=replace)


## Data Directories

In [4]:
center = 'clevelandtvb'
patid = 'nl22'
patid = 'tvb11'
modality = 'seeg'
patdir = os.path.join(f"/Users/adam2392/Downloads/tngpipeline/{center}/{patid}/{modality}/edf/")
patdir = os.path.join(f"/home/adam2392/hdd/data/rawdata/{center}/{patid}/{modality}/edf/")

In [5]:
orig_dataset_ids = {
    "sz_8p_-_ncs": ["8p", "ncs"],
    "sz_9p_-_10p": ["9p", "10p"],
    "sz_11p_-_12p": ["11p", "12p"],
    "sz_14p_-_15p_-_16p": ["14p", "15p", "16p"]
}
datadir = os.path.join(patdir, f"split/")
edffiles = [os.path.join(datadir,f) for f in os.listdir(datadir) if f.endswith('.edf')]

print(edffiles)

['/home/adam2392/hdd/data/rawdata/clevelandtvb/tvb11/seeg/edf/split/TVB11_SEEG_SZ_11P_-_12P.edf', '/home/adam2392/hdd/data/rawdata/clevelandtvb/tvb11/seeg/edf/split/TVB11_SEEG_SZ_9P_-_10P.edf', '/home/adam2392/hdd/data/rawdata/clevelandtvb/tvb11/seeg/edf/split/TVB11_SEEG_SZ_8P_-_NCS.edf', '/home/adam2392/hdd/data/rawdata/clevelandtvb/tvb11/seeg/edf/split/TVB11_SEEG_SZ_14P_-_15P_-_16P.edf']


# Split EDF File

In [6]:
for fpath in edffiles:
    edffilename = os.path.basename(fpath).lower()
    edf_datasetid = "sz" + edffilename.split("_sz")[1]
    
    datasetids = orig_dataset_ids[os.path.splitext(edf_datasetid)[0]]
    
#     f = pyedflib.EdfReader(fpath)
#     print(f.readAnnotations())
# #     pyedflib.close_file(f)
    
#     f._close()
    splitter = SplitEDF(fpath, datasetids)
    edfconverter = splitter.load_file()
    
    break

Used Annotations descriptions: ['+0.000000', '+121.810000', '+128.752000', '+128.903000', '+133.614000', '+144.769000', '+334.464000', '+343.380000', '+344.255000', '+357.795000', '+86.177000', 'A1+A2 OFF', 'END', 'SPK run F', 'SZ 11P (good video)', 'SZ 12P', 'Segment: REC START Tech-Bi EE', 'close eyes', 'end', 'moves pelvice', 'right face tonic']


In [33]:
def find_end(startindex, eventtimes, eventnames, idlist):
#     print(startindex)
    # go from index to end of the event times
    for i in range(startindex, eventtimes.shape[0]):
        eventid = eventtimes[i,2]
        
        # get eventidind
        eventidind = np.where(idlist == eventid)[0][0]
        eventname = eventnames[eventidind]
        
        print(eventname)
        if "end" in eventname:
            return i
    return None

In [41]:
# print(datasetids)

for datasetid in datasetids:
    datasetid = "sz_" + datasetid
    
    # get np.array of eventtimes
    eventtime_arr = edfconverter.event_times
    
    # get eventids and preprocess the names
    event_ids = edfconverter.event_ids
    eventnames = list(event_ids.keys())
    idlist = [event_ids[name] for name in eventnames]
    eventnames = ["_".join(name.lower().split(" ")) for name in eventnames]
    
    # get index for dataset start
    dataset_startind = [ind for ind, name in enumerate(eventnames) if datasetid in name][0]
    
    # get the id
    startid = idlist[dataset_startind]
    startname = eventnames[dataset_startind]
    
    # get the index for id
    startind = np.where(eventtime_arr[:,2] == startid)[0][0]

    # find the end of this dataset
    endind = find_end(startind, eventtime_arr, eventnames, idlist)
    
    print(eventtime_arr[startind, :])
#     print(datasetstart)
    print(startid, startname)
    
    print(startind, endind)
    print(eventtime_arr[endind, :])
    print(idlist[endind], eventnames[endind])
    
#     pprint(event_ids)
    
    break

sz_11p_(good_video)
+128.752000
close_eyes
+128.903000
moves_pelvice
+133.614000
right_face_tonic
+144.769000
end
[121760      0     15]
15 sz_11p_(good_video)
6 14
[144640      0     19]
15 sz_11p_(good_video)


In [46]:
print(eventnames[dataset_startind])
print(eventnames[endind-1])
# print(eventtime_arr)
# print(idlist)
# pprint(eventnames)

sz_11p_(good_video)
spk_run_f


In [53]:
import mne
event_times, event_ids = mne.events_from_annotations(edfconverter.raw, 
                                                     verbose=True,
#                                                      regexp=None,
                                                    event_id=None)

print(event_ids)


Used Annotations descriptions: ['+0.000000', 'A1+A2 OFF', 'Segment: REC START Tech-Bi EE']
{'+0.000000': 1, 'A1+A2 OFF': 2, 'Segment: REC START Tech-Bi EE': 3}


In [54]:
print(event_times)

[[0 0 1]
 [0 0 3]
 [0 0 2]]


In [35]:

print(edfconverter.filepath)
print(edfconverter.event_times, edfconverter.event_ids)

/home/adam2392/hdd/data/rawdata/clevelandtvb/tvb11/seeg/edf/split/TVB11_SEEG_SZ_11P_-_12P.edf
[[0 0 1]
 [0 0 3]
 [0 0 2]] {'+0.000000': 1, 'A1+A2 OFF': 2, 'Segment: REC START Tech-Bi EE': 3}


# Resave EDF File and Load In

In [None]:
dataset_ids = ["sz_4p", "sz_5p"]

for dataset_id in dataset_ids:
    fname = os.path.join(patdir, f"{patid}_{dataset_id}.edf")
    events_list = combined_metadata['events']

    write_edf(combined_rawfif, fname, events_list, picks=None, 
              tmin=0, tmax=None, overwrite=True)
    

In [None]:
rawedf = mne.io.read_raw_edf(fname, preload=True)

print(rawedf)

rawdata = rawedf.get_data()
print(rawdata.shape)

fig, ax = plt.subplots(1,1, figsize=(20,20))
for i in range(len(combined_rawfif.ch_names)):
    ax.plot(np.r_[i]+rawdata[i,20000:20400] / max(rawdata[i,20000:20400]))
    
    break
#     if i == 10:
#         break

# Write Combined FIF Into Another EDF File