In [None]:
import os
import shutil
import pathlib
import pyedflib
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

## EDF Files Manipulation

### Read an EDF file

In [None]:
def readEdfFile(pathToFile, channels):
    '''
    Read an EDF recording and return the requested columns as a 2-D array.

    Every signal in the file is loaded into one column, and an extra
    all-zero column labelled 'seizure' is appended, so 'seizure' can be
    requested through `channels` like any signal label.

    References:
        https://stackoverflow.com/questions/48784257/convert-eye-tracking-edf-file-to-asc-csv-format
        https://pyedflib.readthedocs.io/en/latest/
        https://www.edfplus.info/specs/edf.html

    Parameters
    ----------
    pathToFile : str
        Path of the EDF file to open with pyedflib.
    channels : list of str
        Column labels to keep (signal labels from the file and/or
        'seizure'), in the desired output order.

    Returns
    -------
    numpy.ndarray
        One row per sample, one column per selected label. When a label
        occurs more than once among the selected columns, only its first
        occurrence is kept.

    Raises
    ------
    KeyError
        If a requested label is neither a signal label of the file nor
        'seizure' (raised by the pandas label selection).
    '''
    # The context manager guarantees the file handle is released even when
    # reading a signal fails part-way through.
    with pyedflib.EdfReader(pathToFile) as f:
        n = f.signals_in_file
        signal_labels = list(f.getSignalLabels()) + ['seizure']
        # The row count comes from the first signal; every signal is written
        # into a column of that length. Column n stays zero ('seizure').
        sigbufs = np.zeros((f.getNSamples()[0], n + 1))
        for i in range(n):
            sigbufs[:, i] = f.readSignal(i)

    df = pd.DataFrame(data=sigbufs, columns=signal_labels)
    df = df.loc[:, channels]
    # Files with repeated signal labels yield repeated columns; keep the first.
    df = df.loc[:, ~df.columns.duplicated()]
    return df.values

### Read an EDF seizures file

In [None]:
def get_seizure_period(file_location):
    '''
    Decode the start time and length of a seizure from a '.seizures' file.

    The values are read from fixed byte offsets, following
    https://www.mathworks.com/matlabcentral/answers/225716-how-i-can-read-chb01_03-edf-seizures-file-from-chb-mit-database-in-matlab-as-i-am-using-this-file-f

    Parameters
    ----------
    file_location : str or path-like
        Path of the seizures annotation file.

    Returns
    -------
    tuple of (int, int)
        (start, length). The start is the 16-bit value whose high byte is
        at offset 38 and whose low byte is at offset 41; the length is the
        single byte at offset 49.
        NOTE(review): presumably both are in seconds, since callers multiply
        them by the sample rate - confirm against the dataset description.
        Only this one set of offsets is read, so a file describing several
        seizures would yield just one of them.

    Raises
    ------
    IndexError
        If the file is shorter than 50 bytes.
    '''
    data = pathlib.Path(file_location).read_bytes()
    # Combine the two bytes arithmetically. Concatenating bin() strings would
    # drop the leading zeros of the low byte and corrupt every start time
    # whose low byte is below 128.
    start = (data[38] << 8) | data[41]
    length = data[49]
    return start, length

### Create a per-patient CSV

In [None]:
def create_per_patient_csv(dataset_folder, sample_rate, channels):
    '''
    Write one CSV per patient containing all of that patient's recordings.

    Every sub-folder of `dataset_folder` whose name starts with 'chb' is
    treated as a patient. Its '*edf' files are read in sorted order with
    readEdfFile, stacked row-wise, and saved as '<patient>/<patient>.csv'
    inside the patient folder. When an EDF file has a companion
    '*seizures' file, the samples inside the annotated period get 1.0 in
    the last column of `channels`.

    Parameters
    ----------
    dataset_folder : str
        Folder that contains the patient sub-folders.
    sample_rate : int
        Samples per time unit returned by get_seizure_period; used to turn
        the seizure start/length into sample indices.
    channels : list of str
        Columns to extract from each EDF file. The last entry is the column
        that receives the seizure flag (NOTE(review): expected to be
        'seizure' - confirm with the calling cell).

    Returns
    -------
    None
    '''
    label_col = len(channels) - 1
    patients = sorted(
        d for d in os.listdir(dataset_folder)
        if d.startswith('chb') and os.path.isdir(os.path.join(dataset_folder, d))
    )
    for p in patients:
        # Explicit paths instead of os.chdir: an exception half-way through
        # can no longer leave the notebook in a different working directory.
        patient_dir = os.path.join(dataset_folder, p)
        print('Reading data of patient', p)

        # for each patient specify the edf files and the seizure files
        files = [f for f in os.listdir(patient_dir)
                 if os.path.isfile(os.path.join(patient_dir, f))]
        edf = sorted(f for f in files if f.endswith('edf'))
        seizures = sorted(f for f in files if f.endswith('seizures'))

        # Collect the recordings and concatenate once; the leading empty
        # array keeps the (0, n_channels) result for patients without files.
        chunks = [np.empty((0, len(channels)), dtype=np.float64)]
        for e in tqdm(edf):
            sigbufs = readEdfFile(os.path.join(patient_dir, e), channels)
            # Both lists are sorted, so the head of `seizures` belongs to the
            # current EDF file exactly when its name starts with that file's.
            if seizures and seizures[0].startswith(e):
                start, length = get_seizure_period(os.path.join(patient_dir, seizures.pop(0)))
                # Inclusive end sample, as before; a slice is clamped to the
                # recording length instead of raising IndexError.
                sigbufs[start * sample_rate:(start + length) * sample_rate + 1, label_col] = 1.0
            chunks.append(sigbufs)

        df = pd.DataFrame(np.concatenate(chunks), columns=channels)
        df.to_csv(os.path.join(patient_dir, p + '.csv'), index=False)

### Create a DataFrame from EDF files

In [None]:
def read_and_store_data(dataset_folder, sample_rate, channels):
    '''
    Load every patient's recordings into a single DataFrame.

    Every sub-folder of `dataset_folder` whose name starts with 'chb' is
    treated as a patient. Patients and their '*edf' files are processed in
    sorted order, each file is read with readEdfFile, and all recordings
    are stacked row-wise. When an EDF file has a companion '*seizures'
    file, the samples inside the annotated period get 1.0 in the last
    column of `channels`.

    Parameters
    ----------
    dataset_folder : str
        Folder that contains the patient sub-folders.
    sample_rate : int
        Samples per time unit returned by get_seizure_period; used to turn
        the seizure start/length into sample indices.
    channels : list of str
        Columns to extract from each EDF file. The last entry is the column
        that receives the seizure flag (NOTE(review): expected to be
        'seizure' - confirm with the calling cell).

    Returns
    -------
    pandas.DataFrame
        One row per sample over all files of all patients, with `channels`
        as columns and a fresh 0..n-1 index.
    '''
    label_col = len(channels) - 1
    # Collect the recordings and concatenate once at the end; the leading
    # empty array keeps the (0, n_channels) result when nothing is found.
    chunks = [np.empty((0, len(channels)), dtype=np.float64)]

    patients = sorted(
        d for d in os.listdir(dataset_folder)
        if d.startswith('chb') and os.path.isdir(os.path.join(dataset_folder, d))
    )
    for p in patients:
        # Explicit paths instead of os.chdir: an exception half-way through
        # can no longer leave the notebook in a different working directory.
        patient_dir = os.path.join(dataset_folder, p)
        print('Reading data of patient', p)

        # for each patient specify the edf files and the seizure files
        files = [f for f in os.listdir(patient_dir)
                 if os.path.isfile(os.path.join(patient_dir, f))]
        edf = sorted(f for f in files if f.endswith('edf'))
        seizures = sorted(f for f in files if f.endswith('seizures'))

        for e in tqdm(edf):
            sigbufs = readEdfFile(os.path.join(patient_dir, e), channels)
            # Both lists are sorted, so the head of `seizures` belongs to the
            # current EDF file exactly when its name starts with that file's.
            if seizures and seizures[0].startswith(e):
                start, length = get_seizure_period(os.path.join(patient_dir, seizures.pop(0)))
                # Inclusive end sample, as before; a slice is clamped to the
                # recording length instead of raising IndexError.
                sigbufs[start * sample_rate:(start + length) * sample_rate + 1, label_col] = 1.0
            # Must happen once per file: appending only after the loop would
            # keep nothing but the last recording of each patient.
            chunks.append(sigbufs)

    return pd.DataFrame(np.concatenate(chunks), columns=channels)