In [None]:
import os
import pathlib
import pyedflib
import numpy as np
import pandas as pd

## EDF Files Manipulation

### Read an edf file

In [None]:
def readEdfFile(pathToFile):
    '''
    Read every signal of an EDF file into one 2-D numpy array.

    References:
    https://stackoverflow.com/questions/48784257/convert-eye-tracking-edf-file-to-asc-csv-format
    https://pyedflib.readthedocs.io/en/latest/
    https://www.edfplus.info/specs/edf.html

    Parameters
    ----------
    pathToFile : str
        Path of the EDF file, passed straight to pyedflib.EdfReader.

    Returns
    -------
    numpy.ndarray
        Float array of shape (samples, signals); column i holds signal i.
        The row count is taken from the first signal, so every signal is
        expected to have the same number of samples (numpy raises a
        ValueError on assignment otherwise). A file without any signal
        yields an empty array of shape (0, 0).
    '''
    # The context manager closes the reader even when a read fails part-way,
    # instead of relying on the private _close() being reached.
    with pyedflib.EdfReader(pathToFile) as f:
        n = f.signals_in_file
        if n == 0:
            return np.zeros((0, 0))
        sigbufs = np.zeros((f.getNSamples()[0], n))
        for i in range(n):
            sigbufs[:, i] = f.readSignal(i)
    return sigbufs

### Read an edf seizures file

In [None]:
def get_seizure_period(file_location):
    '''
    Return the start time and the length of the seizure stored in an
    ".edf.seizures" annotation file.

    Reference:
    https://www.mathworks.com/matlabcentral/answers/225716-how-i-can-read-chb01_03-edf-seizures-file-from-chb-mit-database-in-matlab-as-i-am-using-this-file-f

    Parameters
    ----------
    file_location : str or path-like
        Path of the seizures file.

    Returns
    -------
    tuple of (int, int)
        (start, length). The start is the 16-bit value whose high byte is at
        offset 38 and whose low byte is at offset 41; the length is the
        single byte at offset 49. Callers treat both as seconds.

    Raises
    ------
    IndexError
        If the file is shorter than 50 bytes.
    '''
    # Only the first 50 bytes are ever inspected, so do not load the rest.
    with open(file_location, 'rb') as fh:
        header = fh.read(50)
    # Combine the two bytes arithmetically. Concatenating bin() strings drops
    # the leading zeros of the low byte and corrupts every value whose low
    # byte is below 128.
    start = (header[38] << 8) | header[41]
    return start, header[49]

### Create dataframe from edf files

In [None]:
def read_and_store_data (dataset_folder, sample_rate) :
    '''
    Load every patient's EDF recordings from a dataset folder into a single
    labelled DataFrame.

    The folder is expected to hold one sub-directory per patient; each
    sub-directory holds "*.edf" recordings and, for recordings with a
    seizure, a file whose name starts with the recording's file name and
    ends with "seizures". Patients and files are processed in sorted order.

    Parameters
    ----------
    dataset_folder : str
        Path of the dataset folder (relative to the current working
        directory, or absolute). The working directory is not changed.
    sample_rate : int
        Number of records per second; used to convert the seizure start and
        length (seconds) into record indices.

    Returns
    -------
    pandas.DataFrame
        One row per record with 22 channel columns plus a 'seizure' column
        that is 1 inside an annotated seizure and 0 elsewhere. An empty
        frame with the same columns is returned when no recording is found.

    Raises
    ------
    ValueError
        If a recording does not contain exactly 23 channels.
    '''
    columns = ['FP1-F7','F7-T7','T7-P7', 'P7-O1','FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4','F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2','FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'seizure']
    # edf files contain 23 columns/channels; a 24th column (index 23) is added
    # to indicate the seizure
    n_channels = 23

    # Work with explicit paths instead of os.chdir so the caller's working
    # directory is untouched, even for nested folders or when a read fails.
    dataset_path = os.path.abspath(dataset_folder)
    patients = sorted(d for d in os.listdir(dataset_path)
                      if os.path.isdir(os.path.join(dataset_path, d)))

    # List of numpy arrays, one per recording, accumulated across ALL patients
    recordings = []
    for p in patients:
        patient_path = os.path.join(dataset_path, p)
        print('Reading data of patient', p)

        # for each patient specify the edf files and the seizure files
        files = [f for f in os.listdir(patient_path)
                 if os.path.isfile(os.path.join(patient_path, f))]
        edf = sorted(f for f in files if f.endswith('edf'))
        seizures = sorted(f for f in files if f.endswith('seizures'))

        for e in edf:
            sigbufs = readEdfFile(os.path.join(patient_path, e))
            print('Reading data file', e, 'with', sigbufs.shape[0], 'records')
            if sigbufs.shape[1] != n_channels:
                raise ValueError('Expected %d channels in %s but found %d'
                                 % (n_channels, e, sigbufs.shape[1]))
            sigbufs = np.append(sigbufs, np.zeros((sigbufs.shape[0],1)), axis=1)

            # Search all remaining seizure files so that an annotation whose
            # recording is missing cannot block the ones that follow it.
            seizure_file = next((s for s in seizures if s.startswith(e)), None)
            if seizure_file is not None:
                (start, length) = get_seizure_period(os.path.join(patient_path, seizure_file))
                print('Reading seizure file', seizure_file, ': (start =',start,'sec /',start*sample_rate,'records, length =',length,'sec /', (start+length)*sample_rate,'records)')
                # Inclusive end record; the slice is clipped at the end of the
                # recording instead of raising when the seizure reaches it.
                sigbufs[start*sample_rate:(start+length)*sample_rate+1, n_channels] = 1
                seizures.remove(seizure_file)
            recordings.append(sigbufs)
    print('')

    if not recordings:
        return pd.DataFrame(np.zeros((0, len(columns))), columns=columns)

    # Concatenate once, then drop column 22 (the 23rd channel) so that the 22
    # named channels plus the seizure flag remain.
    # NOTE(review): presumably that channel is redundant in this dataset - confirm.
    data = np.delete(np.concatenate(recordings), 22, axis=1)
    return pd.DataFrame(data, columns=columns)