In [None]:
import os
import pathlib
import pyedflib
import numpy as np
import pandas as pd
import scipy.stats as spst
from pyentrp import entropy as ent
import sklearn.metrics as metr

## Experiment's parameters

In [None]:
# TODO (author's original note, meaning unclear): "Should replan sample rate. No seizure happens."

# Number of records per second; the loader multiplies seizure times in
# seconds by this value to obtain record indices.
sample_rate = 256

# Length of one analysis window, in seconds.
time_window = 5

# Number of records contained in one analysis window.
step = sample_rate * time_window

## EDF Files Manipulation

### Read an edf file

In [None]:
def readEdfFile(pathToFile):
    '''
    Load every signal of an EDF file into one 2-D numpy array.

    References:
    https://stackoverflow.com/questions/48784257/convert-eye-tracking-edf-file-to-asc-csv-format
    https://pyedflib.readthedocs.io/en/latest/
    https://www.edfplus.info/specs/edf.html

    Parameters
    ----------
    pathToFile : path of the EDF file, handed to pyedflib.EdfReader.

    Returns
    -------
    numpy array with one row per sample and one column per signal
    (column i holds signal i). The row count is taken from the first
    signal, so all signals are assumed to have the same length.
    '''
    f = pyedflib.EdfReader(pathToFile)
    try:
        n = f.signals_in_file
        sigbufs = np.zeros((f.getNSamples()[0], n))
        for i in range(n):
            sigbufs[:, i] = f.readSignal(i)
    finally:
        # Release the reader even when a read fails part-way through,
        # otherwise the file stays open for the lifetime of the kernel.
        f._close()
    return sigbufs

### Read an edf seizures file

In [None]:
def get_seizure_period(file_location):
    '''
    Read the seizure start time and length from a '.seizures' file.

    Reference for the byte offsets used here:
    https://www.mathworks.com/matlabcentral/answers/225716-how-i-can-read-chb01_03-edf-seizures-file-from-chb-mit-database-in-matlab-as-i-am-using-this-file-f

    Parameters
    ----------
    file_location : path of the seizures file.

    Returns
    -------
    (start, length) as a tuple of ints. The caller treats both values as
    seconds. The start is a 16-bit value whose high byte is at offset 38
    and whose low byte is at offset 41; the length is the byte at offset 49.

    Raises
    ------
    IndexError if the file is shorter than 50 bytes.

    NOTE(review): only one (start, length) pair is read and the length is a
    single byte - confirm this is enough for files with several or long
    seizures.
    '''
    raw = pathlib.Path(file_location).read_bytes()
    # Combine the two bytes arithmetically. Concatenating bin() strings drops
    # the leading zeros of the low byte and yields a wrong start time whenever
    # that byte is below 128.
    start = (raw[38] << 8) | raw[41]
    return start, raw[49]

## Load every patient's recordings and combine them into one DataFrame

In [None]:
def read_and_store_data (dataset_folder, sample_rate) :
    '''
    Read all EDF recordings below dataset_folder into one DataFrame.

    Every sub-directory of dataset_folder is treated as one patient. Inside
    it, files ending in 'edf' are recordings and files ending in 'seizures'
    are seizure annotations; an annotation is applied to a recording when
    its name starts with the recording's file name.

    Parameters
    ----------
    dataset_folder : directory containing one sub-directory per patient.
    sample_rate    : records per second, used to convert the seizure start
                     and length (seconds) into record indices.

    Returns
    -------
    DataFrame with 22 channel columns plus a 'seizure' column that is 1 for
    records inside an annotated seizure and 0 otherwise. Recordings are
    stacked in sorted patient / file order. An empty frame with the same
    columns is returned when no recording is found.

    Notes
    -----
    Each recording is assumed to have 23 signals; the one at position 22 is
    dropped so that the remaining columns match the channel names below.
    '''
    columns = ['FP1-F7','F7-T7','T7-P7', 'P7-O1','FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4','F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8', 'P8-O2','FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8', 'seizure']

    # Work with explicit paths instead of os.chdir so the process working
    # directory is never changed, even if a file fails to load.
    patients = sorted(d for d in os.listdir(dataset_folder)
                      if os.path.isdir(os.path.join(dataset_folder, d)))

    # One array per recording, across ALL patients; concatenated once at the end.
    recordings = []
    for p in patients:
        patient_dir = os.path.join(dataset_folder, p)
        print('Reading data of patient', p)

        # for each patient specify the edf files and the seizure files
        files = [f for f in os.listdir(patient_dir)
                 if os.path.isfile(os.path.join(patient_dir, f))]
        edf = sorted(f for f in files if f.endswith('edf'))
        seizures = sorted(f for f in files if f.endswith('seizures'))

        for e in edf:
            sigbufs = readEdfFile(os.path.join(patient_dir, e))
            print('Reading data file', e, 'with ', sigbufs.shape[0], 'records')
            # extra last column (index 23) holds the per-record seizure flag
            sigbufs = np.append(sigbufs, np.zeros((sigbufs.shape[0],1)), axis=1)
            if seizures and seizures[0].startswith(e):
                (start, length) = get_seizure_period(os.path.join(patient_dir, seizures[0]))
                print('Reading seizure file', seizures[0], ': (start =',start,'sec, length =',length,'sec) or', '(start =',start*sample_rate,'record, end =',(start+length)*sample_rate,'record)')
                # Inclusive end record, as before; a slice is clipped to the
                # recording length instead of raising IndexError.
                sigbufs[start*sample_rate:(start+length)*sample_rate+1, 23] = 1
                seizures.pop(0)
            recordings.append(np.delete(sigbufs, 22, axis=1))

    if not recordings:
        return pd.DataFrame(np.zeros((0, len(columns))), columns=columns)
    return pd.DataFrame(np.concatenate(recordings), columns=columns)

## Read the data and store it to a dataframe

In [None]:
# Load every recording found under the 'TestData' folder into one DataFrame.
df = read_and_store_data('TestData', sample_rate)
# Alternative dataset folder, currently disabled:
#df = read_and_store_data('CHB-MIT-Dataset', sample_rate)

## Feature Extraction

### Left and Right Hemispheric Channel Information

In [None]:
# Reduce the recording to one averaged signal per hemisphere
# (mean of two channels each) and carry the seizure flag along.
ndf = pd.DataFrame({
    'AvgLeftHemisphere': (df['F3-C3'] + df['C3-P3']) / 2,
    'AvgRightHemisphere': (df['F4-C4'] + df['C4-P4']) / 2,
    'seizure': df['seizure'],
})
# The full frame is not referenced again in this notebook section.
# Note: this makes the cell non-re-runnable without reloading the data.
del df

### Time Domain Features

In [None]:
def ComputeTimeDomainFeatures(featvec, signal, count, tag):
    '''
    Compute time-domain statistics of one signal window and store them in
    row `count` of `featvec`, in columns named tag + feature name.

    References:
    https://stackoverflow.com/questions/30272538/python-code-for-counting-number-of-zero-crossings-in-an-array
    https://stackoverflow.com/questions/5613244/root-mean-square-in-numpy-and-complications-of-matrix-and-arrays-of-numpy

    Parameters
    ----------
    featvec : DataFrame that receives the features (modified in place).
    signal  : 1-D numpy array holding the samples of the window.
    count   : row label under which the features are written.
    tag     : prefix for the column names (e.g. 'L' or 'R').
    '''
    values = {
        'mean': np.mean(signal),
        'var': np.var(signal),
        'skew': spst.skew(signal),
        'kurt': spst.kurtosis(signal),
        'std': np.std(signal),
        'median': np.median(signal),
        # number of sign changes between consecutive samples
        'zcr': ((signal[:-1] * signal[1:]) < 0).sum(),
        'cv': spst.variation(signal),
        'rms': np.sqrt(signal.dot(signal)/signal.size),
        'p2p': signal.max() - signal.min(),
        'SampEn': ent.sample_entropy(signal, 1)[0],
    }
    # Write into the frame that was passed in; the previous body ignored the
    # argument and relied on a global named `features` being defined.
    for name, value in values.items():
        featvec.loc[count, tag + name] = value

### Correlation Features

In [None]:
def ComputeCorrelation (features, left, right, count):
    '''
    Store the largest absolute value of the full cross-correlation between
    `left` and `right` in features.loc[count, 'maxAbsCrossCorr'].

    features : DataFrame that receives the value (modified in place).
    left, right : 1-D sequences passed to np.correlate.
    count : row label under which the value is written.
    '''
    cross_corr = np.correlate(left, right, mode='full')
    features.loc[count, 'maxAbsCrossCorr'] = np.abs(cross_corr).max()

In [None]:
# Build one feature row per window of `step` consecutive records.
# The last window may be shorter than `step` when the number of records is
# not a multiple of it.
features = pd.DataFrame()
count = 0
for i in range (0, ndf.shape[0], step):
    temp = ndf.iloc[i:i+step]
    left = np.array(temp['AvgLeftHemisphere'])
    right = np.array(temp['AvgRightHemisphere'])

    # Time Domain Features
    ComputeTimeDomainFeatures(features, left, count, 'L')
    ComputeTimeDomainFeatures(features, right, count, 'R')

    # Frequency Domain Features
    # TODO: the 'maxFreq' feature was left unfinished (an assignment with no
    # right-hand side, which is a SyntaxError and stopped the whole cell from
    # running). Add it back once its computation is defined.

    # Correlation Features
    ComputeCorrelation(features, left, right, count)

    # Label the window with its most frequent per-record seizure flag.
    features.loc[count, 'seizure'] = temp['seizure'].value_counts().idxmax()
    count = count + 1

## Classification performance indexes 

In [None]:
def metrics (true_arr, pred_arr):
    '''
    Compute binary classification performance indexes.

    TP : the number of segments that are correctly identified as ictal (x_true == x_pred == 1)
    TN : the number of EEG segments that are correctly classified as non-ictal (x_true == x_pred == 0)
    FP : the number of EEG segments that are incorrectly classified as ictal (x_true == 0 && x_pred == 1)
    FN : the segments that are incorrectly classified as non-ictal (x_true == 1 && x_pred == 0)

    Parameters
    ----------
    true_arr : ground-truth labels (0 = non-ictal, 1 = ictal).
    pred_arr : predicted labels, same length as true_arr.

    Returns
    -------
    (acc, snv, spc, ppv, f1, mcc, kappa): accuracy, sensitivity,
    specificity, precision, F1 score, Matthews correlation coefficient and
    Cohen's kappa. A ratio whose denominator is zero is returned as NaN.
    '''
    # labels=[0, 1] forces a 2x2 matrix; without it a run in which only one
    # class occurs yields a 1x1 matrix and the 4-way unpack below fails.
    (tn, fp, fn, tp) = metr.confusion_matrix(true_arr, pred_arr, labels=[0, 1]).ravel()

    def ratio(num, den):
        # explicit NaN instead of a numpy 0/0 RuntimeWarning
        return num/den if den else float('nan')

    acc = metr.accuracy_score(true_arr, pred_arr)
    snv = ratio(tp, tp + fn)                                    # Sensitivity or True Positive Rate (TPR)
    spc = ratio(tn, tn + fp)                                    # Specificity or True Negative Rate (TNR)
    ppv = ratio(tp, tp + fp)                                    # Precision or Positive Predictive Value (PPV)
    f1 = metr.f1_score(true_arr, pred_arr)
    mcc = metr.matthews_corrcoef(true_arr, pred_arr)
    kappa = metr.cohen_kappa_score(true_arr, pred_arr)          # Cohen’s Kappa
    return acc, snv, spc, ppv, f1, mcc, kappa