In [None]:
import pywt
import math
import numpy as np
import pandas as pd
from pyentrp import entropy
from tqdm.notebook import tqdm
from scipy import integrate, signal, stats

## Feature Extraction

### DataFrames for the Experiments

#### Left and Right Hemispheric Channel Information

In [None]:
def leftRightHemisphericChannels (df):
    '''
    Reduce the four bipolar channels to one averaged signal per hemisphere.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'F3-C3', 'C3-P3', 'F4-C4', 'C4-P4'
        and 'seizure'.

    Returns
    -------
    pandas.DataFrame
        Columns 'AvgLeftHemisphere' (mean of 'F3-C3' and 'C3-P3'),
        'AvgRightHemisphere' (mean of 'F4-C4' and 'C4-P4') and 'seizure'
        (copied from ``df``).
    '''
    left_avg = (df['F3-C3'] + df['C3-P3'])/2
    right_avg = (df['F4-C4'] + df['C4-P4'])/2
    return pd.DataFrame({
        'AvgLeftHemisphere': left_avg,
        'AvgRightHemisphere': right_avg,
        'seizure': df['seizure'],
    })

#### Average Channel Information

In [None]:
def averageChannels (df):
    '''
    Collapse all channels into a single surrogate signal.

    Every column except the last one is averaged row-wise, so the function
    relies on the label column being the last column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Channel columns followed by a final column; must contain 'seizure'.

    Returns
    -------
    pandas.DataFrame
        Columns 'surrogate' (row-wise mean of all but the last column) and
        'seizure' (copied from ``df``).
    '''
    # ":-1" selects every column but the last one.
    row_means = df.iloc[:, :-1].mean(axis=1)
    return pd.DataFrame({'surrogate': row_means, 'seizure': df['seizure']})

### Time Domain Features

In [None]:
'''
https://stackoverflow.com/questions/30272538/python-code-for-counting-number-of-zero-crossings-in-an-array
https://stackoverflow.com/questions/5613244/root-mean-square-in-numpy-and-complications-of-matrix-and-arrays-of-numpy
'''
def computeTimeDomainFeatures (signal):
    '''
    Compute eleven time-domain descriptors of a 1-D NumPy array.

    Note that the parameter name shadows the ``scipy.signal`` module inside
    this function only.

    Parameters
    ----------
    signal : numpy.ndarray
        Samples of one window; array methods (``.dot``, ``.size``, ``.max``)
        are used, so a plain list is not accepted.

    Returns
    -------
    tuple
        (mean, variance, skewness, kurtosis, standard deviation, median,
        zero-crossing count, coefficient of variation, root mean square,
        peak-to-peak amplitude, sample entropy), in that order.
    '''
    avg = np.mean(signal)
    variance = np.var(signal)
    skewness = stats.skew(signal)
    kurtosis = stats.kurtosis(signal)
    deviation = np.std(signal)
    middle = np.median(signal)

    # Two neighbouring samples of opposite sign have a negative product.
    neighbour_products = signal[:-1] * signal[1:]
    crossings = (neighbour_products < 0).sum()

    # The coefficient of variation divides by the mean, so it is left
    # undefined (nan) when the mean is exactly zero.
    coeff_var = math.nan if signal.mean() == 0 else stats.variation(signal)

    n_samples = signal.size
    root_mean_square = np.sqrt(signal.dot(signal)/n_samples) if n_samples > 0 else math.nan

    peak_to_peak = signal.max() - signal.min()

    # First element of the array returned by pyentrp's sample_entropy.
    sample_entropy = entropy.sample_entropy(signal, 1)[0]

    return (
        avg, variance, skewness, kurtosis, deviation, middle,
        crossings, coeff_var, root_mean_square, peak_to_peak, sample_entropy,
    )

### Frequency Features

In [None]:
'''
Compute the average bandpower of an EEG signal
https://raphaelvallat.com/bandpower.html
'''
def psd (x, fs, win):
    '''
    Yield the power of ``x`` in five consecutive frequency bands.

    The power spectral density is estimated with Welch's method
    (``scipy.signal.welch``) and integrated with Simpson's rule over the
    bands 0.5-4, 4-8, 8-12, 12-30 and 30-100 Hz (band edges inclusive).
    Callers in this file label the five values delta, theta, alpha, beta
    and gamma power.

    Parameters
    ----------
    x : array_like
        Samples of the signal.
    fs : float
        Sampling frequency, forwarded to ``signal.welch``.
    win : int
        Segment length, forwarded to ``signal.welch`` as ``nperseg``.

    Yields
    ------
    float
        One value per band, in ascending frequency order. ``nan`` is
        yielded for a band that contains no frequency bin of the estimate
        (e.g. a very short window or a low sampling rate).
    '''
    band_edges = [0.5, 4, 8, 12, 30, 100]
    freqs, density = signal.welch(x, fs, nperseg = win)

    # Simpson's rule has to step by the spacing of the PSD samples (the
    # frequency resolution), not by the width of the band being integrated.
    freq_res = freqs[1] - freqs[0] if freqs.size > 1 else np.nan

    # ``simps`` is a deprecated alias that newer SciPy releases no longer
    # provide; prefer ``simpson`` and fall back only on old installations.
    simpson_rule = getattr(integrate, 'simpson', None) or integrate.simps

    band_power = []
    for low, high in zip(band_edges[:-1], band_edges[1:]):
        in_band = np.logical_and(freqs >= low, freqs <= high)
        if not in_band.any():
            # Nothing to integrate: report the band as undefined.
            band_power.append(np.nan)
            continue
        band_power.append(simpson_rule(density[in_band], dx=freq_res))

    yield from band_power
### Correlation Features

In [None]:
def computeCorrelation (left, right):
    '''
    Return the largest absolute value of the full cross-correlation of
    ``left`` and ``right`` (``numpy.correlate`` with mode 'full', i.e. over
    every lag; the result is not normalised).
    '''
    cross_corr = np.correlate(left, right, mode='full')
    magnitudes = np.abs(cross_corr)
    return magnitudes.max()

### Feature Computation

In [None]:
def featureExtractionLeftRight (df, sample_rate, step):
    '''
    Extract one feature row per window from the left/right hemisphere signals.

    ``df`` is cut into consecutive windows of ``step`` rows (the last window
    may be shorter). For every window the time-domain features and band
    powers of both hemispheres, their cross-correlation peak and the most
    frequent 'seizure' label are computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'AvgLeftHemisphere', 'AvgRightHemisphere' and 'seizure'.
    sample_rate : float
        Sampling frequency passed to ``psd``.
    step : int
        Number of rows per window.

    Returns
    -------
    pandas.DataFrame
        One row per window. Columns, in order: the eleven 'L...' and eleven
        'R...' time-domain features, the five 'L...Power' and five
        'R...Power' band powers, 'corr' and 'seizure'. Feature columns are
        floats; 'seizure' keeps the dtype pandas infers for the labels.
    '''
    print('Feature Extraction')
    time_names = ('mean', 'var', 'skew', 'kurt', 'std', 'median',
                  'zcr', 'cv', 'rms', 'p2p', 'sampEn')
    band_names = ('deltaPower', 'thetaPower', 'alphaPower', 'betaPower', 'gammaPower')

    # Collect plain dicts and build the frame once at the end: enlarging a
    # DataFrame cell by cell with .loc re-allocates it on every new row.
    rows = []
    for i in tqdm(range (0, df.shape[0], step)):
        temp = df.iloc[i:i+step]
        left = np.array(temp['AvgLeftHemisphere'])
        right = np.array(temp['AvgRightHemisphere'])
        sides = (('L', left), ('R', right))
        row = {}

        # Time Domain Features (left first, then right, to keep column order)
        for prefix, samples in sides:
            for name, value in zip(time_names, computeTimeDomainFeatures(samples)):
                row[prefix + name] = float(value)

        # Frequency Domain Features
        for prefix, samples in sides:
            for name, value in zip(band_names, psd(samples, sample_rate, samples.shape[0])):
                row[prefix + name] = float(value)

        # Correlation Features
        row['corr'] = float(computeCorrelation(left, right))

        # Label of the window = most frequent label among its rows.
        row['seizure'] = temp['seizure'].value_counts().idxmax()
        rows.append(row)
    return pd.DataFrame(rows)

In [None]:
def featureExtractionAverage (df, sample_rate, step):
    '''
    Extract one feature row per window from the averaged surrogate signal.

    ``df`` is cut into consecutive windows of ``step`` rows (the last window
    may be shorter). For every window the time-domain features and band
    powers of the 'surrogate' column and the most frequent 'seizure' label
    are computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'surrogate' and 'seizure'.
    sample_rate : float
        Sampling frequency passed to ``psd``.
    step : int
        Number of rows per window.

    Returns
    -------
    pandas.DataFrame
        One row per window. Columns, in order: the eleven time-domain
        features, the five band powers and 'seizure'. Feature columns are
        floats; 'seizure' keeps the dtype pandas infers for the labels.
    '''
    print('Feature Extraction')
    time_names = ('mean', 'var', 'skew', 'kurt', 'std', 'median',
                  'zcr', 'cv', 'rms', 'p2p', 'sampEn')
    band_names = ('deltaPower', 'thetaPower', 'alphaPower', 'betaPower', 'gammaPower')

    # Collect plain dicts and build the frame once at the end: enlarging a
    # DataFrame cell by cell with .loc re-allocates it on every new row.
    rows = []
    for i in tqdm(range (0, df.shape[0], step)):
        temp = df.iloc[i:i+step]
        s = np.array(temp['surrogate'])
        row = {}

        # Time Domain Features
        for name, value in zip(time_names, computeTimeDomainFeatures(s)):
            row[name] = float(value)

        # Frequency Domain Features
        for name, value in zip(band_names, psd(s, sample_rate, s.shape[0])):
            row[name] = float(value)

        # Label of the window = most frequent label among its rows.
        row['seizure'] = temp['seizure'].value_counts().idxmax()
        rows.append(row)
    return pd.DataFrame(rows)

In [None]:
def featureExtractionFull (df, sample_rate, step):
    '''
    Extract one feature row per window using every channel separately.

    ``df`` is cut into consecutive windows of ``step`` rows (the last window
    may be shorter). Every column except the last one is treated as a
    channel, addressed by its position ``j``; the window label is the most
    frequent value of the 'seizure' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Channel columns followed by a final column; must contain 'seizure'.
    sample_rate : float
        Sampling frequency passed to ``psd``.
    step : int
        Number of rows per window.

    Returns
    -------
    pandas.DataFrame
        One row per window. For each channel position ``j`` the eleven
        time-domain features ('mean<j>', ..., 'sampEn<j>') are followed by
        the five band powers ('deltaPower<j>', ..., 'gammaPower<j>'); the
        last column is 'seizure'. Feature columns are floats; 'seizure'
        keeps the dtype pandas infers for the labels.
    '''
    print('Feature Extraction')
    time_names = ('mean', 'var', 'skew', 'kurt', 'std', 'median',
                  'zcr', 'cv', 'rms', 'p2p', 'sampEn')
    band_names = ('deltaPower', 'thetaPower', 'alphaPower', 'betaPower', 'gammaPower')
    n_channels = df.shape[1]-1  # every column but the last one

    # Collect plain dicts and build the frame once at the end: enlarging a
    # DataFrame cell by cell with .loc re-allocates it on every new row and
    # column, which gets very slow with many channels.
    rows = []
    for i in tqdm(range (0, df.shape[0], step)):
        temp = df.iloc[i:i+step]
        row = {}
        for j in range(0, n_channels):
            s = np.array(temp.iloc[:, j])
            suffix = str(j)

            # Time Domain Features
            for name, value in zip(time_names, computeTimeDomainFeatures(s)):
                row[name + suffix] = float(value)

            # Frequency Domain Features
            for name, value in zip(band_names, psd(s, sample_rate, s.shape[0])):
                row[name + suffix] = float(value)

        # Label of the window = most frequent label among its rows.
        row['seizure'] = temp['seizure'].value_counts().idxmax()
        rows.append(row)
    return pd.DataFrame(rows)