In [1]:
import os

import numpy as np
import statistics
import pyedflib
# import mne
import math


from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

from IPython.display import display, Markdown  #display(Markdown("# Hello World!"))
import Functions

In [2]:
# Optional: uncomment the magic below to use matplotlib's interactive notebook backend.
# %matplotlib notebook
# Apply matplotlib's 'ggplot' style sheet to the figures created afterwards.
plt.style.use('ggplot')

## Functions

### Read edf file

In [3]:
def readedf(path):
    """Read every signal of an EDF file into one 2-D float array.

    Parameters
    ----------
    path : str
        Path of the EDF file to open with ``pyedflib.EdfReader``.

    Returns
    -------
    numpy.ndarray
        Array of shape (number of signals, samples of the first signal);
        row ``i`` holds the samples returned by ``readSignal(i)``.
        An empty (0, 0) array is returned when the file holds no signals.

    Notes
    -----
    The row length is taken from the first signal only, so a file whose
    signals have different sample counts raises a ValueError on assignment.
    """
    f = pyedflib.EdfReader(path)
    try:
        n = f.signals_in_file
        if n == 0:
            # Nothing to read; avoids indexing an empty sample-count list.
            return np.zeros((0, 0))
        sigbufs = np.zeros((n, f.getNSamples()[0]))
        for i in range(n):
            sigbufs[i, :] = f.readSignal(i)
    finally:
        # Always release the file handle, even if reading fails part-way;
        # otherwise handles pile up when many files are read in a loop.
        f.close()
    return sigbufs

### Feature calculation

In [4]:
def feat_mean(array):
    """Return the arithmetic mean over all elements of ``array``."""
    average = np.mean(array)
    return average
def feat_variance(array):
    """Return the variance of ``array`` along axis 0 (NumPy default, ddof=0)."""
    spread = np.var(array, axis=0)
    return spread
def feat_skewness(array):
    """Return the skewness of ``array``: third central moment over std cubed."""
    centered = array - np.mean(array)
    third_moment = np.mean(centered**3)
    return third_moment / np.std(array)**3
def feat_kurtosis(array):
    """Return the (non-excess) kurtosis: fourth central moment over std**4."""
    centered = array - np.mean(array)
    fourth_moment = np.mean(centered**4)
    return fourth_moment / np.std(array)**4
def feat_cov(array):
    """Return the coefficient of variation: standard deviation over mean."""
    sigma = np.std(array)
    mu = np.mean(array)
    return sigma / mu
def feat_mad(array):
    """Return the median absolute deviation of ``array`` around its median."""
    center = np.median(array)
    return np.median(np.abs(array - center))
def feat_rms(array):
    """Return the root-mean-square amplitude of ``array``."""
    mean_square = np.mean(np.square(array))
    return np.sqrt(mean_square)
def feat_shannon_entropy(sequence):
    """Return the Shannon entropy (in bits) of the values in ``sequence``.

    Each distinct value is treated as one event; its probability is its
    number of occurrences divided by ``len(sequence)``.
    """
    # Map every element to the index of its distinct value, then count per index.
    _, value_index = np.unique(sequence, return_inverse=True)
    occurrences = np.bincount(value_index)

    # Relative frequency of each distinct value.
    probabilities = occurrences / len(sequence)

    # H = -sum(p * log2(p))
    weighted_logs = probabilities * np.log2(probabilities)
    return -np.sum(weighted_logs)
# array = (signal)
def get_features(array):
    """Compute the eight per-signal features and return them as a (1, 8) array.

    Column order: mean, variance, skewness, kurtosis, coefficient of
    variation, median absolute deviation, RMS, Shannon entropy.
    """
    extractors = (
        feat_mean,
        feat_variance,
        feat_skewness,
        feat_kurtosis,
        feat_cov,
        feat_mad,
        feat_rms,
        feat_shannon_entropy,
    )
    all_features = np.zeros((1, len(extractors)))
    for column, extractor in enumerate(extractors):
        all_features[0, column] = extractor(array)
    return all_features

### Sliding windows

In [5]:
# window_size and window_step are given in points (seconds * sample_rate)
# array = (signal)
def slide_windows(array, window_size, window_step):
    """Cut a 1-D signal into fixed-length, possibly overlapping windows.

    Parameters
    ----------
    array : array-like, 1-D
        The signal to cut.
    window_size : int
        Number of points in each window; must be positive.
    window_step : int
        Number of points between the starts of consecutive windows;
        must be positive.

    Returns
    -------
    numpy.ndarray
        Float array of shape (num_window, window_size). Window ``i`` covers
        ``array[i*window_step : i*window_step + window_size]``. Only complete
        windows are produced; a signal shorter than ``window_size`` yields an
        empty (0, window_size) array.

    Raises
    ------
    ValueError
        If ``window_size`` or ``window_step`` is not positive.
    """
    if window_size <= 0 or window_step <= 0:
        raise ValueError("window_size and window_step must be positive")

    array_len = np.size(array)
    if array_len < window_size:
        # Not even one complete window fits.
        num_window = 0
    else:
        # The "+ 1" counts the window that starts at offset 0; without it the
        # last complete window of the signal would be dropped.
        num_window = (array_len - window_size) // window_step + 1

    output = np.zeros((num_window, window_size))
    for i in range(num_window):
        start = window_step * i
        output[i, :] = array[start:start + window_size]
    return output

def channel_slide_windows(array, window_size, window_step): # array's format should be [#channel by #points]
    """Apply ``slide_windows`` to every channel (row) of a 2-D array.

    Parameters
    ----------
    array : numpy.ndarray
        Signals laid out as (#channel, #points).
    window_size : int
        Window length in points, forwarded to ``slide_windows``.
    window_step : int
        Step between window starts in points, forwarded to ``slide_windows``.

    Returns
    -------
    numpy.ndarray
        Array of shape (#channel, #window, window_size). With zero channels
        an empty (0, 0, window_size) array is returned.
    """
    num_channel = np.size(array, axis=0)
    if num_channel == 0:
        # No rows to window; previously this path hit an unbound `output`.
        return np.zeros((0, 0, window_size))

    per_channel = [
        slide_windows(array=array[ch, :], window_size=window_size,
                      window_step=window_step)
        for ch in range(num_channel)
    ]
    # All rows have the same length, so every channel yields the same number
    # of windows and the results stack cleanly along a new leading axis.
    return np.stack(per_channel, axis=0) # (#channel, #window, signal)

### Normalization per sample

In [6]:
def norm_per_sample(data):
    """Standardize each row of ``data`` to zero mean and unit standard deviation.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; statistics are computed along axis 1 (one row = one sample).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``data``. A row whose standard deviation
        is exactly zero (all values equal) is returned as all zeros instead
        of NaN/inf.
    """
    mean_per_sample = np.mean(data, axis=1, keepdims=True)
    std_dev_per_sample = np.std(data, axis=1, keepdims=True)

    # Constant rows have std == 0; dividing by 1 there keeps the centered
    # values (all zeros) and avoids a division by zero.
    safe_std = np.where(std_dev_per_sample == 0, 1.0, std_dev_per_sample)

    normalized_data = (data - mean_per_sample) / safe_std
    return normalized_data

## Read folder file names

In [7]:
# Read folder file names # patient 01
# NOTE: every patient cell assigns the same `folder_path` / `patient_index`
# names, so only the most recently executed patient cell takes effect.
folder_path = "./Patient01/"

# Entries to leave out of processing: the summary file plus the files listed
# here by name (the recordings that come with a `.seizures` companion file).
seizure_file_list = {'chb01-summary.txt', 'chb01_03.edf','chb01_03.edf.seizures',
                    'chb01_04.edf', 'chb01_04.edf.seizures', 'chb01_15.edf', 'chb01_15.edf.seizures',
                    'chb01_16.edf', 'chb01_16.edf.seizures','chb01_18.edf', 'chb01_18.edf.seizures',
                    'chb01_21.edf', 'chb01_21.edf.seizures', 'chb01_26.edf', 'chb01_26.edf.seizures'}

# sorted() gives a deterministic processing order (os.listdir order is
# arbitrary). Only regular files ending in '.edf' are kept so that stray
# files in the folder are never handed to the EDF reader.
patient_index = sorted(
    filename
    for filename in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, filename))
    and filename.endswith('.edf')
    and filename not in seizure_file_list
)

del seizure_file_list

In [10]:
# Read folder file names   # patient 07
# NOTE: every patient cell assigns the same `folder_path` / `patient_index`
# names, so only the most recently executed patient cell takes effect.
folder_path = "./Patient07/"

# Entries to leave out of processing: the summary file plus the files listed
# here by name (the recordings that come with a `.seizures` companion file).
seizure_file_list = {'chb07-summary.txt', 'chb07_12.edf','chb07_12.edf.seizures',
                    'chb07_13.edf', 'chb07_13.edf.seizures', 'chb07_19.edf', 'chb07_19.edf.seizures'}

# sorted() gives a deterministic processing order (os.listdir order is
# arbitrary). Only regular files ending in '.edf' are kept so that stray
# files in the folder are never handed to the EDF reader.
patient_index = sorted(
    filename
    for filename in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, filename))
    and filename.endswith('.edf')
    and filename not in seizure_file_list
)

del seizure_file_list

In [7]:
# Read folder file names  # patient 02
# NOTE: every patient cell assigns the same `folder_path` / `patient_index`
# names, so only the most recently executed patient cell takes effect.
folder_path = "./Patient02/"

# Entries to leave out of processing: the summary file plus the files listed
# here by name (the recordings that come with a `.seizures` companion file).
seizure_file_list = {'chb02-summary.txt', 'chb02_16.edf','chb02_16.edf.seizures',
                    'chb02_16+.edf', 'chb02_16+.edf.seizures', 'chb02_19.edf', 'chb02_19.edf.seizures'}

# sorted() gives a deterministic processing order (os.listdir order is
# arbitrary). Only regular files ending in '.edf' are kept so that stray
# files in the folder are never handed to the EDF reader.
patient_index = sorted(
    filename
    for filename in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, filename))
    and filename.endswith('.edf')
    and filename not in seizure_file_list
)

del seizure_file_list

## Read files & Features & PCA & Save

In [8]:
# Define parameters
sample_rate = 256      # points per second; assumed equal for every recording — TODO confirm against the EDF headers
window_seconds = 64    # window length in seconds
step_seconds = 32      # step between window starts in seconds (windows overlap by half)
n_features = 8         # number of columns returned by get_features
n_components = 64      # number of principal components kept per file

# For each file in patient_index: read -> window -> per-window features -> PCA -> normalize -> save.
for edf_name in patient_index:
    temp = readedf(os.path.join(folder_path, edf_name))
    print('successful read file', edf_name)

    temp_window = channel_slide_windows(temp,     # Intercept temp into window format
                                        window_size=window_seconds*sample_rate,
                                        window_step=step_seconds*sample_rate)  # (#channel, #window, signal)

    n_channels = np.size(temp_window, axis=0)
    n_windows = np.size(temp_window, axis=1)

    temp_window_features = np.zeros((n_channels, n_windows, n_features))  # Initiate window features

    for j in range(n_channels):                 # Calculate each window's features
        for k in range(n_windows):
            # get_features returns shape (1, n_features); it broadcasts into the (n_features,) slot.
            temp_window_features[j, k, :] = get_features(temp_window[j, k, :])

    # (#channel, #window, #feature) -> (#window, #channel, #feature)
    temp_window_features_t = np.transpose(temp_window_features, [1, 0, 2])   # transpose matrix
    # Flatten channel and feature axes: one row per window -> (# sample, # features)
    temp_window_features_t_f = temp_window_features_t.reshape(n_windows, n_channels*n_features)

    # PCA cannot extract more components than min(#samples, #features).
    # Skip such a file with a message instead of aborting the whole batch.
    if min(temp_window_features_t_f.shape) < n_components:
        print('skip file', edf_name, ': feature matrix of shape',
              temp_window_features_t_f.shape, 'is too small for', n_components, 'PCA components')
        continue

    # NOTE(review): a separate PCA is fitted for every file, so the component
    # axes of different output files are not guaranteed to match — confirm this is intended.
    pca = PCA(n_components=n_components)
    temp_window_features_t_f_pca = pca.fit_transform(temp_window_features_t_f)
    temp_window_features_t_f_pca_norm = norm_per_sample(temp_window_features_t_f_pca)

    # Output is written to the current working directory as <recording name>.npy
    out_name = edf_name.removesuffix('.edf')+'.npy'
    np.save(out_name, temp_window_features_t_f_pca_norm)
    print('successful store:', out_name)

successful read file chb02_01.edf
successful store: chb02_01.npy
successful read file chb02_02.edf
successful store: chb02_02.npy
successful read file chb02_03.edf
successful store: chb02_03.npy
successful read file chb02_04.edf
successful store: chb02_04.npy
successful read file chb02_05.edf
successful store: chb02_05.npy
successful read file chb02_06.edf
successful store: chb02_06.npy
successful read file chb02_07.edf
successful store: chb02_07.npy
successful read file chb02_08.edf
successful store: chb02_08.npy
successful read file chb02_09.edf
successful store: chb02_09.npy
successful read file chb02_10.edf
successful store: chb02_10.npy
successful read file chb02_11.edf
successful store: chb02_11.npy
successful read file chb02_12.edf
successful store: chb02_12.npy
successful read file chb02_13.edf
successful store: chb02_13.npy
successful read file chb02_14.edf
successful store: chb02_14.npy
successful read file chb02_15.edf
successful store: chb02_15.npy
successful read file chb0