# Baseline model for all patients

In [4]:
from sz_utils import data_handler
from typing import List
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

# List the GPU devices TensorFlow can see; the cell output is that list.
import tensorflow as tf

tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
# Data is in data folder, one csv per patient
def load_data(path: str) -> List[pd.DataFrame]:
    """Load every CSV file found directly inside a folder.

    That folder contains the result of scripts/make_dataset.py
    (one csv per patient).

    Files are read in sorted filename order so the position of each
    dataframe in the returned list is reproducible between runs and
    machines (``os.listdir`` alone gives no ordering guarantee).
    Entries that are not regular ``*.csv`` files (sub-directories,
    checkpoints, hidden files, ...) are skipped instead of being handed
    to ``pd.read_csv``.

    :param path: path to the folder
    :type path: str
    :return: list of dataframes, one per CSV file, ordered by filename
    :rtype: List[pd.DataFrame]
    """
    csv_names = sorted(
        name
        for name in os.listdir(path)
        # extension match is case-insensitive; isfile() rejects directories
        # that merely happen to be named "*.csv"
        if name.lower().endswith(".csv")
        and os.path.isfile(os.path.join(path, name))
    )
    return [pd.read_csv(os.path.join(path, name)) for name in csv_names]

In [6]:
# Read all patient CSVs, then report how many were loaded and the
# (rows, columns) shape of the first one, each on its own line.
data = load_data("../data")
print(len(data), data[0].shape, sep="\n")

16
(537600, 23)


In [7]:
# Preview the first loaded DataFrame (the notebook display elides the middle rows and columns).
data[0]

Unnamed: 0,FP1-F7,F7-T7,T7-P7,P7-O1,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,...,F8-T8,T8-P8-0,P8-O2,FZ-CZ,CZ-PZ,P7-T7,T7-FT9,FT9-FT10,FT10-T8,label
0,-0.781441,12.112332,-2.735043,-18.559219,-2.539683,-1.367521,14.847375,-20.708181,3.125763,-9.572650,...,-5.860806,4.493284,-5.665446,-2.735043,-10.354090,3.125763,-9.572650,-7.228327,-2.930403,1
1,-4.297924,8.009768,-1.562882,-16.214896,-3.125763,-2.344322,9.963370,-18.754579,2.930403,-12.698413,...,-12.503053,2.735043,-3.711844,-3.321123,-10.354090,1.953602,-6.251526,-4.493284,-7.814408,1
2,-3.907204,2.539683,-1.562882,-11.721612,-2.930403,-2.344322,6.251526,-15.824176,1.367521,-15.433455,...,-15.238095,-0.390720,-0.976801,-4.884005,-10.940171,1.953602,-8.986569,-5.079365,-4.493284,1
3,-7.228327,3.321123,-2.539683,-8.400488,-5.079365,-3.907204,8.400488,-13.479853,3.711844,-13.675214,...,-14.456654,-0.586081,-0.390720,-1.367521,-12.307692,2.930403,-8.009768,-5.470085,-6.446886,1
4,-7.619048,1.758242,-3.711844,-3.711844,-4.884005,-5.274725,4.688645,-7.814408,4.102564,-12.698413,...,-14.652015,0.390720,-0.586081,4.493284,-12.307692,4.102564,-2.735043,-4.297924,-12.503053,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537595,-2.344322,8.400488,-0.781441,-3.907204,12.307692,-8.009768,-6.056166,2.930403,12.893773,-9.377289,...,10.744811,5.470085,-6.837607,15.628816,-25.006105,1.172161,0.781441,-3.907204,12.307692,0
537596,1.172161,9.963370,-0.976801,-8.009768,6.056166,-1.562882,-7.619048,4.688645,2.735043,-14.456654,...,11.526252,-1.953602,-4.102564,-24.224664,14.847375,1.367521,-2.148962,4.493284,4.297924,0
537597,-2.344322,-3.321123,-5.470085,-4.297924,-1.758242,-5.860806,-9.181929,1.367521,-1.758242,-15.042735,...,3.516484,-1.562882,-6.837607,-16.996337,2.344322,5.860806,4.102564,8.986569,-3.516484,0
537598,-2.344322,15.042735,-9.377289,-9.377289,7.619048,-6.446886,-8.791209,0.390720,5.665446,-14.065934,...,2.735043,-1.172161,-1.758242,-12.112332,-0.195360,9.768010,-8.986569,6.251526,-1.562882,0


Remember the data is stored in the following format:
- Each window is (256/2)*5 = 640 rows long
- Each window has a label (preictal or interictal)

In [10]:
def get_windows(
    data: List[pd.DataFrame], window_size: int = 640
) -> List[List[pd.DataFrame]]:
    """Split each dataframe into consecutive, non-overlapping row windows.

    Every dataframe is cut into slices of ``window_size`` rows, in order.
    When the number of rows is not a multiple of ``window_size`` the final
    window is kept and is simply shorter than the others. An empty
    dataframe yields an empty list of windows.

    :param data: list of dataframes
    :type data: List[pd.DataFrame]
    :param window_size: number of rows per window; defaults to 640, the
        window length this notebook uses
    :type window_size: int
    :return: one list of window dataframes per input dataframe, in the
        same order as ``data``
    :rtype: List[List[pd.DataFrame]]
    :raises ValueError: if ``window_size`` is not positive
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    windows = []
    for df in data:
        # Stepping the start offset by window_size visits exactly
        # ceil(len(df) / window_size) slices; iloc clips the last slice
        # at the end of the frame, producing the shorter trailing window.
        df_splits = [
            df.iloc[start:start + window_size]
            for start in range(0, len(df), window_size)
        ]
        windows.append(df_splits)

    return windows

In [11]:
# Split every loaded DataFrame into windows; `windows[i]` is the list of windows for `data[i]`.
windows = get_windows(data)

In [19]:
# Sanity-check the windowing: number of per-patient lists, number of
# windows in the first list, and the (rows, columns) shape of one window.
for summary_label, summary_value in (
    ("How many patients:", len(windows)),
    ("How many windows for one patient:", len(windows[0])),
    ("Shape of each window:", windows[0][0].shape),
):
    print(summary_label, summary_value)

How many patients: 16
How many windows for one patient: 840
Shape of each window: (640, 23)
