In [None]:
# Enable IPython's autoreload extension so edits to imported project modules
# (e.g. the hypopredict package) are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules automatically before each cell execution.
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import os

from hypopredict.cv import CV_splitter
from hypopredict import chunker
from hypopredict import labeler
from hypopredict.params import TRAIN_DAYS

#### Split, Chunkify, Label, Filter, and Stack all Train Days

--> Output: labeled training chunks, ready for feature engineering.

In [3]:
# Start from the project-wide list of training days defined in hypopredict.params.
days_train = TRAIN_DAYS

In [4]:
# Directory holding the ECG files, taken from the ECG_PATH environment variable.
# NOTE(review): this is None when the variable is unset — confirm that the
# downstream `ecg_dir` consumers handle that, or export ECG_PATH before running.
ECG_PATH = os.environ.get('ECG_PATH')

In [5]:
# Settings for the cross-validation splitter, gathered in one place so they
# are easy to tweak. The fixed random_state is presumably what makes the fold
# assignment reproducible — confirm in CV_splitter.
cv_config = {
    'n_splits': 5,
    'ecg_dir': ECG_PATH,
    'glucose_src': 'local',
    'random_state': 17,
}
splitter = CV_splitter(**cv_config)

# Partition the training days into folds.
splits = splitter.get_splits(days_train)

In [6]:
# Inspect the folds returned by splitter.get_splits (displayed as the cell output).
splits

array([[71, 21, 14, 63],
       [24, 44, 61, 13],
       [52, 54, 43, 22],
       [92, 11, 51, 74],
       [42, 93, 82, 72]])

In [7]:
# Keep only the days from the first four folds, flattened to 1-D
# (presumably the remaining fold is held out — TODO confirm).
# NOTE(review): this rebinds `days_train`, the same name that was passed to
# splitter.get_splits above; re-running that cell afterwards would split only
# these days instead of the full TRAIN_DAYS.
days_train = splits[:4].ravel()

In [8]:
# Sanity check: number of training days kept after selecting the first four folds.
days_train.size

16

In [9]:
# First four folds, kept in their 2-D (fold x day) layout; it is flattened
# with .ravel() when handed to the chunker.
split_train = splits[:4]

In [10]:
# Windowing parameters for the ECG signal: each chunk is 30 minutes long and
# consecutive chunks are offset by 1 minute (presumably a sliding window —
# confirm in chunker.chunkify).
chunk_size = pd.Timedelta(minutes=30)
step_size = pd.Timedelta(minutes=1)

# Chunkify every training day; the result is keyed by day id.
split_train_chunkified = chunker.chunkify(
    split_train.ravel(),
    chunk_size=chunk_size,
    step_size=step_size,
    ecg_dir=ECG_PATH,
)


    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-12_35_54-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-20_29_57-1HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-17_43_12-2HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-07_01_03-2HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-06_32_58-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-16_49_30-1HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-61-15_08_04-0HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-61-06_46_44-0HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-52-09_23_58-0HG.feather', '/Users/ale

In [11]:
# Check which day ids the chunkified result is keyed by.
split_train_chunkified.keys()

dict_keys([71, 21, 14, 63, 24, 44, 61, 13, 52, 54, 43, 22, 92, 11, 51, 74])

In [13]:
# Horizon passed to the labeler as `forecast_window` (presumably how far past
# each chunk it looks for a hypoglycemic event — confirm in labeler.label_split).
forecast_window = pd.Timedelta(minutes=30)

# Label the chunks of every training day using the local glucose source.
split_train_labels = labeler.label_split(
    split_train_chunkified,
    glucose_src='local',
    forecast_window=forecast_window,
)

Labeling day 71 with 904 chunks
Labeling day 21 with 463 chunks
Labeling day 14 with 434 chunks
Labeling day 63 with 837 chunks
Labeling day 24 with 732 chunks
Labeling day 44 with 673 chunks
Labeling day 61 with 1165 chunks
Labeling day 13 with 764 chunks
Labeling day 52 with 819 chunks
Labeling day 54 with 441 chunks
Labeling day 43 with 482 chunks
Labeling day 22 with 797 chunks
Labeling day 92 with 865 chunks
Labeling day 11 with 734 chunks
Labeling day 51 with 709 chunks
Labeling day 74 with 822 chunks


In [15]:
# Chunk pre-processing helpers; `cp.filter_and_stack` is used in the next cell.
# NOTE(review): this import sits mid-notebook — consider moving it to the
# import cell at the top so all dependencies are visible in one place.
import hypopredict.chunk_preproc as cp

In [17]:
# Filter the per-day chunks/labels and stack them across days
# (filtering criteria live in cp.filter_and_stack), then unpack the result
# into the training chunks and their labels.
train_stack = cp.filter_and_stack(split_train_chunkified, split_train_labels)
chunks_train, y_train = train_stack

In [24]:
# Arbitrary position used to spot-check one chunk together with its label.
sample_idx = 17

# Chunks and labels should come in equal numbers.
n_chunks, n_labels = len(chunks_train), len(y_train)
print('Number of chunks in training set:', n_chunks)
print('Number of labels in training set:', n_labels)

# Show the label, then display the matching chunk as the cell output.
print('The following chunk has label:', y_train[sample_idx])
chunks_train[sample_idx]

Number of chunks in training set: 9109
Number of labels in training set: 9109
The following chunk has label: 0


Unnamed: 0_level_0,EcgWaveform
datetime,Unnamed: 1_level_1
2014-10-01 11:07:43.435,2029
2014-10-01 11:07:43.439,2028
2014-10-01 11:07:43.443,2026
2014-10-01 11:07:43.447,2025
2014-10-01 11:07:43.451,2023
...,...
2014-10-01 11:37:43.415,2032
2014-10-01 11:37:43.419,2116
2014-10-01 11:37:43.423,2188
2014-10-01 11:37:43.427,2216


In [26]:
# Share of positive (hypoglycemia) labels in the training set, in percent
# (assumes y_train holds 0/1 labels, so the mean is the positive fraction).
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 can re-introduce binary floating-point noise in the
# printed value (e.g. 0.07 * 100 -> 7.000000000000001). The built-in round()
# also works whether .mean() returns a NumPy scalar or a plain Python float.
class_balance = round(y_train.mean() * 100, 2)
# The explicit format spec pins the report to two decimals regardless of the
# float's repr.
print(f'Class balance (hypoglycemia %) in training set: {class_balance:.2f}%')

Class balance (hypoglycemia %) in training set: 11.66%


#### Congrats!

Now we have training chunks and training labels.

We can apply feature extraction and models like we did last week!

In [21]:
# TODO: resample the training chunks to fix the class imbalance (only ~11.66% of chunks are labeled hypoglycemia)