In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `hypopredict` package) are picked up when a cell
# runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

import numpy as np
import pandas as pd
from xgboost import XGBClassifier

from hypopredict import chunker
from hypopredict import labeler
from hypopredict.cv import CV_splitter
from hypopredict.cv import CrossValidator
from hypopredict.params import TRAIN_DAYS

In [3]:
#######
# data sources
# ECG location comes from the environment and is passed on as `ecg_dir`.
ECG_PATH = os.getenv('ECG_PATH')
if not ECG_PATH:
    # os.getenv returns None for an unset variable; fail here with a clear
    # message instead of handing None to the splitter / cross-validator.
    raise RuntimeError(
        "Environment variable ECG_PATH is not set; "
        "set it to the ECG data location before running this notebook."
    )

# glucose source, shared by the splitter and the chunk/label step
GLUCOSE_SRC = 'local'

#######
# cross-validation strategy
N_SPLITS = 5
SPLIT_RANDOM_STATE = 17  # fixed seed passed to the splitter

#######
# chunking strategy
CHUNK_SIZE = pd.Timedelta(minutes=60)
STEP_SIZE = pd.Timedelta(minutes=10)

#######
# labeling strategy
FORECAST_WINDOW = pd.Timedelta(minutes=90)

######
# rolling features
WINDOW_SIZE_FEATURES = pd.Timedelta(minutes=15)
STEP_SIZE_FEATURES = pd.Timedelta(minutes=15)
ROLL_SUFFIX = 'roll15m'  # keep in sync with WINDOW_SIZE_FEATURES
ROLL_AGG_FUNCS = ['mean', 'std', 'min', 'max']


# initialize CV splitter
splitter = CV_splitter(
    n_splits=N_SPLITS,
    ecg_dir=ECG_PATH,
    glucose_src=GLUCOSE_SRC,
    random_state=SPLIT_RANDOM_STATE,
)
# get splits of the training days
splits = splitter.get_splits(TRAIN_DAYS)

crossval = CrossValidator(splits=splits)

# chunk, label and stack every split with the settings configured above
splits_prepped = crossval.chunkify_label_stack(
    chunk_size=CHUNK_SIZE,
    step_size=STEP_SIZE,
    ecg_dir=ECG_PATH,
    glucose_src=GLUCOSE_SRC,
    forecast_window=FORECAST_WINDOW,
    roll_window_size=WINDOW_SIZE_FEATURES,
    roll_step_size=STEP_SIZE_FEATURES,
    suffix=ROLL_SUFFIX,
    agg_funcs=ROLL_AGG_FUNCS,
)

# NOTE(review): `_get_split_mean_labels` is a private helper of CrossValidator;
# presumably it reports the mean label per split — confirm, and prefer a
# public accessor if one exists.
print(crossval._get_split_mean_labels(splits_prepped))

# quick sanity check on the container type returned by the prep step
print(type(splits_prepped))


    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-12_35_54-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-20_29_57-1HG.feather']
Labeling day 71 with 91 chunks
Labeling day 21 with 47 chunks
Labeling day 14 with 44 chunks
Labeling day 63 with 84 chunks

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-17_43_12-2HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-07_01_03-2HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-06_32_58-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-16_49_30-1HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-61-15_08_04-0HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-61-06_46_44-0HG.feather']
Labeling day 24 

In [4]:
# initialize XGBoost model
# Hyperparameters are fixed by hand here (no tuning is done in this notebook).
model = XGBClassifier(
    n_estimators=555,
    max_depth=5,
    learning_rate=0.1,
    eval_metric='logloss',
    random_state=17,  # fixed seed for the model
)

In [5]:
# Cross-validate `model` on the prepared splits. The call prints PR-AUC and
# average precision for each split and returns the collected metrics.
cv_results = crossval.validate_model_cv(model, splits_prepped)

Split 1 PR-AUC: 0.0016, Average Precision: 0.0016
Split 2 PR-AUC: 0.4072, Average Precision: 0.6666
Split 3 PR-AUC: 0.2061, Average Precision: 0.2102
Split 4 PR-AUC: 0.1695, Average Precision: 0.1736
Split 5 PR-AUC: 0.0618, Average Precision: 0.0656


In [6]:
# Show the raw per-split metrics: a dict with the keys 'val_pr_aucs' and
# 'val_ave_precisions', each holding one value per split.
cv_results

{'val_pr_aucs': [0.0016203611423199096,
  0.4071925585635356,
  0.20614538999691764,
  0.16947470743155707,
  0.061791797085183627],
 'val_ave_precisions': [0.0016439632421445783,
  0.6665589846888397,
  0.2102479255491992,
  0.17361777260193473,
  0.06560345306510801]}