In [15]:
# Auto-reload imported modules so edits to project code are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
import pandas as pd
import numpy as np
import os

In [17]:
import hypopredict.cv as cv
from hypopredict.params import TRAIN_DAYS

In [18]:
# TEST and DEMO days were hand-picked and are already held out of TRAIN_DAYS.
# Take a copy so the shared constant is never modified from this notebook.
# Reminder: the number of days must be divisible by K for K-fold CV.
non_test_days = list(TRAIN_DAYS)

In [19]:
# Inspect the days available for cross-validation
non_test_days

[11,
 13,
 14,
 21,
 22,
 24,
 42,
 43,
 44,
 51,
 52,
 54,
 61,
 63,
 71,
 72,
 74,
 82,
 92,
 93]

In [20]:
# Directory containing the ECG feather files; None if ECG_PATH is not set in the environment
ECG_PATH = os.environ.get('ECG_PATH')
ECG_PATH

'/Users/alexxela/code/hypopredict/data/feathers/'

In [29]:
# Build the cross-validation splitter
splitter = cv.CV_splitter(
    n_splits=4,
    random_state=17,
    ecg_dir=ECG_PATH,
    glucose_src='local',  # local is much faster
)

In [30]:
# Display the splitter object (default repr)
splitter

<hypopredict.cv.CV_splitter at 0x113686f60>

In [31]:
# Inspect the configuration stored on the splitter instance
vars(splitter)

{'n_splits': 4,
 'random_state': 17,
 'ecg_dir': '/Users/alexxela/code/hypopredict/data/feathers/',
 'glucose_src': 'local'}

In [32]:
# Partition the non-test days into cross-validation folds
splits = splitter.get_splits(non_test_days)

In [33]:
# People are shuffled across folds.
# Shuffling happens at the level of whole days because we test only on full days
# and our features are day-based.
splits

[array([71, 21, 14, 63, 24]),
 array([44, 61, 13, 52, 54]),
 array([43, 22, 92, 11, 51]),
 array([74, 42, 93, 82, 72])]

In [34]:
# validate needs the GLUCOSE_PATH env var set, so confirm it is visible to this kernel.
# Bound to a descriptive name instead of `_`: IPython uses `_` for the previous
# cell's output, and assigning to it by hand stops that variable from updating.
GLUCOSE_PATH = os.getenv('GLUCOSE_PATH')
GLUCOSE_PATH

'/Users/alexxela/code/hypopredict/data/dbt-glucose/'

In [35]:
# Validate every fold. Judging by the outputs recorded in this notebook, `checks`
# holds one validity flag per fold and `props` the share of y == 1 in each fold.
checks, props = splitter.validate(splits)

--------------------------------------------------
Validating split: [71 21 14 63 24]

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-12_35_54-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-20_29_57-1HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-17_43_12-2HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-07_01_03-2HG.feather']
--------------------------------------------------
Validating split: [44 61 13 52 54]

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-06_32_58-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-16_49_30-1HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-61-15_08_04-0HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWav

In [36]:
# Show the per-fold validity flags and y == 1 proportions together
checks, props

([np.True_, np.True_, np.True_, np.True_],
 array([0.1729, 0.0575, 0.2458, 0.0164]))

In [14]:
# Same validation, with a per-split summary line printed.
# NOTE(review): the saved output of this cell is stale. Its execution count (14)
# predates the current splitter cell, and it reports 3-day splits (including
# day 31, which is not in non_test_days) instead of the 5-day folds in `splits`.
# Re-run this cell, or remove it, before sharing the notebook.
splitter.validate(splits, verbose=True)

--------------------------------------------------
Validating split: [13 21 74]

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-12_35_54-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-20_29_57-1HG.feather']

Split [13 21 74] is valid with 26.25% of y == 1

--------------------------------------------------
Validating split: [44 42 24]

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-06_32_58-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-44-16_49_30-1HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-17_43_12-2HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-07_01_03-2HG.feather']

Split [44 42 24] is valid with 4.01% of y == 1

--------------------------------------------------
Validating split: [52 31 11]

    Files conca

([np.True_, np.True_, np.False_, np.False_],
 array([  0.2625,   0.0401,   0.    , -33.3333]))

In [37]:
# Re-display the folds before turning them into ECG chunks
splits

[array([71, 21, 14, 63, 24]),
 array([44, 61, 13, 52, 54]),
 array([43, 22, 92, 11, 51]),
 array([74, 42, 93, 82, 72])]

---
### Turn PersonDays into actual ECG chunks

In [38]:
from hypopredict import chunker

In [44]:
# Chunk the days of the first fold; chunk_size and step_size are given as Timedeltas
# (presumably window length and stride; confirm against chunker.chunkify)
split_chunkified = chunker.chunkify(
    splits[0],
    chunk_size=pd.Timedelta(hours=1),
    step_size=pd.Timedelta(minutes=5),
    ecg_dir=ECG_PATH,
)


    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-12_35_54-1HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-21-20_29_57-1HG.feather']

    Files concatinated:
                 ['/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-17_43_12-2HG.feather', '/Users/alexxela/code/hypopredict/data/feathers/EcgWaveform-24-07_01_03-2HG.feather']


In [45]:
# Check the container type returned by chunkify
type(split_chunkified)

dict

In [46]:
# Print the chunk dict's keys above the fold's days to compare them by eye
print(split_chunkified.keys(), splits[0], sep='\n')

dict_keys([71, 21, 14, 63, 24])
[71 21 14 63 24]
