## Libraries



In [1]:
# `sys` is needed below to make the project's .py modules importable.
import sys

# Attach Google Drive to the Colab VM, remounting if it is already attached.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Project root on the mounted Drive. For a local checkout use e.g.
# '/home/mrsergazinov/python-git-workspace/gluformer/' instead.
foldername = '/content/drive/My Drive/github/gluformer/'
assert foldername is not None, "[!] Enter the foldername."

# Put the project root on the import path.
sys.path.append(foldername)

Mounted at /content/drive


In [2]:
%%capture
!pip install pytorch-lightning==1.4.9
!pip install pytorch-forecasting
!pip install pandas --upgrade

In [1]:
import numpy as np
import pandas as pd
import os
import pickle

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

import tensorflow as tf 
import tensorboard as tb 
# Replace TensorFlow's `tf.io.gfile` with the stub implementation bundled with TensorBoard.
# NOTE(review): presumably a workaround for TensorBoard logging errors that occur when
# the full TensorFlow package is installed alongside it -- confirm it is still needed.
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

## Preprocessing Data

In [2]:
# Default project root for a local checkout.
LOCAL_FOLDERNAME = '/home/mrsergazinov/python-git-workspace/gluformer/'

# Keep a project root configured by an earlier cell (e.g. the Colab Drive mount)
# when it points at an existing directory; otherwise fall back to the local
# checkout. Overwriting it unconditionally breaks a top-to-bottom run on Colab.
_configured_folder = globals().get('foldername')
if not (_configured_folder and os.path.isdir(_configured_folder)):
    foldername = LOCAL_FOLDERNAME

# Directory holding the pickled datasets.
path = os.path.join(foldername, 'gludata', 'data')

# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
with open(os.path.join(path, "test_data_pyforecast.pkl"), 'rb') as f:
    test_data_raw = pickle.load(f)

In [3]:
def read_data(data):
    """Flatten per-segment CGM records into a single long-format DataFrame.

    Parameters
    ----------
    data : sequence
        One record per segment. Three positions of each record are read:
        ``record[0]`` is the subject id, ``record[1]`` is an array-like of CGM
        readings (flattened before use) and ``record[3]`` holds one timestamp
        per reading in any form accepted by ``pd.to_datetime``.

    Returns
    -------
    pd.DataFrame
        One row per reading, with a fresh 0..n-1 index and the columns
        ``id``, ``segment`` (position of the record in ``data``), ``timeidx``
        (0-based position within the segment), ``CGM``, the calendar features
        ``dayofyear``, ``dayofmonth``, ``dayofweek``, ``hour``, ``minute``
        (all categorical) and ``date``.

    Raises
    ------
    ValueError
        If a record's flattened readings or its timestamps do not contain
        exactly ``len(record[1])`` entries.
    """
    # Accumulate plain Python lists and build the frame once. The previous
    # implementation wrote into a pre-allocated object frame with chained
    # indexing (``frame[col][a:b] = ...``), which triggers
    # SettingWithCopyWarning and assigns nothing under pandas copy-on-write.
    ids, segments, time_indices, cgm_values, timestamps = [], [], [], [], []
    for segment_idx, record in enumerate(data):
        block_len = len(record[1])
        block_values = np.asarray(record[1], dtype=float).ravel()
        block_dates = list(record[3])
        if len(block_values) != block_len or len(block_dates) != block_len:
            raise ValueError(
                "segment %d: expected %d readings and timestamps, got %d and %d"
                % (segment_idx, block_len, len(block_values), len(block_dates))
            )
        ids.extend([record[0]] * block_len)
        segments.extend([str(segment_idx)] * block_len)
        time_indices.extend(range(block_len))
        cgm_values.extend(block_values.tolist())
        timestamps.extend(block_dates)

    # Parse the timestamps once; every calendar feature is derived from them.
    dates = pd.to_datetime(pd.Series(timestamps, dtype=object))

    # All Series share the default RangeIndex, so they align row by row.
    data_pd = pd.DataFrame(
        {
            "id": pd.Series(ids, dtype=object).astype(str).astype("category"),
            "segment": pd.Series(segments, dtype=object).astype(str).astype("category"),
            "timeidx": pd.Series(time_indices, dtype="int"),
            "CGM": pd.Series(cgm_values, dtype="float"),
            "dayofyear": dates.dt.dayofyear.astype("category"),
            "dayofmonth": dates.dt.day.astype("category"),
            "dayofweek": dates.dt.dayofweek.astype("category"),
            "hour": dates.dt.hour.astype("category"),
            "minute": dates.dt.minute.astype("category"),
            "date": dates,
        }
    )
    return data_pd

# Turn the raw pickled test records into the long-format DataFrame built by `read_data`.
test_data_pd = read_data(test_data_raw)

In [4]:
# Covariates passed as known-in-advance real-valued inputs: the running time
# index plus the calendar features derived from each timestamp.
known_reals = ["timeidx", "dayofyear", "dayofmonth", "dayofweek", "hour", "minute"]

# One series per (id, segment) pair, with up to 180 encoder steps and up to
# 12 prediction steps; no target normalizer is passed.
dataset_kwargs = dict(
    time_idx="timeidx",
    target="CGM",
    group_ids=["id", "segment"],
    max_encoder_length=180,
    max_prediction_length=12,
    static_categoricals=["id"],
    time_varying_known_reals=known_reals,
    target_normalizer=None,
)
test_data = TimeSeriesDataSet(test_data_pd, **dataset_kwargs)

# Evaluation loader (train=False), batches of 32, loaded in the main process.
test_dataloader = test_data.to_dataloader(train=False, batch_size=32, num_workers=0)

In [5]:
test_data_pd

Unnamed: 0,id,segment,timeidx,CGM,dayofyear,dayofmonth,dayofweek,hour,minute,date
0,0,0,0,-2.362637,259,16,0,23,2,2013-09-16 23:02:00
1,0,0,1,-2.362637,259,16,0,23,7,2013-09-16 23:07:00
2,0,0,2,-2.362637,259,16,0,23,12,2013-09-16 23:12:00
3,0,0,3,-2.390110,259,16,0,23,17,2013-09-16 23:17:00
4,0,0,4,-2.362637,259,16,0,23,22,2013-09-16 23:22:00
...,...,...,...,...,...,...,...,...,...,...
24047,37,51,462,-2.582418,82,23,5,4,23,2013-03-23 04:23:00
24048,37,51,463,-2.582418,82,23,5,4,28,2013-03-23 04:28:00
24049,37,51,464,-2.472527,82,23,5,4,33,2013-03-23 04:33:00
24050,37,51,465,-2.417582,82,23,5,4,38,2013-03-23 04:38:00


In [6]:
test_data_pd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24052 entries, 0 to 24051
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   id          24052 non-null  category      
 1   segment     24052 non-null  category      
 2   timeidx     24052 non-null  int64         
 3   CGM         24052 non-null  float64       
 4   dayofyear   24052 non-null  category      
 5   dayofmonth  24052 non-null  category      
 6   dayofweek   24052 non-null  category      
 7   hour        24052 non-null  category      
 8   minute      24052 non-null  category      
 9   date        24052 non-null  datetime64[ns]
dtypes: category(7), datetime64[ns](1), float64(1), int64(1)
memory usage: 737.0 KB


## Evaluating Model

In [7]:
# Instantiate a TFT configured from the test dataset.
# NOTE(review): this instance is not what gets evaluated -- `tft_fitted` below is a
# separate model object returned by `load_from_checkpoint`.
tft = TemporalFusionTransformer.from_dataset(
    test_data,
    learning_rate=0.001,
    hidden_size=160,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=160,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,  # log every 10 batches
    reduce_on_plateau_patience=4,
)

# `load_from_checkpoint` is a classmethod that returns a new model, so call it on
# the class: recent Lightning releases raise when it is invoked on an instance.
# os.path.join also avoids the doubled slash that string concatenation produced
# when `foldername` ends with '/'.
tft_fitted = TemporalFusionTransformer.load_from_checkpoint(
    os.path.join(foldername, 'logs-compare', 'tft.ckpt')
)

In [8]:
# Gather the ground-truth targets for every batch of the test loader and
# concatenate them into a single tensor (element 0 of each batch's second item).
actuals = torch.cat([targets[0] for _, targets in test_dataloader])

# Forecasts of the checkpointed model over the same loader.
predictions = tft_fitted.predict(test_dataloader)

In [10]:
# Constants of the linear map applied below: with these values it sends
# -SCALE_1 to LOWER and +SCALE_1 to UPPER (i.e. [-5, 5] -> [38, 402]).
UPPER = 402
LOWER = 38
SCALE_1 = 5
SCALE_2 = 2


def _rescale(values):
    """Return (values + SCALE_1) / (SCALE_1 * SCALE_2) * (UPPER - LOWER) + LOWER."""
    shifted = values + SCALE_1
    return shifted / (SCALE_1 * SCALE_2) * (UPPER - LOWER) + LOWER


# NOTE: both names are overwritten in place, so re-running this cell without
# re-running the previous one applies the rescaling a second time.
actuals = _rescale(actuals)
predictions = _rescale(predictions)

In [11]:
actuals[1]

tensor([234.0000, 232.0000, 229.5000, 227.0000, 220.0000, 219.0000, 214.0000,
        209.0000, 207.0000, 196.0000, 195.0000, 184.0000])

In [12]:
predictions[1]

tensor([176.9777, 179.4152, 179.4377, 183.5591, 187.2383, 188.6252, 189.5839,
        191.1761, 193.2892, 195.3918, 196.5208, 194.4286], dtype=torch.float64)

In [18]:
# RMSE of each forecast window (mean over axis 1), then the median across windows.
squared_error = (actuals - predictions) ** 2
squared_error.mean(axis=1).sqrt().median()

tensor(67.2113, dtype=torch.float64)