## Libraries



In [1]:
import sys

from google.colab import drive

# Attach Google Drive to the Colab VM (force_remount refreshes an existing mount).
drive.mount('/content/drive', force_remount=True)

# Root of the gluformer checkout on Drive. Alternative location kept for reference:
# foldername = '/home/mrsergazinov/python-git-workspace/gluformer/'
foldername = '/content/drive/My Drive/github/gluformer/'
assert foldername is not None, "[!] Enter the foldername."

# Make the project's .py modules importable from this notebook.
sys.path.append(foldername)

Mounted at /content/drive


In [2]:
%%capture
# Install the notebook's dependencies; %%capture hides pip's console output.
# pytorch-lightning is pinned to 1.4.9.
# NOTE(review): pytorch-forecasting and pandas are not pinned, so a fresh run
# may resolve to different versions than the ones this notebook was written for.
!pip install pytorch-lightning==1.4.9
!pip install pytorch-forecasting
!pip install pandas --upgrade

In [3]:
import numpy as np
import pandas as pd
import os
import pickle

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

import tensorflow as tf 
import tensorboard as tb 
# Replace TensorFlow's `tf.io.gfile` with the stub implementation bundled in TensorBoard.
# NOTE(review): presumably a workaround for a TensorFlow/TensorBoard gfile
# incompatibility triggered by the logger -- confirm it is still required.
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



## Preprocessing Data

In [4]:
# Directory holding the preprocessed glucose data inside the project folder.
path = foldername + 'gludata/data'
test_pickle_path = path + "/test_data_pyforecast.pkl"

# NOTE: pickle.load executes arbitrary code from the file; only load trusted artifacts.
with open(test_pickle_path, 'rb') as pickle_file:
    test_data_raw = pickle.load(pickle_file)

In [5]:
def read_data(data):
    """Flatten a sequence of raw segments into one long-format DataFrame.

    Parameters
    ----------
    data : sequence
        Each element is indexable as ``record[0]`` (subject id),
        ``record[1]`` (array of CGM readings, flattened to one value per row),
        ``record[2]`` (2-D array with five time features per row, in the order
        dayofyear, dayofmonth, dayofweek, hour, minute) and ``record[3]``
        (one date per row).

    Returns
    -------
    pandas.DataFrame
        Columns ``id, segment, timeidx, CGM, dayofyear, dayofmonth, dayofweek,
        hour, minute, date`` with a fresh 0..N-1 index. ``segment`` is the
        position of the record in ``data`` (as a string), ``timeidx`` is a
        1-based counter that keeps increasing across segments, and ``id`` /
        ``segment`` are categorical.
    """
    columns = ["id", "segment", "timeidx", "CGM", "dayofyear", "dayofmonth",
               "dayofweek", "hour", "minute", "date"]
    feature_columns = columns[4:9]

    frames = []
    rows_so_far = 0  # running row count; keeps timeidx increasing across segments
    for segment_number, record in enumerate(data):
        n_rows = len(record[1])
        if n_rows == 0:
            # An empty segment contributes no rows; skip it.
            continue

        frame = pd.DataFrame({
            "id": [record[0]] * n_rows,
            "segment": [str(segment_number)] * n_rows,
            "timeidx": rows_so_far + np.arange(1, n_rows + 1),
            "CGM": np.asarray(record[1]).flatten(),
        })
        # reshape doubles as a shape check: exactly one value per feature per row.
        features = np.asarray(record[2]).reshape(n_rows, len(feature_columns))
        for position, column in enumerate(feature_columns):
            frame[column] = features[:, position]
        frame["date"] = record[3]

        frames.append(frame)
        rows_so_far += n_rows

    # Concatenate once: DataFrame.append was removed in pandas 2.0 and growing a
    # frame inside the loop is quadratic in the number of rows.
    if frames:
        data_pd = pd.concat(frames, ignore_index=True)
    else:
        data_pd = pd.DataFrame(columns=columns)

    data_pd["timeidx"] = data_pd["timeidx"].astype(int)
    data_pd["id"] = data_pd["id"].astype(str).astype("category")
    data_pd["segment"] = data_pd["segment"].astype(str).astype("category")
    return data_pd

In [6]:
# Flatten the raw test segments into a single long-format frame.
test_data_pd = read_data(test_data_raw)

# Dataset settings, gathered in one place so they are easy to compare
# against the configuration used elsewhere.
dataset_options = dict(
    time_idx="timeidx",
    target="CGM",
    group_ids=["id", "segment"],
    max_encoder_length=180,    # history steps fed to the encoder
    max_prediction_length=12,  # steps forecast per sample
    static_categoricals=["id", "segment"],
    time_varying_known_reals=[
        "timeidx", "dayofyear", "dayofmonth", "dayofweek", "hour", "minute",
    ],
    # Passed explicitly as None; CGM is presumably scaled upstream -- TODO confirm.
    target_normalizer=None,
)
test_data = TimeSeriesDataSet(test_data_pd, **dataset_options)

# Evaluation loader (train=False).
loader_options = dict(train=False, batch_size=32, num_workers=2)
test_dataloader = test_data.to_dataloader(**loader_options)

In [14]:
# Preview the assembled frame; the rendered output shows only its first and last rows.
# NOTE(review): the execution count of this cell (14) is out of sequence with its
# neighbours -- re-run the notebook top to bottom before sharing.
test_data_pd

Unnamed: 0,id,segment,timeidx,CGM,dayofyear,dayofmonth,dayofweek,hour,minute,date
0,0,0,1,-2.362637,0.419178,0.032258,-1.000000,0.916667,-0.933333,2013-09-16 23:02:00
1,0,0,2,-2.362637,0.419178,0.032258,-1.000000,0.916667,-0.766667,2013-09-16 23:07:00
2,0,0,3,-2.362637,0.419178,0.032258,-1.000000,0.916667,-0.600000,2013-09-16 23:12:00
3,0,0,4,-2.390110,0.419178,0.032258,-1.000000,0.916667,-0.433333,2013-09-16 23:17:00
4,0,0,5,-2.362637,0.419178,0.032258,-1.000000,0.916667,-0.266667,2013-09-16 23:22:00
...,...,...,...,...,...,...,...,...,...,...
24047,37,51,24048,-2.582418,-0.550685,0.483871,0.428571,-0.666667,-0.233333,2013-03-23 04:23:00
24048,37,51,24049,-2.582418,-0.550685,0.483871,0.428571,-0.666667,-0.066667,2013-03-23 04:28:00
24049,37,51,24050,-2.472527,-0.550685,0.483871,0.428571,-0.666667,0.100000,2013-03-23 04:33:00
24050,37,51,24051,-2.417582,-0.550685,0.483871,0.428571,-0.666667,0.266667,2013-03-23 04:38:00


## Evaluating Model

In [7]:
# Build a TemporalFusionTransformer whose input/output layout is derived from
# the test dataset; the keyword arguments below set its hyperparameters.
tft = TemporalFusionTransformer.from_dataset(
    test_data,
    learning_rate=0.0001,
    hidden_size=160,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=160,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    reduce_on_plateau_patience=4,
)

# load_from_checkpoint is a classmethod, so it is called on the class rather
# than on `tft`; the instance above does not take part in loading.
# os.path.join avoids the doubled slash that plain concatenation produced,
# because `foldername` already ends with '/'.
tft_fitted = TemporalFusionTransformer.load_from_checkpoint(
    os.path.join(foldername, 'logs-compare', 'tft.ckpt')
)

In [8]:
# Gather the ground-truth targets and the fitted model's forecasts for the test set.
# Every batch is an (inputs, targets) pair; the first element of `targets` is kept.
actuals = torch.cat(
    [targets[0] for _, targets in test_dataloader]
)
predictions = tft_fitted.predict(test_dataloader)

In [9]:
# Constants of the affine map that undoes the scaling of the CGM values.
UPPER = 402
LOWER = 38
SCALE_1 = 5
SCALE_2 = 2


def _to_original_scale(values):
    """Linearly map values from [-SCALE_1, SCALE_1] onto [LOWER, UPPER]."""
    return (values + SCALE_1) / (SCALE_1 * SCALE_2) * (UPPER - LOWER) + LOWER


# NOTE: both names are overwritten with their rescaled versions, so this cell
# must be executed exactly once after the predictions cell.
actuals = _to_original_scale(actuals)
predictions = _to_original_scale(predictions)

In [13]:
# Rescaled ground-truth values for the sample at index 1 (one forecast window).
# NOTE(review): this cell's execution count (13) is out of order relative to the
# next cell (12); re-run top to bottom so the displayed outputs are consistent.
actuals[1]

tensor([234.0000, 232.0000, 229.5000, 227.0000, 220.0000, 219.0000, 214.0000,
        209.0000, 207.0000, 196.0000, 195.0000, 184.0000])

In [12]:
# Rescaled model forecast for the same sample (index 1), for side-by-side
# comparison with the ground-truth window shown in the previous cell.
predictions[1]

tensor([140.5833, 152.4465, 157.4524, 161.1237, 164.6678, 167.3861, 167.8139,
        165.7102, 163.6346, 162.9602, 163.0077, 161.6293], dtype=torch.float64)