In [1]:
# Install necessary packages
# !pip install optuna darts chardet seaborn tqdm
# !pip install tensorboard

# Set environment variables and load extensions
%set_env PYTORCH_ENABLE_MPS_FALLBACK=1
%load_ext autoreload
%autoreload 2
%matplotlib inline
# %load_ext tensorboard 
# %tensorboard --logdir darts_logs --port 6007

# Import necessary libraries
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import os
from darts import TimeSeries, concatenate
from darts.dataprocessing.transformers import Scaler
from darts.models import TFTModel
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, IceCreamHeaterDataset
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.utils.likelihood_models import QuantileRegression
from darts.metrics import mape, rmse
import warnings
import optuna
import torch
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.callbacks import EarlyStopping
from sklearn.preprocessing import MaxAbsScaler
from darts.metrics import smape
from darts.utils.likelihood_models import GaussianLikelihood
warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)


env: PYTORCH_ENABLE_MPS_FALLBACK=1


In [2]:

# Constants definition
HIDDEN = 64          # hidden size for TFT models
VALWAIT = 1          # run validation every VALWAIT epoch(s)
RAND = 42            # seed shared by torch and the model's random_state
N_SAMPLES = 100      # number of Monte-Carlo samples drawn per probabilistic forecast
QUANTILES = [0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99]  # quantiles fitted by the quantile-regression likelihood
SPLIT = 0.9          # fraction of the series used for training
FIGSIZE = (9, 6)     # presumably the default figure size for plots -- not used in the cells shown here

# Lower/upper quantiles of the two prediction bands; the upper bound mirrors the lower one.
qL1, qL2 = 0.01, 0.10
qU1, qU2 = 1-qL1, 1-qL2

# round() rather than int(): float products such as (1 - 0.07) * 100 evaluate to
# 92.99999999999999, which int() would truncate to 92 and mislabel the band.
label_q1 = f'{round(qU1 * 100)} / {round(qL1 * 100)} percentile band'
label_q2 = f'{round(qU2 * 100)} / {round(qL2 * 100)} percentile band'
torch.manual_seed(RAND)


<torch._C.Generator at 0x14745ca90>

In [3]:

# Data preprocessing function
def preprocess_data(file_path):
    """Read the glucose CSV and normalise its column names and dtypes.

    Steps, in order:
      1. read the CSV at ``file_path``;
      2. rename ``date_time`` to ``datetime_col`` and parse it as datetimes;
      3. cast ``bolus``, ``carbs`` and ``insulin_on_board`` to float32
         regardless of the dtype they were read with;
      4. downcast every remaining float64 column to float32.

    Parameters
    ----------
    file_path : anything accepted by ``pandas.read_csv`` (path or buffer).

    Returns
    -------
    pandas.DataFrame with the columns in their original order.
    """
    frame = pd.read_csv(file_path).rename(columns={"date_time": "datetime_col"})
    frame['datetime_col'] = pd.to_datetime(frame['datetime_col'])

    # These three are cast explicitly because they may be parsed as integers,
    # in which case the float64 sweep below would not pick them up.
    for forced_name in ('bolus', 'carbs', 'insulin_on_board'):
        frame[forced_name] = frame[forced_name].astype('float32')

    wide_float_cols = frame.select_dtypes(include=['float64']).columns
    frame[wide_float_cols] = frame[wide_float_cols].astype('float32')
    return frame

# Data loading and preprocessing
file_path = 't1d_glucose_data.csv'
data = preprocess_data(file_path)

# Time series object creation for target variable.
# The covariate series holds every column except the target (and the time column,
# which becomes the index). '5min' is the non-deprecated spelling of the '5T' alias.
ts_P = TimeSeries.from_dataframe(data, 'datetime_col', 'glucose_value', freq='5min')
df_covF = data.loc[:, data.columns != "glucose_value"]
ts_covF = TimeSeries.from_dataframe(df_covF, 'datetime_col', freq='5min')

# Chronological split: train | test | hold-out.
# The split timestamps are read from the series' own time index rather than by row
# position in the raw DataFrame, so they stay correct even if the CSV rows are not
# stored in chronological order. split_after() keeps the split point in the left part.
train_size = int(len(ts_P) * SPLIT)
split_timestamp = ts_P.time_index[train_size]
ts_train, ts_temp = ts_P.split_after(split_timestamp)
test_size = int(len(ts_temp) * 0.5)
split_timestamp_test = ts_P.time_index[train_size + test_size]
ts_test, ts_hold_out = ts_temp.split_after(split_timestamp_test)

# Data scaling: the scaler is fitted on the training window only and then applied
# to every other slice, so no test/hold-out statistics leak into training.
scalerP = Scaler()
scalerP.fit(ts_train)
ts_ttrain = scalerP.transform(ts_train)
ts_ttest = scalerP.transform(ts_test)
ts_t = scalerP.transform(ts_P)
ts_hold_out_scaled = scalerP.transform(ts_hold_out)
ts_t = ts_t.astype(np.float32)
ts_ttrain = ts_ttrain.astype(np.float32)
ts_ttest = ts_ttest.astype(np.float32)
ts_hold_out_scaled = ts_hold_out_scaled.astype(np.float32)

# Feature covariates split and scaling.
# Split at the same timestamp as the target so the covariate scaler is fitted on
# exactly the training window (a proportional split can land one step off).
covF_train, covF_test = ts_covF.split_after(split_timestamp)
scalerF = Scaler()
scalerF.fit(covF_train)
covF_ttrain = scalerF.transform(covF_train)
covF_ttest = scalerF.transform(covF_test)
covF_t = scalerF.transform(ts_covF)
covF_ttrain = covF_ttrain.astype(np.float32)
covF_ttest = covF_ttest.astype(np.float32)
# The full-range covariates are what the model receives, so they must share the
# target's float32 dtype as well.
covF_t = covF_t.astype(np.float32)

# Assignments for simplicity in further code
ts_cov = ts_covF
cov_t = covF_t
cov_ttrain = covF_ttrain
cov_ttest = covF_ttest


In [4]:

# Objective function for Optuna study
def objective(trial):
    """Train one TFT configuration sampled by Optuna and return its RMSE.

    The function reads the module-level series prepared earlier in the notebook
    (``ts_ttrain``, ``ts_ttest``, ``ts_test``, ``cov_t``), the target scaler
    ``scalerP`` and the constants ``VALWAIT``, ``QUANTILES``, ``RAND`` and
    ``N_SAMPLES``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters and to report pruning decisions.

    Returns
    -------
    float
        RMSE between the median forecast (back-transformed to the original
        scale) and ``ts_test``; ``inf`` when the metric is NaN so that the
        trial ranks last instead of poisoning the study.
    """
    # Hyperparameter space definition.
    # `step` is passed by keyword (positional use is deprecated in Optuna), and the
    # upper bounds are multiples of the step so Optuna does not have to adjust them
    # (5..8 with step 2 was silently narrowed to {5, 7}).
    in_len = trial.suggest_int("in_len", 12, 36)
    out_len = trial.suggest_int("out_len", 1, 4)
    hidden_size = trial.suggest_int("hidden_size", 32, 128, step=2)
    lstm_layers = trial.suggest_int("lstm_layers", 5, 7, step=2)
    att_h = trial.suggest_int("att_h", 5, 7, step=2)
    dropout_val = trial.suggest_float("dropout_val", 0.0, 0.4)
    batch_sizes = trial.suggest_int("batch_sizes", 8, 64, step=8)
    lr = trial.suggest_float("lr", 5e-5, 1e-3, log=True)

    # Callbacks for pruning and early stopping
    pruner = PyTorchLightningPruningCallback(trial, monitor="val_loss")
    early_stopper = EarlyStopping("val_loss", min_delta=0.001, patience=3, verbose=True)
    callbacks = [pruner, early_stopper]
    pl_trainer_kwargs = {"accelerator": "auto", "callbacks": callbacks}

    # TFTModel initialization.
    # Every sampled hyperparameter is wired into the model so that the values
    # Optuna records for a trial are the values that were actually trained.
    model = TFTModel(
        input_chunk_length=in_len,
        output_chunk_length=out_len,
        hidden_size=hidden_size,
        lstm_layers=lstm_layers,
        num_attention_heads=att_h,
        dropout=dropout_val,
        batch_size=batch_sizes,
        n_epochs=35,
        nr_epochs_val_period=VALWAIT,
        likelihood=QuantileRegression(QUANTILES),
        optimizer_kwargs={"lr": lr},
        # Without this the pruning / early-stopping callbacks are never invoked.
        pl_trainer_kwargs=pl_trainer_kwargs,
        model_name="TFT_Glucose_v2",
        log_tensorboard=True,
        random_state=RAND,
        force_reset=True,
        save_checkpoints=True,
        add_relative_index=True
    )
    # Model training
    model.fit(
        series=ts_ttrain,
        past_covariates=cov_t,
        val_series=ts_ttest,
        val_past_covariates=cov_t,
        verbose=True
    )
    # Reload from the checkpoint saved under this model name rather than keeping
    # the last-epoch weights still held in memory.
    model = TFTModel.load_from_checkpoint("TFT_Glucose_v2")

    # Prediction and evaluation: forecast as many steps as the test window holds,
    # take the median of the sampled paths and undo the target scaling.
    ts_tpred = model.predict(n=len(ts_ttest), num_samples=N_SAMPLES, verbose=True)
    ts_tq = ts_tpred.quantile_timeseries(.5)
    ts_q = scalerP.inverse_transform(ts_tq)
    rmse_val = rmse(ts_q, ts_test)

    # NaN never compares equal to anything (including itself), so `!= np.nan`
    # is always True; np.isnan is the only reliable check.
    return float("inf") if np.isnan(rmse_val) else rmse_val


In [5]:

# Callback function for study optimization
def print_callback(study, trial):
    """Print the result of the trial that just finished and the best one so far.

    Parameters
    ----------
    study : optuna.study.Study
        The running study; only ``best_trial`` is read.
    trial : optuna.trial.FrozenTrial
        The trial that just ended; ``value`` is ``None`` when it was pruned
        or failed.
    """
    print(f"Current RMSE: {trial.value}, Current params: {trial.params}")
    try:
        best = study.best_trial
    except ValueError:
        # Optuna raises ValueError while no trial has completed yet (e.g. the
        # first trials were pruned); a reporting callback must not abort the study.
        print("Best RMSE: n/a (no completed trial yet)")
        return
    print(f"Best RMSE: {best.value}, Best params: {best.params}")



In [None]:
# Optuna study creation and optimization
# Upper bound on the number of trials; interrupt the cell to stop earlier.
N_TRIALS = 10000
# TPE is Optuna's default sampler; seeding it with the notebook-wide seed makes the
# sequence of suggested hyperparameters repeatable across kernel restarts.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=RAND),
)
study.optimize(objective, n_trials=N_TRIALS, callbacks=[print_callback])

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]