In [1]:
import os
import warnings
warnings.filterwarnings("ignore")  # avoid printing out absolute paths
import copy
from pathlib import Path
import warnings
import numpy as np
import glob
import pandas as pd
import torch
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.tuner import Tuner
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
import pickle

In [2]:
# Window sizes, counted in time steps. The author's notes describe 48 steps as
# a one-day forecast horizon and 336 steps as one week of history, which would
# mean two samples per hour -- TODO confirm against the data.
max_prediction_length = 2 * 24
max_encoder_length = 7 * 2 * 24
group = 0  # NOTE(review): not referenced anywhere else in the visible cells

# Inclusive time_idx bound of the training window, and the number of further
# time steps appended to it for validation.
TRAINSIZE = 2000
VALIDSIZE = 500

# Load the raw table, discard the calendar columns and store `period` as text
# (it is later declared as a categorical feature of the dataset).
test_sequence = (
    pd.read_csv("tankleak.csv")
    .drop(columns=["Month", "Year", "Season"])
    .assign(period=lambda frame: frame["period"].astype(str))
)

# Every row up to the end of the validation span (training rows included).
data = test_sequence[test_sequence["time_idx"] <= TRAINSIZE + VALIDSIZE]

# Training dataset: built only from rows whose time_idx lies inside the
# training window (time_idx <= TRAINSIZE).
training = TimeSeriesDataSet(
    test_sequence[test_sequence["time_idx"] <= TRAINSIZE],
    # -- series layout --
    time_idx="time_idx",
    group_ids=["group_id"],  # tank id
    target="Var_tc_readjusted",  # variance
    allow_missing_timesteps=True,
    # -- window lengths --
    # The encoder is kept long (at least half of the maximum), as it is in the
    # validation set.
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # -- static features --
    static_categoricals=["group_id"],  # tank id
    static_reals=["tank_max_height", "tank_max_volume"],  # tank max height / max volume
    # -- features known ahead of time --
    # "Month", "Year" and "Season" are left out here; they were dropped from
    # the frame when it was loaded.
    time_varying_known_categoricals=["Time_of_day"],
    time_varying_known_reals=["time_idx"],
    # -- features only observed up to the present --
    time_varying_unknown_categoricals=["period"],  # period (idle, transaction, delivery)
    # Author's note for the list below: variance, volume, height, sales(-),
    # delivery(+), temperature -- the note's order differs from the list's,
    # so the column-to-quantity mapping is unconfirmed.
    time_varying_unknown_reals=[
        "Var_tc_readjusted",
        "Del_tc",
        "Sales_Ini_tc",
        "ClosingHeight_tc_readjusted",
        "ClosingStock_tc_readjusted",
        "TankTemp",
    ],
    # -- derived helper features --
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation dataset: derived from `training` via from_dataset, but fed the
# longer `data` frame. With predict=True the library is documented to predict
# only the last max_prediction_length steps of each series -- see the
# TimeSeriesDataSet docs to confirm.
validation = TimeSeriesDataSet.from_dataset(
    training,
    data,
    predict=True,
    stop_randomization=True,
)

batch_size = 128  # set this between 32 to 128

# The training loader uses the base batch size; the validation loader uses a
# batch ten times larger. Both load in the main process (num_workers=0).
train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=batch_size * 10,
    num_workers=0,
)

In [3]:
# Trainer helpers.
# NOTE(review): none of these three objects is passed to the
# optimize_hyperparameters call that follows in this cell; they only matter if
# a later cell hands them to a Trainer.

# Stop when "val_loss" has not improved by at least 1e-4 for 10 checks.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=1e-4,
    patience=10,
    verbose=False,
)

# Records the learning rate during training.
lr_logger = LearningRateMonitor()

# Writes TensorBoard logs under ./lightning_logs.
logger = TensorBoardLogger("lightning_logs")

# Optuna hyperparameter search for the Temporal Fusion Transformer.
# The captured output of this cell shows each 50-epoch trial taking well over
# an hour on CPU, so a full 20-trial run is expensive.
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="optuna_test",
    # -- search budget --
    n_trials=20,
    max_epochs=50,
    trainer_kwargs={"limit_train_batches": 30},
    reduce_on_plateau_patience=4,
    # -- search space --
    learning_rate_range=(0.001, 0.1),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    dropout_range=(0.1, 0.3),
    gradient_clip_val_range=(0.01, 1.0),
    # False: let Optuna pick the learning rate from learning_rate_range rather
    # than running the built-in learning rate finder.
    use_learning_rate_finder=False,
)

# Persist the study so the results can be inspected, or tuning resumed, later.
with open("test_study.pkl", "wb") as study_file:
    pickle.dump(study, study_file)

# Report the hyperparameters of the best trial.
print(study.best_trial.params)

[I 2023-10-03 21:51:40,452] A new study created in memory with name: no-name-c2ede968-bb73-43e0-ac0c-9a8a8a408bc0
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=50` reached.
[I 2023-10-03 23:16:45,138] Trial 0 finished with value: 2.842017650604248 and parameters: {'gradient_clip_val': 0.3595375095813248, 'hidden_size': 14, 'dropout': 0.28519335459069856, 'hidden_continuous_size': 9, 'attention_head_size': 2, 'learning_rate': 0.03601119460172352}. Best is trial 0 with value: 2.842017650604248.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=50` reached.
[I 2023-10-04 00:48:14,681] Trial 1 finished with value: 2.804687738418579 and parameters: {'gradient_clip_val': 0.03571663686788617, 'hidden_size': 15, 'dropout': 0.17085

{'gradient_clip_val': 0.03571663686788617, 'hidden_size': 15, 'dropout': 0.17085966100984748, 'hidden_continuous_size': 8, 'attention_head_size': 2, 'learning_rate': 0.04044358859683056}
