In [1]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
import torch
import numpy as np
import pandas as pd

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

  from tqdm.autonotebook import tqdm


In [2]:
from ESRNN.m4_data import *
from ESRNN.utils_evaluation import evaluate_prediction_owa
from ESRNN.utils_visualization import plot_grid_prediction

In [3]:
X_train_df, y_train_df, X_test_df, y_test_df = prepare_m4_data(dataset_name="Hourly",
                                                               directory="../data/M4",
                                                               num_obs=414)





In [4]:
y_train_h1 = y_train_df[y_train_df['unique_id']=='H1']

In [5]:
df = pd.DataFrame()
df = y_train_h1.copy()
df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600
df['ds'] = df['ds'].astype(int)
df

Unnamed: 0,unique_id,ds,y
0,H1,0,605.0
1,H1,1,586.0
2,H1,2,586.0
3,H1,3,559.0
4,H1,4,511.0
...,...,...,...
695,H1,695,790.0
696,H1,696,784.0
697,H1,697,752.0
698,H1,698,739.0


In [6]:
max_encoder_length = 24*7
max_prediction_length = 48

In [7]:
training = TimeSeriesDataSet(
    df.iloc[:-48],
    time_idx="ds",
    target="y",
    group_ids=['unique_id'],
    min_encoder_length=max_encoder_length // 2,
    min_prediction_length=1,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    target_normalizer=GroupNormalizer(
        groups=["unique_id"], transformation="softplus"
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

In [8]:
validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

batch_size = 128  # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

In [9]:
baseline_predictions = Baseline().predict(val_dataloader, return_y=True)
MAE()(baseline_predictions.output, baseline_predictions.y)

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


tensor(145.6667, device='cuda:0')

In [10]:
# configure network and trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=20, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=100,
    accelerator="gpu",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=50,  # coment in for training, running valiation every 30 batches
    # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
    callbacks=[early_stop_callback],
    logger=False,
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=2,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=SMAPE(),
    # log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
    optimizer="Ranger",
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 16.6k


In [11]:
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)


c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:634: Checkpoint directory c:\Users\sonng\Desktop\Important Files - Sep 2023\Hildesheim University - Masters in Data Analytics\Master Thesis\Code\Git\TSFusionForecast\TSFusionForecast\notebooks\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 0     
3  | prescalers                         | ModuleDict                      | 64    
4  | static_variable_selection          | VariableSelectionNetwork        | 1.7 K 
5  | encoder_variable_selection    

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                           

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 66: 100%|██████████| 5/5 [00:01<00:00,  4.11it/s, train_loss_step=0.209, val_loss=0.207, train_loss_epoch=0.206]


In [12]:
predictions = tft.predict(val_dataloader, return_y=True, trainer_kwargs=dict(accelerator="gpu"))
MAE()(predictions.output, predictions.y)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


tensor(140.3947, device='cuda:0')

In [10]:
import pickle

from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="optuna_test",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("test_study.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2023-11-23 09:38:11,869] A new study created in memory with name: no-name-2376d5b2-620c-4fd6-a7ce-1fdf64729956
  gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  dropout=trial.suggest_uniform("dropout", *dropout_range),
  model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:634: Checkpoint directory C:\Users\sonng\Desktop\Important Files - Sep 2023\Hildesheim University - Masters in Data Analytics\Master Thesis\Code\Git\TSFusionForecast\TSFusionForecast\notebooks\optuna_test\trial_0 exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_

In [71]:
with open("test_study.pkl", "rb") as fin:
    study = pickle.load(fin)


In [72]:
print(study.best_trial.params)

{'gradient_clip_val': 0.43012832204522905, 'hidden_size': 60, 'dropout': 0.12384425005697666, 'hidden_continuous_size': 27, 'attention_head_size': 4, 'learning_rate': 0.05352813757705075}


In [11]:
# configure network and trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=30, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=100,
    accelerator="gpu",
    enable_model_summary=True,
    gradient_clip_val=0.43012832204522905,
    limit_train_batches=50,  # coment in for training, running valiation every 30 batches
    # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
    callbacks=[early_stop_callback],
    logger=False,
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.05352813757705075,
    hidden_size=60,
    attention_head_size=4,
    dropout=0.12384425005697666,
    hidden_continuous_size=27,
    loss=SMAPE(),
    # log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
    optimizer="Ranger",
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 208.6k


In [12]:
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)


c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:634: Checkpoint directory c:\Users\sonng\Desktop\Important Files - Sep 2023\Hildesheim University - Masters in Data Analytics\Master Thesis\Code\Git\TSFusionForecast\TSFusionForecast\notebooks\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 0     
3  | prescalers                         | ModuleDict                      | 216   
4  | static_variable_selection          | VariableSelectionNetwork        | 16.0 K
5  | encoder_variable_selection    

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                           

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 64: 100%|██████████| 5/5 [00:01<00:00,  4.05it/s, train_loss_step=0.198, val_loss=0.210, train_loss_epoch=0.204]


In [30]:
predictions = tft.predict(val_dataloader, return_y=True, trainer_kwargs=dict(accelerator="gpu"))
MAE()(predictions.output, predictions.y)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


tensor(142.7370, device='cuda:0')

In [31]:
predictions

Prediction(output=tensor([[680.9622, 680.3881, 680.8605, 680.8868, 679.6391, 677.5711, 675.0679,
         672.9744, 672.0952, 672.4355, 672.7614, 672.7838, 672.5797, 671.5884,
         671.0643, 671.5206, 672.0035, 672.2932, 671.9869, 670.1862, 667.1890,
         664.8445, 663.1027, 661.6211, 661.2626, 662.1609, 662.8173, 662.2479,
         661.0191, 660.5146, 661.3795, 664.7376, 671.8805, 679.0512, 682.4625,
         682.1228, 680.4723, 679.6100, 679.8751, 680.2469, 680.2009, 679.6882,
         678.7925, 677.7063, 676.6259, 675.6730, 674.9177, 674.3941]],
       device='cuda:0'), x=None, index=None, decoder_lengths=None, y=(tensor([[664., 550., 544., 505., 483., 469., 466., 487., 492., 531., 583., 659.,
         743., 811., 863., 898., 914., 920., 926., 919., 887., 862., 829., 769.,
         691., 618., 563., 529., 504., 489., 487., 508., 513., 555., 606., 676.,
         761., 837., 878., 890., 879., 847., 820., 790., 784., 752., 739., 684.]],
       device='cuda:0'), None))

: 

In [24]:
predictions.output.cpu().detach().numpy().flatten()

array([680.96216, 680.38806, 680.86053, 680.8868 , 679.6391 , 677.5711 ,
       675.0679 , 672.9744 , 672.0952 , 672.43555, 672.76135, 672.7838 ,
       672.5797 , 671.5884 , 671.06433, 671.52057, 672.00354, 672.2932 ,
       671.98694, 670.18616, 667.189  , 664.84454, 663.1027 , 661.6211 ,
       661.2626 , 662.16095, 662.8173 , 662.2479 , 661.0191 , 660.51465,
       661.37946, 664.7376 , 671.8805 , 679.0512 , 682.46246, 682.1228 ,
       680.4723 , 679.61005, 679.8751 , 680.24695, 680.20087, 679.68823,
       678.7925 , 677.7063 , 676.6259 , 675.67303, 674.9177 , 674.3941 ],
      dtype=float32)

In [25]:
predictions = tft.predict(val_dataloader, return_y=False, trainer_kwargs=dict(accelerator="gpu"))


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


In [29]:
predictions.cpu().numpy().flatten()

array([680.96216, 680.38806, 680.86053, 680.8868 , 679.6391 , 677.5711 ,
       675.0679 , 672.9744 , 672.0952 , 672.43555, 672.76135, 672.7838 ,
       672.5797 , 671.5884 , 671.06433, 671.52057, 672.00354, 672.2932 ,
       671.98694, 670.18616, 667.189  , 664.84454, 663.1027 , 661.6211 ,
       661.2626 , 662.16095, 662.8173 , 662.2479 , 661.0191 , 660.51465,
       661.37946, 664.7376 , 671.8805 , 679.0512 , 682.46246, 682.1228 ,
       680.4723 , 679.61005, 679.8751 , 680.24695, 680.20087, 679.68823,
       678.7925 , 677.7063 , 676.6259 , 675.67303, 674.9177 , 674.3941 ],
      dtype=float32)