In [1]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger

import torch
import numpy as np
import pandas as pd

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

  from tqdm.autonotebook import tqdm


In [2]:
from ESRNN.m4_data import *
from ESRNN.utils_evaluation import evaluate_prediction_owa
from ESRNN.utils_visualization import plot_grid_prediction

In [3]:
X_train_df, y_train_df, X_test_df, y_test_df = prepare_m4_data(dataset_name="Hourly",
                                                               directory="../data/M4",
                                                               num_obs=414)





In [4]:
y_train_df

Unnamed: 0,unique_id,ds,y
0,H1,1970-01-01 00:00:00,605.0
1,H1,1970-01-01 01:00:00,586.0
2,H1,1970-01-01 02:00:00,586.0
3,H1,1970-01-01 03:00:00,559.0
4,H1,1970-01-01 04:00:00,511.0
...,...,...,...
353495,H99,1970-01-29 23:00:00,27926.0
353496,H99,1970-01-30 00:00:00,26744.0
353497,H99,1970-01-30 01:00:00,25829.0
353498,H99,1970-01-30 02:00:00,25421.0


In [5]:
df_train = pd.DataFrame()

In [6]:
df_train['ds'] = (y_train_df['ds'] - y_train_df['ds'].min()).dt.total_seconds() // 3600
df_train['ds'] = df_train['ds'].astype(int)

In [7]:
df_train['unique_id'] = y_train_df['unique_id']
df_train['y'] = y_train_df['y']
df_train

Unnamed: 0,ds,unique_id,y
0,0,H1,605.0
1,1,H1,586.0
2,2,H1,586.0
3,3,H1,559.0
4,4,H1,511.0
...,...,...,...
353495,695,H99,27926.0
353496,696,H99,26744.0
353497,697,H99,25829.0
353498,698,H99,25421.0


In [8]:
max_encoder_length = 24*7
max_prediction_length = 48

In [9]:
cutoffs = df_train.groupby('unique_id')['ds'].transform(lambda x: x.max() - max_prediction_length)


In [10]:
training_data = df_train[df_train['ds'] <= cutoffs]

In [11]:
training_data

Unnamed: 0,ds,unique_id,y
0,0,H1,605.0
1,1,H1,586.0
2,2,H1,586.0
3,3,H1,559.0
4,4,H1,511.0
...,...,...,...
353447,647,H99,23468.0
353448,648,H99,22524.0
353449,649,H99,22090.0
353450,650,H99,21857.0


In [12]:
training = TimeSeriesDataSet(
    df_train.iloc[:-max_prediction_length],
    time_idx="ds",
    target="y",
    group_ids=['unique_id'],
    max_encoder_length=max_encoder_length,
    # min_encoder_length=max_encoder_length // 2,
    min_encoder_length=1,
    max_prediction_length=max_prediction_length,
    # min_prediction_length=max_prediction_length // 2,
    min_prediction_length=1,
    # time_varying_known_reals=['y_arima', 'y_theta', 'y_xgb', 'y_gru', 'y_lstm'],  # Base model forecasts
    target_normalizer=GroupNormalizer(
        groups=["unique_id"], transformation="softplus"
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    # allow_missing_timesteps=Truex
    )

validation = TimeSeriesDataSet.from_dataset(training, df_train, predict=True, stop_randomization=True)
batch_size = 64  # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

# configure network and trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-5, patience=50, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=150,
    accelerator="gpu",
    # enable_model_summary=True,
    gradient_clip_val=0.43012832204522905,
    limit_train_batches=50,  # coment in for training, running valiation every 30 batches
    # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
    callbacks=[early_stop_callback],
    logger=False,
    enable_model_summary=False
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.05352813757705075,
    hidden_size=60,
    attention_head_size=4,
    dropout=0.12384425005697666,
    hidden_continuous_size=27,
    loss=SMAPE(),
    # log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
    optimizer="Ranger",
    reduce_on_plateau_patience=4,
    # print_summary=False
)

trainer.fit(
tft,
train_dataloaders=train_dataloader,
val_dataloaders=val_dataloader,

)

# setting up test set
df_test = y_test_df
df = y_test_df.drop(columns=['y_hat_naive2']).copy()
df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600 + df_train['ds'].max() + 1  #700
df['ds'] = df['ds'].astype(int)
df_test = df.reset_index(drop=True)

# predictions = tft.predict(test_dataloader, return_y=False, trainer_kwargs=dict(accelerator="gpu"))
new_raw_predictions = tft.predict(df_test, mode="raw", return_x=True, trainer_kwargs=dict(accelerator="gpu"))
# all_forecasts[unique_id] = new_raw_predictions.output.prediction.cpu().numpy().flatten()



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                           

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 65: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s, train_loss_step=0.338, val_loss=0.340, train_loss_epoch=0.342]




AssertionError: filters should not remove entries all entries - check encoder/decoder lengths and lags

In [43]:
df_test = y_test_df
df = y_test_df.drop(columns=['y_hat_naive2']).copy()
# df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600 + df_train['ds'].max() + 1  #700
df['ds'] = df.groupby('unique_id')['ds'].transform(lambda x: (x - x.min()).dt.total_seconds() // 3600 + df_train[df_train['unique_id'] == x.name]['ds'].max() + 1)
df['ds'] = df['ds'].astype(int)
df_test = df.reset_index(drop=True)
df_test

Unnamed: 0,unique_id,ds,y
0,H1,700,619.0
1,H1,701,565.0
2,H1,702,532.0
3,H1,703,495.0
4,H1,704,481.0
...,...,...,...
19867,H99,743,24039.0
19868,H99,744,22946.0
19869,H99,745,22217.0
19870,H99,746,21416.0


In [32]:
encoder_data = df_train[df_train.groupby('unique_id').apply(lambda x: x.ds > x.ds.max() - max_encoder_length).reset_index(level='unique_id').ds]
encoder_data

Unnamed: 0,ds,unique_id,y
532,532,H1,635.0
533,533,H1,577.0
534,534,H1,533.0
535,535,H1,504.0
536,536,H1,485.0
...,...,...,...
353495,695,H99,27926.0
353496,696,H99,26744.0
353497,697,H99,25829.0
353498,698,H99,25421.0


In [44]:
encoder_data[encoder_data['unique_id'] == 'H1']

Unnamed: 0,ds,unique_id,y
532,532,H1,635.0
533,533,H1,577.0
534,534,H1,533.0
535,535,H1,504.0
536,536,H1,485.0
...,...,...,...
695,695,H1,790.0
696,696,H1,784.0
697,697,H1,752.0
698,698,H1,739.0


In [45]:
df_test[df_test['unique_id'] == 'H1']

Unnamed: 0,unique_id,ds,y
0,H1,700,619.0
1,H1,701,565.0
2,H1,702,532.0
3,H1,703,495.0
4,H1,704,481.0
5,H1,705,467.0
6,H1,706,473.0
7,H1,707,488.0
8,H1,708,501.0
9,H1,709,534.0


In [46]:
new_prediction_data = pd.concat([encoder_data, df_test], ignore_index=True)

In [47]:
new_prediction_data[new_prediction_data['unique_id'] == 'H1']

Unnamed: 0,ds,unique_id,y
0,532,H1,635.0
1,533,H1,577.0
2,534,H1,533.0
3,535,H1,504.0
4,536,H1,485.0
...,...,...,...
69595,743,H1,785.0
69596,744,H1,756.0
69597,745,H1,719.0
69598,746,H1,703.0


In [48]:
new_raw_predictions = tft.predict(new_prediction_data, mode="raw", return_x=True, trainer_kwargs=dict(accelerator="gpu"))

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


In [58]:
y_hat_df = X_test_df.copy().drop(columns='x')

In [61]:
y_hat_df['y_hat'] = new_raw_predictions.output.prediction.cpu().numpy().flatten()
y_hat_df

Unnamed: 0,unique_id,ds,y_hat
0,H1,1970-01-30 04:00:00,626.577087
1,H1,1970-01-30 05:00:00,625.965759
2,H1,1970-01-30 06:00:00,626.017456
3,H1,1970-01-30 07:00:00,626.364685
4,H1,1970-01-30 08:00:00,626.921326
...,...,...,...
19867,H99,1970-01-31 23:00:00,18032.146484
19868,H99,1970-02-01 00:00:00,17967.214844
19869,H99,1970-02-01 01:00:00,17903.652344
19870,H99,1970-02-01 02:00:00,17843.085938


In [62]:
evaluate_prediction_owa(y_hat_df, y_train_df, X_test_df, y_test_df, naive2_seasonality=24)

OWA: 3.25 
SMAPE: 33.27 
MASE: 11.234 


(3.250120953427428, 11.233700517861331, 33.270065810540636)

In [13]:
df_test = y_test_df

In [15]:
df = y_test_df.drop(columns=['y_hat_naive2']).copy()
df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600 + df_train['ds'].max() + 1  #700
df['ds'] = df['ds'].astype(int)
df_test = df.reset_index(drop=True)
df_test['y'] = df_test.merge(df_train.groupby('unique_id')['y'].last().reset_index(), on='unique_id', how='left')['y_y']
df_test

Unnamed: 0,unique_id,ds,y
0,H1,960,684.0
1,H1,961,684.0
2,H1,962,684.0
3,H1,963,684.0
4,H1,964,684.0
...,...,...,...
19867,H99,1003,23252.0
19868,H99,1004,23252.0
19869,H99,1005,23252.0
19870,H99,1006,23252.0


In [35]:
df_test

Unnamed: 0,unique_id,ds,y
0,H1,960,619.0
1,H1,961,565.0
2,H1,962,532.0
3,H1,963,495.0
4,H1,964,481.0
...,...,...,...
19867,H99,1003,24039.0
19868,H99,1004,22946.0
19869,H99,1005,22217.0
19870,H99,1006,21416.0


In [36]:
df_train

Unnamed: 0,ds,unique_id,y
0,0,H1,605.0
1,1,H1,586.0
2,2,H1,586.0
3,3,H1,559.0
4,4,H1,511.0
...,...,...,...
353495,695,H99,27926.0
353496,696,H99,26744.0
353497,697,H99,25829.0
353498,698,H99,25421.0


In [52]:
df_test['y'] = df_test.merge(df_train.groupby('unique_id')['y'].last().reset_index(), on='unique_id', how='left')['y_y']
df_test

Unnamed: 0,unique_id,ds,y
0,H1,960,684.0
1,H1,961,684.0
2,H1,962,684.0
3,H1,963,684.0
4,H1,964,684.0
...,...,...,...
19867,H99,1003,23252.0
19868,H99,1004,23252.0
19869,H99,1005,23252.0
19870,H99,1006,23252.0


In [16]:
max_encoder_length = 7*24
max_prediction_length = 48

training = TimeSeriesDataSet(
    training_data[training_data['unique_id']=='H1'],
    time_idx="time_idx",
    target="y",
    group_ids=["unique_id"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    # static_categoricals=['unique_id'],
    # add_relative_time_idx=True,
    # add_target_scales=True,
    # add_encoder_length=True,
)

In [17]:
batch_size = 64

validation = TimeSeriesDataSet.from_dataset(training, df[df['unique_id']=='H1'], predict=True, stop_randomization=True)

# train_dataloader = DataLoader(training, batch_size=batch_size, shuffle=False)
# train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=5, persistent_workers=True)
# val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=5, persistent_workers=True)
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=5)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=5)

# val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [18]:
baseline_predictions = Baseline().predict(val_dataloader, return_y=True)
MAE()(baseline_predictions.output, baseline_predictions.y)


c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:198: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'predict_dataloader' to speed up the dataloader worker initialization.


tensor(145.6667, device='cuda:0')

In [19]:
# # configure network and trainer
# pl.seed_everything(42)
# trainer = pl.Trainer(
#     accelerator="cpu",
#     # clipping gradients is a hyperparameter and important to prevent divergance
#     # of the gradient for recurrent neural networks
#     gradient_clip_val=0.1,
# )


# tft = TemporalFusionTransformer.from_dataset(
#     training,
#     # not meaningful for finding the learning rate but otherwise very important
#     learning_rate=0.03,
#     hidden_size=8,  # most important hyperparameter apart from learning rate
#     # number of attention heads. Set to up to 4 for large datasets
#     attention_head_size=1,
#     dropout=0.1,  # between 0.1 and 0.3 are good values
#     hidden_continuous_size=8,  # set to <= hidden_size
#     loss=SMAPE(),
#     optimizer="Ranger"
#     # reduce learning rate if no improvement in validation loss after x epochs
#     # reduce_on_plateau_patience=1000,
# )
# print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")


In [20]:
# # configure network and trainer
# early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
# lr_logger = LearningRateMonitor()  # log the learning rate
# logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

# trainer = pl.Trainer(
#     max_epochs=500,
#     accelerator="cuda",
#     enable_model_summary=True,
#     gradient_clip_val=0.1,
#     limit_train_batches=50,  # coment in for training, running valiation every 30 batches
#     # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
#     callbacks=[lr_logger, early_stop_callback],
#     logger=logger,
# )

# tft = TemporalFusionTransformer.from_dataset(
#     training,
#     learning_rate=0.03,
#     hidden_size=16,
#     attention_head_size=2,
#     dropout=0.1,
#     hidden_continuous_size=8,
#     loss=SMAPE(),
#     log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
#     optimizer="Ranger",
#     reduce_on_plateau_patience=4,
# )
# print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")


In [21]:
# configure network and trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
# lr_logger = LearningRateMonitor()  # log the learning rate
# logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=5,
    accelerator="cpu",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=50,  # coment in for training, running valiation every 30 batches
    # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
    # callbacks=[lr_logger, early_stop_callback],
    logger=False,
)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=2,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=SMAPE(),
    # log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
    optimizer="Ranger",
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")


GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 13.9k


c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


In [22]:
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)



   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 0     
3  | prescalers                         | ModuleDict                      | 0     
4  | static_variable_selection          | VariableSelectionNetwork        | 0     
5  | encoder_variable_selection         | VariableSelectionNetwork        | 0     
6  | decoder_variable_selection         | VariableSelectionNetwork        | 0     
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 1.1 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                   

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 4:   0%|          | 0/6 [00:06<?, ?it/s] 

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4:   0%|          | 0/6 [00:06<?, ?it/s]


In [26]:
predictions = tft.predict(val_dataloader, return_y=True, trainer_kwargs=dict(accelerator="cpu"))
predictions
MAE()(predictions.output, predictions.y)


AttributeError: 'list' object has no attribute 'output'

In [23]:
raw_predictions = tft.predict(val_dataloader, mode="raw", return_x=True)
raw_predictions

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[]

In [25]:
tft.eval()

TemporalFusionTransformer(
  	"attention_head_size":               2
  	"categorical_groups":                {}
  	"causal_attention":                  True
  	"dropout":                           0.1
  	"embedding_labels":                  {}
  	"embedding_paddings":                []
  	"embedding_sizes":                   {}
  	"hidden_continuous_size":            8
  	"hidden_continuous_sizes":           {}
  	"hidden_size":                       16
  	"learning_rate":                     0.03
  	"log_gradient_flow":                 False
  	"log_interval":                      -1
  	"log_val_interval":                  -1
  	"logging_metrics":                   ModuleList(
  	  (0): SMAPE()
  	  (1): MAE()
  	  (2): RMSE()
  	  (3): MAPE()
  	)
  	"loss":                              SMAPE()
  	"lstm_layers":                       1
  	"max_encoder_length":                168
  	"monotone_constaints":               {}
  	"optimizer":                         Ranger
  	"optimizer_pa

In [26]:
predictions = tft.predict(val_dataloader, return_y=True, trainer_kwargs=dict(accelerator="cpu"))
predictions

c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
c:\Users\sonng\anaconda3\envs\tsff_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'predict_dataloader' to speed up the dataloader worker initialization.


[]