In [1]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
import torch
import numpy as np
import pandas as pd

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

  from tqdm.autonotebook import tqdm


In [2]:
from ESRNN.m4_data import *
from ESRNN.utils_evaluation import evaluate_prediction_owa
from ESRNN.utils_visualization import plot_grid_prediction

In [3]:
X_train_df, y_train_df, X_test_df, y_test_df = prepare_m4_data(dataset_name="Hourly",
                                                               directory="../data/M4",
                                                               num_obs=414)





In [4]:
unique_ids = y_train_df['unique_id'].unique()

In [5]:
all_forecasts = {}

In [6]:
pl.seed_everything(42)

Seed set to 42


42

In [5]:
for unique_id in unique_ids:

    print(f'Currently training: {unique_id}')

    # Filter data for the current series
    df = y_train_df[y_train_df['unique_id'] == unique_id].copy()
    df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600
    df['ds'] = df['ds'].astype(int)

    # Create the TimeSeriesDataSet for training
    max_encoder_length = 24*7
    max_prediction_length = 48

    training = TimeSeriesDataSet(
        df.iloc[:-max_prediction_length],
        time_idx="ds",
        target="y",
        group_ids=['unique_id'],
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        target_normalizer=GroupNormalizer(
            groups=["unique_id"], transformation="softplus"
        ),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        )
    
    validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

    batch_size = 128  # set this between 32 to 128
    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

    # configure network and trainer
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=30, verbose=False, mode="min")
    lr_logger = LearningRateMonitor()  # log the learning rate
    logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

    trainer = pl.Trainer(
        max_epochs=100,
        accelerator="gpu",
        enable_model_summary=True,
        gradient_clip_val=0.43012832204522905,
        limit_train_batches=50,  # coment in for training, running valiation every 30 batches
        # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
        callbacks=[early_stop_callback],
        logger=False,
    )

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.05352813757705075,
        hidden_size=60,
        attention_head_size=4,
        dropout=0.12384425005697666,
        hidden_continuous_size=27,
        loss=SMAPE(),
        # log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
        optimizer="Ranger",
        reduce_on_plateau_patience=4,
    )

    trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    )

    predictions = tft.predict(val_dataloader, return_y=False, trainer_kwargs=dict(accelerator="gpu"))
    all_forecasts[unique_id] = predictions.cpu().numpy().flatten()



NameError: name 'unique_ids' is not defined

In [8]:
all_forecasts

{'H1': array([623.7289 , 593.21106, 579.4463 , 574.5005 , 573.1142 , 573.18256,
        573.84186, 574.8246 , 576.68146, 582.17377, 596.2029 , 620.8917 ,
        643.4739 , 653.3664 , 656.9803 , 658.6606 , 659.76056, 660.6834 ,
        661.55664, 662.4215 , 663.29034, 664.16583, 665.04755, 665.93427,
        666.8248 , 667.71826, 668.6138 , 669.5108 , 670.40906, 671.3083 ,
        672.2081 , 673.10864, 674.0096 , 674.91095, 675.8126 , 676.71466,
        677.6169 , 678.5193 , 679.4219 , 680.32465, 681.2275 , 682.13025,
        683.03296, 683.93567, 684.83826, 685.74066, 686.64294, 687.5449 ],
       dtype=float32),
 'H10': array([434.52084, 433.4885 , 432.73163, 432.1882 , 431.93115, 431.86682,
        431.89804, 431.97617, 432.0794 , 432.1973 , 432.32428, 432.45724,
        432.5944 , 432.7345 , 432.87692, 433.02115, 433.1669 , 433.31393,
        433.46216, 433.61142, 433.76172, 433.91293, 434.0651 , 434.2181 ,
        434.37198, 434.5267 , 434.68222, 434.83856, 434.9957 , 435.1536 ,
 

In [10]:
all_forecasts.values()

dict_values([array([623.7289 , 593.21106, 579.4463 , 574.5005 , 573.1142 , 573.18256,
       573.84186, 574.8246 , 576.68146, 582.17377, 596.2029 , 620.8917 ,
       643.4739 , 653.3664 , 656.9803 , 658.6606 , 659.76056, 660.6834 ,
       661.55664, 662.4215 , 663.29034, 664.16583, 665.04755, 665.93427,
       666.8248 , 667.71826, 668.6138 , 669.5108 , 670.40906, 671.3083 ,
       672.2081 , 673.10864, 674.0096 , 674.91095, 675.8126 , 676.71466,
       677.6169 , 678.5193 , 679.4219 , 680.32465, 681.2275 , 682.13025,
       683.03296, 683.93567, 684.83826, 685.74066, 686.64294, 687.5449 ],
      dtype=float32), array([434.52084, 433.4885 , 432.73163, 432.1882 , 431.93115, 431.86682,
       431.89804, 431.97617, 432.0794 , 432.1973 , 432.32428, 432.45724,
       432.5944 , 432.7345 , 432.87692, 433.02115, 433.1669 , 433.31393,
       433.46216, 433.61142, 433.76172, 433.91293, 434.0651 , 434.2181 ,
       434.37198, 434.5267 , 434.68222, 434.83856, 434.9957 , 435.1536 ,
       435.3122

In [13]:
y_hat_df_tft = X_test_df.copy().rename(columns={'x' : 'y_hat'})
y_hat_df_tft['y_hat'] = pd.Series(dtype='float64')


In [14]:
y_hat_df_tft

Unnamed: 0,unique_id,ds,y_hat
0,H1,1970-01-30 04:00:00,
1,H1,1970-01-30 05:00:00,
2,H1,1970-01-30 06:00:00,
3,H1,1970-01-30 07:00:00,
4,H1,1970-01-30 08:00:00,
...,...,...,...
19867,H99,1970-01-31 23:00:00,
19868,H99,1970-02-01 00:00:00,
19869,H99,1970-02-01 01:00:00,
19870,H99,1970-02-01 02:00:00,


In [15]:
y_hat_df_tft['y_hat'] = np.hstack(all_forecasts.values())

  y_hat_df_tft['y_hat'] = np.hstack(all_forecasts.values())


In [17]:
evaluate_prediction_owa(y_hat_df_tft, y_train_df, X_test_df, y_test_df, naive2_seasonality=24)

OWA: 3.192 
SMAPE: 32.982 
MASE: 10.993 


(3.1919845120105985, 10.992775122479626, 32.981833154514426)

In [18]:
y_hat_df_tft.to_csv('../results/m4/y_hat_df_tft.csv', index=False)

## TFT with base model forecasts pre-train

In [17]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger

# import os
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "max_split_size_mb:128"
import torch
import numpy as np
import pandas as pd

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

  from tqdm.autonotebook import tqdm


In [18]:
from ESRNN.m4_data import *
from ESRNN.utils_evaluation import evaluate_prediction_owa
from ESRNN.utils_visualization import plot_grid_prediction

In [19]:
X_train_df, y_train_df, X_test_df, y_test_df = prepare_m4_data(dataset_name="Hourly",
                                                               directory="../data/M4",
                                                               num_obs=414)





In [4]:
unique_ids = y_train_df['unique_id'].unique()

In [5]:
all_forecasts = {}

In [6]:
pl.seed_everything(42)

Seed set to 42


42

In [7]:
df_arima_train = pd.read_csv('../results/m4/base_model_train_set/y_hat_df_arima_ts.csv')
df_theta_train = pd.read_csv('../results/m4/base_model_train_set/y_hat_df_theta_ts.csv')
df_xgb_train = pd.read_csv('../results/m4/base_model_train_set/y_hat_df_xgb_ts.csv')
df_gru_train = pd.read_csv('../results/m4/base_model_train_set/y_hat_df_gru_ts.csv')
df_lstm_train = pd.read_csv('../results/m4/base_model_train_set/y_hat_df_lstm_ts.csv')

In [8]:
df_arima_test = pd.read_csv('../results/m4/y_hat_df_arima.csv')
df_theta_test = pd.read_csv('../results/m4/y_hat_df_theta.csv')
df_xgb_test = pd.read_csv('../results/m4/y_hat_df_xgb.csv')
df_gru_test = pd.read_csv('../results/m4/y_hat_df_gru.csv')
df_lstm_test = pd.read_csv('../results/m4/y_hat_df_lstm.csv')

In [9]:
# unique_id = 'H1'

# df_base_models= pd.DataFrame({
#     'y_arima' : df_arima[df_arima['unique_id'] == unique_id].y_hat,
#     'y_theta' : df_theta[df_theta['unique_id'] == unique_id].y_hat,
#     'y_xgb' : df_xgb[df_xgb['unique_id'] == unique_id].y,
#     'y_gru' : df_gru[df_gru['unique_id'] == unique_id].y_hat,
#     'y_lstm' : df_lstm[df_lstm['unique_id'] == unique_id].y_hat
# })

# df_base_models_train= pd.DataFrame({
#     'unique_id' : df_arima_train.unique_id,
#     'y_arima' : df_arima_train.y_hat,
#     'y_theta' : df_theta_train.y_hat,
#     'y_xgb' : df_xgb_train.y,
#     'y_gru' : df_gru_train.y_hat,
#     'y_lstm' : df_lstm_train.y_hat
# })

In [10]:
# df_base_models_test= pd.DataFrame({
#     'unique_id' : df_arima_test.unique_id,
#     'y_arima' : df_arima_test.y_hat,
#     'y_theta' : df_theta_test.y_hat,
#     'y_xgb' : df_xgb_test.y_hat,
#     'y_gru' : df_gru_test.y_hat,
#     'y_lstm' : df_lstm_test.y_hat
# })

In [11]:
# print(f'Currently training: {unique_id}')

# # Filter data for the current series
# df = y_train_df[y_train_df['unique_id'] == unique_id].copy()
# df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600
# df['ds'] = df['ds'].astype(int)
# df_train_val = pd.concat([df.iloc[-24*7:].reset_index(drop=True).drop(columns=['unique_id']), 
#                           df_base_models_train[df_base_models_train['unique_id']=='H1'].reset_index(drop=True)], axis=1)
# df_train_val

In [12]:
# print(f'Currently training: {unique_id}')

# # Filter data for the current series
# df = y_test_df[y_test_df['unique_id'] == unique_id].copy()
# df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600 + 700
# df['ds'] = df['ds'].astype(int)
# df_test = pd.concat([df.reset_index(drop=True).drop(columns=['unique_id']), 
#                           df_base_models_test[df_base_models_test['unique_id']=='H1'].reset_index(drop=True)], axis=1)
# df_test

In [40]:
for unique_id in unique_ids[79:]:

    print(f'Currently training: {unique_id}')

    df_base_models_train= pd.DataFrame({
    'unique_id' : df_arima_train.unique_id,
    'y_arima' : df_arima_train.y_hat,
    'y_theta' : df_theta_train.y_hat,
    'y_xgb' : df_xgb_train.y,
    'y_gru' : df_gru_train.y_hat,
    'y_lstm' : df_lstm_train.y_hat
    })

    df_base_models_test= pd.DataFrame({
    'unique_id' : df_arima_test.unique_id,
    'y_arima' : df_arima_test.y_hat,
    'y_theta' : df_theta_test.y_hat,
    'y_xgb' : df_xgb_test.y_hat,
    'y_gru' : df_gru_test.y_hat,
    'y_lstm' : df_lstm_test.y_hat
    })

    # Filter data for the current series (train and val data)
    df = y_train_df[y_train_df['unique_id'] == unique_id].copy()
    df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600
    df['ds'] = df['ds'].astype(int)
    df_train_val = pd.concat([df.iloc[-24*7:].reset_index(drop=True).drop(columns=['unique_id']), 
                              df_base_models_train[df_base_models_train['unique_id']==unique_id].reset_index(drop=True)], axis=1)
    
    # Test data
    df = y_test_df.drop(columns=['y_hat_naive2'])[y_test_df['unique_id'] == unique_id].copy()
    df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600 + df_train_val['ds'].max() + 1  #700
    df['ds'] = df['ds'].astype(int)
    df_test = pd.concat([df.reset_index(drop=True).drop(columns=['unique_id']), 
                         df_base_models_test[df_base_models_test['unique_id']==unique_id].reset_index(drop=True)], axis=1)

    # Create the TimeSeriesDataSet for training
    max_encoder_length = 24*7
    max_prediction_length = 48

    training = TimeSeriesDataSet(
        df_train_val.iloc[:-max_prediction_length],
        time_idx="ds",
        target="y",
        group_ids=['unique_id'],
        min_encoder_length=1,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=max_prediction_length,
        time_varying_known_reals=['y_arima', 'y_theta', 'y_xgb', 'y_gru', 'y_lstm'],  # Base model forecasts
        target_normalizer=GroupNormalizer(
            groups=["unique_id"], transformation="softplus"
        ),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        # allow_missing_timesteps=Truex
        )
    
    validation = TimeSeriesDataSet.from_dataset(training, df_train_val, predict=True, stop_randomization=True)
    test = TimeSeriesDataSet.from_dataset(training, df_test, 
                                          min_encoder_length=1,
                                          min_prediction_length=1,
                                          max_encoder_length = 48,
                                          max_prediction_length = 48,
                                        #   min_prediction_idx=df_test['ds'].min(), 
                                          predict=True, 
                                          stop_randomization=True)

    batch_size = 64  # set this between 32 to 128
    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
    test_dataloader = test.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # configure network and trainer
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-5, patience=50, verbose=False, mode="min")
    lr_logger = LearningRateMonitor()  # log the learning rate
    logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

    trainer = pl.Trainer(
        max_epochs=100,
        accelerator="gpu",
        # enable_model_summary=True,
        gradient_clip_val=0.43012832204522905,
        limit_train_batches=50,  # coment in for training, running valiation every 30 batches
        # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
        callbacks=[early_stop_callback],
        logger=False,
        enable_model_summary=False
    )

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.05352813757705075,
        hidden_size=60,
        attention_head_size=4,
        dropout=0.12384425005697666,
        hidden_continuous_size=27,
        loss=SMAPE(),
        # log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
        optimizer="Ranger",
        reduce_on_plateau_patience=4,
        # print_summary=False
    )

    trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,

    )

    predictions = tft.predict(test_dataloader, return_y=False, trainer_kwargs=dict(accelerator="gpu"))
    all_forecasts[unique_id] = predictions.cpu().numpy().flatten()



NameError: name 'unique_ids' is not defined

In [24]:
unique_id='H170'
df = y_train_df[y_train_df['unique_id'] == unique_id].copy()
df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600
df['ds'] = df['ds'].astype(int)
df_train_val = pd.concat([df.iloc[-24*7:].reset_index(drop=True).drop(columns=['unique_id']), 
                            df_base_models_train[df_base_models_train['unique_id']==unique_id].reset_index(drop=True)], axis=1)


In [38]:
y_hat_df.to_csv('../results/m4/TFT/training/y_hat_df_tft_bm7.csv', index=False)

In [37]:
evaluate_prediction_owa(y_hat_df, y_train_df, X_test_df, y_test_df, naive2_seasonality=24)

OWA: 1.188 
SMAPE: 17.91 
MASE: 3.359 


(1.188340504436854, 3.3587591134382038, 17.910434371690613)

In [39]:
y_hat_df

Unnamed: 0,unique_id,ds,y_hat
0,H1,1970-01-30 04:00:00,547.211700
1,H1,1970-01-30 05:00:00,509.715400
2,H1,1970-01-30 06:00:00,483.367130
3,H1,1970-01-30 07:00:00,465.794560
4,H1,1970-01-30 08:00:00,458.633670
...,...,...,...
19867,H99,1970-01-31 23:00:00,25568.467000
19868,H99,1970-02-01 00:00:00,25360.758000
19869,H99,1970-02-01 01:00:00,25109.238000
19870,H99,1970-02-01 02:00:00,22979.203000
