In [1]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger

# import os
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "max_split_size_mb:128"
import torch
import numpy as np
import pandas as pd

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

  from tqdm.autonotebook import tqdm


In [2]:
from ESRNN.m4_data import *
from ESRNN.utils_evaluation import evaluate_prediction_owa
from ESRNN.utils_visualization import plot_grid_prediction

In [3]:
X_train_df, y_train_df, X_test_df, y_test_df = prepare_m4_data(dataset_name="Hourly",
                                                               directory="../../data/M4",
                                                               num_obs=414)





In [6]:
unique_ids = y_train_df['unique_id'].unique()

In [7]:
all_forecasts = {}

In [8]:
pl.seed_everything(42)

Seed set to 42


42

In [9]:
df_arima_train = pd.read_csv('../../results/m4/base_model_train_set/y_hat_df_arima_ts.csv')
df_theta_train = pd.read_csv('../../results/m4/base_model_train_set/y_hat_df_theta_ts.csv')
df_xgb_train = pd.read_csv('../../results/m4/base_model_train_set/y_hat_df_xgb_ts.csv')
df_gru_train = pd.read_csv('../../results/m4/base_model_train_set/y_hat_df_gru_ts.csv')
df_lstm_train = pd.read_csv('../../results/m4/base_model_train_set/y_hat_df_lstm_ts.csv')

In [10]:
df_arima_test = pd.read_csv('../../results/m4/y_hat_df_arima.csv')
df_theta_test = pd.read_csv('../../results/m4/y_hat_df_theta.csv')
df_xgb_test = pd.read_csv('../../results/m4/y_hat_df_xgb.csv')
df_gru_test = pd.read_csv('../../results/m4/y_hat_df_gru.csv')
df_lstm_test = pd.read_csv('../../results/m4/y_hat_df_lstm.csv')

In [1]:
for unique_id in unique_ids:

    print(f'Currently training: {unique_id}')

    df_base_models_train= pd.DataFrame({
    'unique_id' : df_arima_train.unique_id,
    'y_arima' : df_arima_train.y_hat,
    'y_theta' : df_theta_train.y_hat,
    'y_xgb' : df_xgb_train.y,
    'y_gru' : df_gru_train.y_hat,
    'y_lstm' : df_lstm_train.y_hat
    })

    df_base_models_test= pd.DataFrame({
    'unique_id' : df_arima_test.unique_id,
    'y_arima' : df_arima_test.y_hat,
    'y_theta' : df_theta_test.y_hat,
    'y_xgb' : df_xgb_test.y_hat,
    'y_gru' : df_gru_test.y_hat,
    'y_lstm' : df_lstm_test.y_hat
    })

    # Filter data for the current series (train and val data)
    df = y_train_df[y_train_df['unique_id'] == unique_id].copy()
    df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600
    df['ds'] = df['ds'].astype(int)
    df_train_val = pd.concat([df.iloc[-24*7:].reset_index(drop=True).drop(columns=['unique_id']), 
                              df_base_models_train[df_base_models_train['unique_id']==unique_id].reset_index(drop=True)], axis=1)
    
    # Test data
    df = y_test_df.drop(columns=['y_hat_naive2'])[y_test_df['unique_id'] == unique_id].copy()
    df['ds'] = (df['ds'] - df['ds'].min()).dt.total_seconds() // 3600 + df_train_val['ds'].max() + 1  #700
    df['ds'] = df['ds'].astype(int)
    df_test = pd.concat([df.reset_index(drop=True).drop(columns=['unique_id']), 
                         df_base_models_test[df_base_models_test['unique_id']==unique_id].reset_index(drop=True)], axis=1)

    # Create the TimeSeriesDataSet for training
    max_encoder_length = 24*7
    max_prediction_length = 48

    training = TimeSeriesDataSet(
        df_train_val.iloc[:-max_prediction_length],
        time_idx="ds",
        target="y",
        group_ids=['unique_id'],
        max_encoder_length=max_encoder_length,
        # min_encoder_length=max_encoder_length // 2,
        min_encoder_length=1,
        max_prediction_length=max_prediction_length,
        # min_prediction_length=max_prediction_length // 2,
        min_prediction_length=1,
        time_varying_known_reals=['y_arima', 'y_theta', 'y_xgb', 'y_gru', 'y_lstm'],  # Base model forecasts
        target_normalizer=GroupNormalizer(
            groups=["unique_id"], transformation="softplus"
        ),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        # allow_missing_timesteps=Truex
        )
    
    validation = TimeSeriesDataSet.from_dataset(training, df_train_val, predict=True, stop_randomization=True)
    # test = TimeSeriesDataSet.from_dataset(validation, df_test, 
    #                                       min_encoder_length=1,
    #                                       min_prediction_length=1,
    #                                       max_encoder_length = max_encoder_length,
    #                                       max_prediction_length = max_prediction_length,
    #                                     #   min_prediction_idx=df_test['ds'].min(), 
    #                                       predict=True, 
    #                                       stop_randomization=True)

    # creating the test data that includes the encoder and decoder data
    encoder_data = df_train_val[lambda x: x.ds > x.ds.max() - max_encoder_length]
    df_test.y = df_train_val.y[df_train_val.ds == df_train_val.ds.max()].values[0]
    decoder_data = df_test
    new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)

    batch_size = 64  # set this between 32 to 128
    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
    # test_dataloader = test.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

    # configure network and trainer
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-5, patience=50, verbose=False, mode="min")
    lr_logger = LearningRateMonitor()  # log the learning rate
    logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

    trainer = pl.Trainer(
        max_epochs=150,
        accelerator="gpu",
        # enable_model_summary=True,
        gradient_clip_val=0.43012832204522905,
        limit_train_batches=50,  # coment in for training, running valiation every 30 batches
        # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
        callbacks=[early_stop_callback],
        logger=False,
        enable_model_summary=False
    )

    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.05352813757705075,
        hidden_size=60,
        attention_head_size=4,
        dropout=0.12384425005697666,
        hidden_continuous_size=27,
        loss=SMAPE(),
        # log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
        optimizer="Ranger",
        reduce_on_plateau_patience=4,
        # print_summary=False
    )

    trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,

    )

    # predictions = tft.predict(test_dataloader, return_y=False, trainer_kwargs=dict(accelerator="gpu"))
    new_raw_predictions = tft.predict(new_prediction_data, mode="raw", return_x=True, trainer_kwargs=dict(accelerator="gpu"))
    all_forecasts[unique_id] = new_raw_predictions.output.prediction.cpu().numpy().flatten()



NameError: name 'unique_ids' is not defined

In [11]:
y_hat_df = pd.read_csv('../../results/m4/TFT/training/y_hat_df_tft_bm7_fix.csv')

In [12]:
evaluate_prediction_owa(y_hat_df, y_train_df, X_test_df, y_test_df, naive2_seasonality=24)

OWA: 0.789 
SMAPE: 15.595 
MASE: 1.749 


(0.7892891470266732, 1.7489556614022561, 15.594886822602454)

First tuned run
===============  Model evaluation  ==============
OWA: 0.789 
SMAPE: 15.595 
MASE: 1.749 


In [15]:
y_hat_df = pd.read_csv('../../results/m4/TFT/test/y_hat_df_tft_bm7_tuned_2.csv')

In [14]:
ds = y_hat_df[['ds']]
ds

Unnamed: 0,ds
0,1970-01-30 04:00:00
1,1970-01-30 05:00:00
2,1970-01-30 06:00:00
3,1970-01-30 07:00:00
4,1970-01-30 08:00:00
...,...
19867,1970-01-31 23:00:00
19868,1970-02-01 00:00:00
19869,1970-02-01 01:00:00
19870,1970-02-01 02:00:00


In [17]:
y_hat_df['ds'] = ds

In [19]:
evaluate_prediction_owa(y_hat_df, y_train_df, X_test_df, y_test_df, naive2_seasonality=24)

OWA: 0.809 
SMAPE: 15.889 
MASE: 1.805 


(0.808897238039389, 1.8045535715957854, 15.889057194551462)

Second tuned run
===============  Model evaluation  ==============
OWA: 0.809 
SMAPE: 15.889 
MASE: 1.805 
