## How to use Transformer Networks to build a Forecasting model: evaluate
- https://towardsdatascience.com/how-to-use-transformer-networks-to-build-a-forecasting-model-297f9270e630

<div style="text-align: right"> <b>Author : Kwang Myung Yu</b></div>
<div style="text-align: right"> Initial upload: 2023.11.06</div>
<div style="text-align: right"> Last update: 2023.11.06</div>

In [1]:
import datetime
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings; warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib inline
# print(plt.style.available)

# Options for pandas
pd.options.display.max_columns = 30

In [2]:
import json
from tqdm import tqdm
import pytorch_lightning as pl
import torch
import torch.nn as nn
from torchinfo import summary
from model import TimeSeriesForcasting
from train_utils import split_df, Dataset
from evaluation import evaluate_regression

from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

In [3]:
# Inputs read by this notebook: the processed dataset, the feature/target
# description, and the metadata saved for the trained model.
data_csv_path = "data/processed_data.csv"
feature_target_names_path = "data/config.json"
trained_json_path = "models/trained_config.json"

# Outputs written by the final export cell: per-series data and the metric summary.
data_for_visualization_path = "data/visualization.json"
eval_json_path = "data/eval.json"

In [4]:
# Load the processed dataset from the configured CSV path.
# NOTE(review): the preview of this frame shows an "Unnamed: 0" column, which
# suggests the CSV was written with its index; confirm whether `index_col=0`
# should be passed here.
data = pd.read_csv(data_csv_path)

In [5]:
# Preview the first rows to check the loaded columns.
data.head()

Unnamed: 0,timestamp,index,article,amplitude,offset,views,day_of_month,day_of_year,month,week_of_year,year,views_lag_1
0,2015-01-01,0,e288d86c0c8641a7b25ba1cc435e28d7,3.203435,0.774873,2.014996,0.032258,0.00274,0.083333,0.018868,0.0,0.0
1,2015-01-02,1,e288d86c0c8641a7b25ba1cc435e28d7,3.197775,0.743587,3.224294,0.064516,0.005479,0.083333,0.018868,0.0,2.014996
2,2015-01-03,2,e288d86c0c8641a7b25ba1cc435e28d7,3.192114,0.716603,2.987859,0.096774,0.008219,0.083333,0.018868,0.0,3.224294
3,2015-01-04,3,e288d86c0c8641a7b25ba1cc435e28d7,3.186454,0.694619,3.996244,0.129032,0.010959,0.083333,0.018868,0.0,2.987859
4,2015-01-05,4,e288d86c0c8641a7b25ba1cc435e28d7,3.180794,0.678206,3.76322,0.16129,0.013699,0.083333,0.037736,0.0,3.996244


In [6]:
# Read the JSON metadata stored for the trained model, then display it.
with open(trained_json_path) as trained_config_file:
    model_json = json.load(trained_config_file)

model_json

{'val_loss': 0.5206373333930969, 'best_model_path': 'ts_models/ts.ckpt'}

In [7]:
# Checkpoint location taken from the "best_model_path" entry of the trained-model JSON.
model_path = model_json["best_model_path"]
model_path

'ts_models/ts.ckpt'

In [8]:
# Read the JSON that names the feature columns, target, grouping key and lag
# features, then display it.
with open(feature_target_names_path) as feature_config_file:
    feature_target_names = json.load(feature_config_file)

feature_target_names

{'features': ['day_of_month', 'day_of_year', 'month', 'week_of_year', 'year'],
 'target': 'views',
 'group_by_key': 'article',
 'lag_features': ['views_lag_1']}

In [9]:
# Forecast horizon: the number of steps decoded per series in the evaluation loop.
horizon_size: int = 30

target = feature_target_names["target"]

# Keep only the rows whose target value is known.
data_train = data[~data[target].isna()]

# One group per value of the configured grouping key.
grp_by_train = data_train.groupby(by=feature_target_names["group_by_key"])

groups = list(grp_by_train.groups)

# Row count of every group, computed in one pass instead of materialising each
# group with get_group() just to read its length.
group_sizes = grp_by_train.size()

# Only groups with more than `horizon_size` rows are evaluated.
full_groups = [grp for grp in groups if group_sizes.loc[grp] > horizon_size]

# Validation split of the project Dataset over the retained groups.
val_data = Dataset(
    groups=full_groups,
    grp_by=grp_by_train,
    split="val",
    features=feature_target_names["features"],
    target=feature_target_names["target"],
)

In [10]:
# Rebuild the network so the trained weights can be loaded into it.
# Both input widths are the number of listed features plus one extra channel.
# NOTE(review): presumably the extra channel carries the target/lag value and
# lr/dropout mirror the training run — confirm against the training code.
model = TimeSeriesForcasting(
    n_encoder_inputs=len(feature_target_names["features"]) + 1,
    n_decoder_inputs=len(feature_target_names["features"]) + 1,
    lr=1e-4,
    dropout=0.5,
)

# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
# machine; the evaluation loop converts predictions with .numpy(), which
# requires CPU tensors anyway.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model.load_state_dict(torch.load(model_path, map_location="cpu")["state_dict"])

<All keys matched successfully>

In [11]:
# Display the module tree of the loaded model.
model

TimeSeriesForcasting(
  (input_pos_embedding): Embedding(1024, 512)
  (target_pos_embedding): Embedding(1024, 512)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.5, inplace=False)
        (linear2): Linear(in_features=2048, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.5, inplace=False)
        (dropout2): Dropout(p=0.5, inplace=False)
      )
    )
  )
  (decoder): TransformerDecoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerDecoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamically

In [12]:
# Switch to evaluation mode before forecasting so the Dropout layers are inactive.
model.eval()

TimeSeriesForcasting(
  (input_pos_embedding): Embedding(1024, 512)
  (target_pos_embedding): Embedding(1024, 512)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.5, inplace=False)
        (linear2): Linear(in_features=2048, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.5, inplace=False)
        (dropout2): Dropout(p=0.5, inplace=False)
      )
    )
  )
  (decoder): TransformerDecoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerDecoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamically

In [13]:
# Flat accumulators filled by the evaluation loop: ground-truth values, the
# last-known-value baseline, and the model forecasts (three independent lists).
gt, baseline_last_known_values, neural_predictions = [], [], []

# One dict per evaluated series (history / ground truth / prediction); written to JSON later.
data_for_visualization = list()

In [14]:
# Settings for this evaluation pass.
n_eval_groups = 100  # how many series, taken from the front of full_groups, are evaluated
history_window = 120  # trailing history points stored per series for visualization

# Start from empty accumulators so that re-running this cell does not append
# a second copy of every result.
gt = []
baseline_last_known_values = []
neural_predictions = []
data_for_visualization = []

# tqdm wraps the list rather than the enumerate() generator, so it knows the
# total and can show a real progress bar instead of a bare iteration counter.
for i, group in enumerate(tqdm(full_groups[:n_eval_groups])):
    time_series_data = {"history": [], "ground_truth": [], "prediction": []}

    df = grp_by_train.get_group(group)
    src, trg = split_df(df, split="val")
    # Columns are added to trg below; work on an explicit copy so the writes
    # can never reach (or be silently dropped from) the grouped frame.
    trg = trg.copy()

    time_series_data["history"] = src[target].tolist()[-history_window:]
    time_series_data["ground_truth"] = trg[target].tolist()

    # Naive baseline: repeat the last observed target value for every target row.
    last_known_value = src[target].values[-1]
    trg["last_known_value"] = last_known_value

    gt += trg[target].tolist()
    baseline_last_known_values += trg["last_known_value"].tolist()

    # From here on src/trg_in are the tensors produced by the dataset.
    # NOTE(review): assumes val_data[i] corresponds to full_groups[i] — confirm
    # against Dataset.__getitem__.
    src, trg_in, _ = val_data[i]

    # Add a batch dimension. trg_in is overwritten step by step below, so clone
    # it to leave whatever the dataset returned untouched.
    src, trg_in = src.unsqueeze(0), trg_in.unsqueeze(0).clone()

    with torch.no_grad():
        # Autoregressive decoding: predict from the first decoder step, then for
        # each following step write the newest forecast into the last channel of
        # that step and run the model again on the extended decoder input.
        prediction = model((src, trg_in[:, :1, :]))
        for j in range(1, horizon_size):
            last_prediction = prediction[0, -1]
            trg_in[:, j, -1] = last_prediction
            prediction = model((src, trg_in[:, : (j + 1), :]))

    # The final pass holds the forecast for every step of the horizon.
    trg[target + "_predicted"] = prediction.squeeze().cpu().numpy().tolist()

    neural_predictions += trg[target + "_predicted"].tolist()

    time_series_data["prediction"] = trg[target + "_predicted"].tolist()

    data_for_visualization.append(time_series_data)

0it [00:00, ?it/s]

100it [02:15,  1.36s/it]


In [15]:
# Score the last-known-value baseline and the model forecasts against the same
# ground truth, baseline first.
baseline_eval, model_eval = (
    evaluate_regression(gt, predicted_values)
    for predicted_values in (baseline_last_known_values, neural_predictions)
)

# Collect the "mae" and "smape" entries of both results under report keys.
eval_dict = {
    "Baseline_MAE": baseline_eval["mae"],
    "Baseline_SMAPE": baseline_eval["smape"],
    "Model_MAE": model_eval["mae"],
    "Model_SMAPE": model_eval["smape"],
}

In [19]:
# Show the collected baseline and model metrics.
eval_dict

{'Baseline_MAE': 0.9922331294729232,
 'Baseline_SMAPE': 121.3300692195531,
 'Model_MAE': 0.902128204793807,
 'Model_SMAPE': 102.98853474709719}

In [20]:
# Write each result to its JSON file, skipping any output whose path is None.
# The metric summary is written first, then the per-series visualization data.
for output_path, payload in (
    (eval_json_path, eval_dict),
    (data_for_visualization_path, data_for_visualization),
):
    if output_path is None:
        continue
    with open(output_path, "w") as output_file:
        json.dump(payload, output_file, indent=4)

# Print one "name value" line per metric.
for metric_name, metric_value in eval_dict.items():
    print(metric_name, metric_value)

Baseline_MAE 0.9922331294729232
Baseline_SMAPE 121.3300692195531
Model_MAE 0.902128204793807
Model_SMAPE 102.98853474709719


In [21]:
# Display everything collected for visualization.
# NOTE(review): this prints the full nested list; consider showing only a slice
# (e.g. the first entry) to keep the saved notebook small.
data_for_visualization

[{'history': [0.610500852344215,
   0.610054454409415,
   0.4023134956689099,
   0.3971040880611559,
   0.2631969185272264,
   -0.0516942314123951,
   -0.1049281671177984,
   -0.2155631828806526,
   -0.2870910222858039,
   -0.4785117067052248,
   -0.8117817899087543,
   -0.9330199844857096,
   -0.754202992062517,
   -0.7317612198697404,
   -0.9963079802531852,
   -0.7269466901979751,
   -0.7527831440272366,
   -0.5880430067137474,
   -0.397062405593659,
   -0.385320050060141,
   -0.0749106959983038,
   -0.171036529991964,
   -0.0159372727496762,
   0.4032415743502633,
   0.6994664946567375,
   1.1551370507935357,
   1.267357337532908,
   1.4586880747774311,
   1.354842158844951,
   1.4070411671373688,
   1.39441042716008,
   1.57562548499594,
   1.353795379045818,
   1.33511353478111,
   1.3037256299556854,
   1.1460755987128175,
   1.107179276776774,
   0.7508673199308397,
   0.4371087243385602,
   0.1875740951020404,
   -0.1027751007278148,
   -0.1521433279270117,
   -0.4626666404418