In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.preprocessing import FunctionTransformer
import pickle
from typing import Dict



import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots as sp

from tqdm import tqdm
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline
from sklego.preprocessing import RepeatingBasisFunction

import seaborn as sns
from datetime import datetime
from datetime import date
from datetime import timedelta

import holidays
import yaml

import torch
from torch import nn
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss, MultiLoss, CrossEntropy
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting.data.encoders import NaNLabelEncoder, MultiNormalizer, EncoderNormalizer, TorchNormalizer
from lightning.pytorch.utilities.model_summary import ModelSummary
from pytorch_forecasting.models import BaseModel
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, TomekLinks, NeighbourhoodCleaningRule
from sklearn.model_selection import  StratifiedShuffleSplit
from sklearn.preprocessing import  OneHotEncoder, FunctionTransformer, StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.utils import class_weight
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import  HistGradientBoostingRegressor

Load the Stallion demo dataset (shipped with pytorch-forecasting) into a pandas DataFrame

In [2]:
from pytorch_forecasting.data.examples import get_stallion_data

# Load the Stallion demo data via the pytorch-forecasting example helper;
# the result is a pandas DataFrame (see data.info() further down).
data = get_stallion_data()

In [3]:
data.head()

Unnamed: 0,agency,sku,volume,date,industry_volume,soda_volume,avg_max_temp,price_regular,price_actual,discount,avg_population_2017,avg_yearly_household_income_2017,easter_day,good_friday,new_year,christmas,labor_day,independence_day,revolution_day_memorial,regional_games,fifa_u_17_world_cup,football_gold_cup,beer_capital,music_fest,discount_in_percent,timeseries
0,Agency_22,SKU_01,52.272,2013-01-01,492612703,718394219,25.845238,1168.903668,1069.166193,99.737475,48151,132110,0,0,1,0,0,0,0,0,0,0,0,0,8.532566,0
238,Agency_37,SKU_04,0.0,2013-01-01,492612703,718394219,26.505,1852.273642,1611.466298,240.807344,32769,96761,0,0,1,0,0,0,0,0,0,0,0,0,13.000635,5
237,Agency_59,SKU_03,812.9214,2013-01-01,492612703,718394219,22.219737,1270.795012,1197.18426,73.610752,1219986,218902,0,0,1,0,0,0,0,0,0,0,0,0,5.792496,9
236,Agency_11,SKU_01,316.44,2013-01-01,492612703,718394219,25.36,1176.155397,1082.757488,93.397909,135561,100461,0,0,1,0,0,0,0,0,0,0,0,0,7.94095,14
235,Agency_05,SKU_05,420.9093,2013-01-01,492612703,718394219,24.079012,1327.003396,1207.822992,119.180404,3044268,182944,0,0,1,0,0,0,0,0,0,0,0,0,8.981168,22


add time index

In [4]:
# Month counter: year * 12 + month yields consecutive integers for consecutive months.
data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
# Shift so that the earliest month in the data gets time_idx 0.
data["time_idx"] -= data["time_idx"].min()

add additional features

In [5]:
data["month"] = data.date.dt.month.astype(str).astype("category")  # categories have to be strings
# The 1e-8 offset keeps the log finite for rows with volume == 0
# (those rows come out as roughly -18.42, as visible in the sample output below).
data["log_volume"] = np.log(data.volume + 1e-8)
# Mean volume per (time_idx, sku) and per (time_idx, agency), broadcast back onto every row.
data["avg_volume_by_sku"] = data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean")
data["avg_volume_by_agency"] = data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean")

we want to encode special days as one variable and thus need to first reverse one-hot encoding

In [6]:
# Names of the one-hot (0/1) event indicator columns in the dataset.
special_days = [
    "easter_day",
    "good_friday",
    "new_year",
    "christmas",
    "labor_day",
    "independence_day",
    "revolution_day_memorial",
    "regional_games",
    "fifa_u_17_world_cup",
    "football_gold_cup",
    "beer_capital",
    "music_fest",
]
# Reverse the one-hot encoding column by column: 0 becomes "-", 1 becomes the
# column's own name; the result is stored with the category dtype.
data[special_days] = data[special_days].apply(lambda x: x.map({0: "-", 1: x.name})).astype("category")
# Fixed random_state so the displayed sample is the same on every run.
data.sample(10, random_state=521)

Unnamed: 0,agency,sku,volume,date,industry_volume,soda_volume,avg_max_temp,price_regular,price_actual,discount,avg_population_2017,avg_yearly_household_income_2017,easter_day,good_friday,new_year,christmas,labor_day,independence_day,revolution_day_memorial,regional_games,fifa_u_17_world_cup,football_gold_cup,beer_capital,music_fest,discount_in_percent,timeseries,time_idx,month,log_volume,avg_volume_by_sku,avg_volume_by_agency
291,Agency_25,SKU_03,0.5076,2013-01-01,492612703,718394219,25.845238,1264.162234,1152.473405,111.688829,53598,95032,-,-,new_year,-,-,-,-,-,-,-,-,-,8.835008,228,0,1,-0.678062,1225.306376,99.6504
871,Agency_29,SKU_02,8.748,2015-01-01,498567142,762225057,27.584615,1316.098485,1296.804924,19.293561,28329,93614,-,-,new_year,-,-,-,-,-,-,-,-,-,1.465966,177,24,1,2.168825,1634.434615,11.397086
19532,Agency_47,SKU_01,4.968,2013-09-01,454252482,789624076,30.665957,1269.25,1266.49049,2.75951,32183,128574,-,-,-,-,-,independence_day,-,-,-,-,-,-,0.217413,322,8,9,1.603017,2625.472644,48.29565
2089,Agency_53,SKU_07,21.6825,2013-10-01,480693900,791658684,29.197727,1193.842373,1128.124395,65.717978,1729177,111048,-,-,-,-,-,-,-,-,-,-,beer_capital,-,5.504745,240,9,10,3.076505,38.529107,2511.035175
9755,Agency_17,SKU_02,960.552,2015-03-01,515468092,871204688,23.60812,1338.334248,1232.128069,106.206179,147910,123013,-,-,-,-,-,-,-,-,-,-,-,music_fest,7.935699,259,26,3,6.867508,2143.677462,396.02214
7561,Agency_05,SKU_03,1184.6535,2014-02-01,425528909,734443953,28.668254,1369.556376,1161.135214,208.421162,3044268,182944,-,-,-,-,-,-,-,-,-,-,-,-,15.218151,21,13,2,7.077206,1566.643589,1881.866367
19204,Agency_11,SKU_05,5.5593,2017-08-01,623319783,1049868815,31.915385,1922.486644,1651.307674,271.17897,135561,100461,-,-,-,-,-,-,-,-,-,-,-,-,14.105636,17,55,8,1.715472,1385.225478,109.6992
8781,Agency_48,SKU_04,4275.1605,2013-03-01,509281531,892192092,26.767857,1761.258209,1546.05967,215.198539,1739969,210213,easter_day,good_friday,-,-,-,-,-,-,-,-,-,music_fest,12.218455,151,2,3,8.360577,1757.950603,1925.272108
2540,Agency_07,SKU_21,0.0,2015-10-01,544203593,761469815,28.987755,0.0,0.0,0.0,1868030,175785,-,-,-,-,-,-,-,-,-,-,-,-,0.0,300,33,10,-18.420681,0.0,2418.71955
12084,Agency_21,SKU_03,46.3608,2017-04-01,589969396,940912941,32.47891,1675.922116,1413.571789,262.350327,26635,130092,easter_day,good_friday,-,-,-,-,-,-,-,-,-,-,15.654088,181,51,4,3.836454,2034.293024,109.3818


In [7]:
def summarize_categoricals(df, show_levels=False):
    """
    Summarize the distinct values and missing-value count of every column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are summarized.
    show_levels : bool, default False
        When True, the result additionally contains a 'Levels' column holding
        the distinct values found in each input column.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` (indexed by column name) with the columns
        'No. of Levels' and 'No. of Missing Values', preceded by 'Levels' when
        ``show_levels`` is True.

    Notes
    -----
    The level count is ``len(Series.unique())``, so a missing value (NaN)
    is counted as one level in addition to being reported as missing.
    """
    rows = []
    for col in df.columns:
        # Compute the distinct values once and reuse them for the count.
        levels = df[col].unique()
        rows.append([levels, len(levels), df[col].isnull().sum()])

    summary = pd.DataFrame(rows, index=df.columns,
                           columns=['Levels', 'No. of Levels', 'No. of Missing Values'])
    # Drop the leading 'Levels' column unless the caller asked for it.
    first_col = 0 if show_levels else 1
    return summary.iloc[:, first_col:]

In [8]:
summarize_categoricals(data[['agency','sku','easter_day','timeseries','month']], show_levels=True)

Unnamed: 0,Levels,No. of Levels,No. of Missing Values
agency,"['Agency_22', 'Agency_37', 'Agency_59', 'Agenc...",58,0
sku,"['SKU_01', 'SKU_04', 'SKU_03', 'SKU_05', 'SKU_...",25,0
easter_day,"['-', 'easter_day'] Categories (2, object): ['...",2,0
timeseries,"[0, 5, 9, 14, 22, 32, 23, 37, 44, 18, 50, 58, ...",350,0
month,"['1', '2', '3', '4', '5', ..., '8', '9', '10',...",12,0


In [9]:
data.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
volume,21000.0,1492.403982,0.0,8.272388,158.436,1774.793475,22526.61,2711.496882
date,21000.0,2015-06-16 20:48:00,2013-01-01 00:00:00,2014-03-24 06:00:00,2015-06-16 00:00:00,2016-09-08 12:00:00,2017-12-01 00:00:00,
industry_volume,21000.0,543921415.616667,413051813.0,509055292.5,551199995.5,589371547.75,670015726.0,62880220.210095
soda_volume,21000.0,851200017.016667,696401477.0,789088000.5,864919588.0,900555088.0,1049868815.0,78243400.698103
avg_max_temp,21000.0,28.612404,16.731034,25.374816,28.479272,31.568405,45.290476,3.972833
price_regular,21000.0,1451.536344,0.0,1311.547158,1495.174592,1725.65208,19166.625,683.362417
price_actual,21000.0,1267.34745,-3121.690141,1178.365653,1324.695705,1517.311427,4925.404,587.757323
discount,21000.0,184.374146,0.0,54.935108,138.307225,272.29863,19166.625,257.469968
avg_population_2017,21000.0,1045064.568571,12271.0,60189.0,1232242.0,1729177.0,3137874.0,929192.5853
avg_yearly_household_income_2017,21000.0,151073.494286,90240.0,110057.0,131411.0,206553.0,247220.0,50409.593114


In [10]:
# Pick a single (agency, SKU) series and order it chronologically for inspection.
filter_data = data[(data['agency']=='Agency_57') & (data['sku']=='SKU_03')].sort_values(by='time_idx')
filter_data.head() # Author's note: each agency/SKU pair always has every time idx, with no missing values (not verified by this cell)

Unnamed: 0,agency,sku,volume,date,industry_volume,soda_volume,avg_max_temp,price_regular,price_actual,discount,avg_population_2017,avg_yearly_household_income_2017,easter_day,good_friday,new_year,christmas,labor_day,independence_day,revolution_day_memorial,regional_games,fifa_u_17_world_cup,football_gold_cup,beer_capital,music_fest,discount_in_percent,timeseries,time_idx,month,log_volume,avg_volume_by_sku,avg_volume_by_agency
232,Agency_57,SKU_03,782.127,2013-01-01,492612703,718394219,22.219737,1268.558431,1165.031186,103.527245,2212416,187351,-,-,new_year,-,-,-,-,-,-,-,-,-,8.161015,37,0,1,6.662017,1225.306376,1821.2925
7284,Agency_57,SKU_03,801.585,2013-02-01,431937346,753938444,25.6375,1267.149338,1158.123783,109.025555,2212416,187351,-,-,-,-,-,-,-,-,-,-,-,-,8.604002,37,1,2,6.686591,1289.885997,1963.349531
8968,Agency_57,SKU_03,866.9805,2013-03-01,509281531,892192092,24.925652,1285.234686,1177.612551,107.622135,2212416,187351,easter_day,good_friday,-,-,-,-,-,-,-,-,-,music_fest,8.373734,37,2,3,6.765016,1432.225016,2162.780156
10811,Agency_57,SKU_03,1307.40825,2013-04-01,532390389,838099501,27.930736,1312.239461,1129.051331,183.18813,2212416,187351,-,-,-,-,-,-,-,-,-,-,-,-,13.959962,37,3,4,7.175802,1564.910937,2433.855281
12540,Agency_57,SKU_03,1020.4455,2013-05-01,551755254,864420003,27.555022,1318.155168,1122.874614,195.280554,2212416,187351,-,-,-,-,labor_day,-,-,-,-,-,-,-,14.814686,37,4,5,6.927995,1741.896147,2385.850219


In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21000 entries, 0 to 6650
Data columns (total 31 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   agency                            21000 non-null  category      
 1   sku                               21000 non-null  category      
 2   volume                            21000 non-null  float64       
 3   date                              21000 non-null  datetime64[ns]
 4   industry_volume                   21000 non-null  int64         
 5   soda_volume                       21000 non-null  int64         
 6   avg_max_temp                      21000 non-null  float64       
 7   price_regular                     21000 non-null  float64       
 8   price_actual                      21000 non-null  float64       
 9   discount                          21000 non-null  float64       
 10  avg_population_2017               21000 non-null  in

In [12]:
data.head()

Unnamed: 0,agency,sku,volume,date,industry_volume,soda_volume,avg_max_temp,price_regular,price_actual,discount,avg_population_2017,avg_yearly_household_income_2017,easter_day,good_friday,new_year,christmas,labor_day,independence_day,revolution_day_memorial,regional_games,fifa_u_17_world_cup,football_gold_cup,beer_capital,music_fest,discount_in_percent,timeseries,time_idx,month,log_volume,avg_volume_by_sku,avg_volume_by_agency
0,Agency_22,SKU_01,52.272,2013-01-01,492612703,718394219,25.845238,1168.903668,1069.166193,99.737475,48151,132110,-,-,new_year,-,-,-,-,-,-,-,-,-,8.532566,0,0,1,3.956461,2613.377501,103.80546
238,Agency_37,SKU_04,0.0,2013-01-01,492612703,718394219,26.505,1852.273642,1611.466298,240.807344,32769,96761,-,-,new_year,-,-,-,-,-,-,-,-,-,13.000635,5,0,1,-18.420681,1361.511918,0.5499
237,Agency_59,SKU_03,812.9214,2013-01-01,492612703,718394219,22.219737,1270.795012,1197.18426,73.610752,1219986,218902,-,-,new_year,-,-,-,-,-,-,-,-,-,5.792496,9,0,1,6.700634,1225.306376,2041.909586
236,Agency_11,SKU_01,316.44,2013-01-01,492612703,718394219,25.36,1176.155397,1082.757488,93.397909,135561,100461,-,-,new_year,-,-,-,-,-,-,-,-,-,7.94095,14,0,1,5.757134,2613.377501,125.69022
235,Agency_05,SKU_05,420.9093,2013-01-01,492612703,718394219,24.079012,1327.003396,1207.822992,119.180404,3044268,182944,-,-,new_year,-,-,-,-,-,-,-,-,-,8.981168,22,0,1,6.042417,1179.728165,1638.4635


In [13]:
# Show which integer codes NaNLabelEncoder assigns to each categorical column.
cate_cols = ['new_year', 'month']
for col in cate_cols:
    encoded = NaNLabelEncoder(add_nan=True).fit_transform(data[col])
    distinct_codes = set(np.array(encoded))
    print(f"col {col} >>> {distinct_codes}")

col new_year >>> {1, 2}
col month >>> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}


In [14]:
data[cate_cols].info()

<class 'pandas.core.frame.DataFrame'>
Index: 21000 entries, 0 to 6650
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   new_year  21000 non-null  category
 1   month     21000 non-null  category
dtypes: category(2)
memory usage: 205.6 KB


In [None]:
max_prediction_length = 6  # forecast horizon, in time_idx steps
max_encoder_length = 24  # longest history window handed to the encoder, in time_idx steps
# Rows with time_idx <= training_cutoff form the training set below;
# the final max_prediction_length steps are held out.
training_cutoff = data["time_idx"].max() - max_prediction_length

## Create Dataset

In [None]:
# Training dataset with two targets: "volume" (continuous) and "month" (categorical).
# The entries of target_normalizer below are given in the same order as the targets.
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target=["volume","month"],
    group_ids=["agency", "sku"],
    min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["agency", "sku"],
    static_reals=["avg_population_2017", "avg_yearly_household_income_2017"],
    time_varying_known_categoricals=["special_days"],
    variable_groups={"special_days": special_days},  # group of categorical variables can be treated as one variable
    time_varying_known_reals=["time_idx", "price_regular", "discount_in_percent"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "volume",
        "log_volume",
        "industry_volume",
        "soda_volume",
        "avg_max_temp",
        "avg_volume_by_agency",
        "avg_volume_by_sku",
    ],
    target_normalizer=MultiNormalizer(
        [EncoderNormalizer(method='standard',center=True,max_length=None,transformation=None,method_kwargs={}), 
         NaNLabelEncoder(add_nan=True, warn=True), # Use the NaNLabelEncoder to encode categorical target
        #  EncoderNormalizer(method='standard',center=True,max_length=None,transformation=None,method_kwargs={}),
         ]
    ),  
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

In [None]:
# Pull one small batch to inspect what the training dataloader yields.
x, y = next(iter(training.to_dataloader(batch_size=4)))
# y[0] is a list with one tensor per target: volume values first, encoded month second.
y[0]



[tensor([[ 38.1375,  40.9342,  51.6358,  62.7732,  46.6992,  36.0396],
         [323.6760, 300.7800, 532.7640, 453.8160, 456.3000, 356.8320],
         [  0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
         [  0.9585,   0.9585,   0.0000,   1.2780,   0.6390,   0.6390]]),
 tensor([[ 1,  5,  6,  7,  8,  9],
         [ 8,  9, 10, 11, 12,  2],
         [ 7,  8,  9, 10, 11, 12],
         [10, 11, 12,  2,  3,  4]])]

In [None]:
training.target_normalizers

[EncoderNormalizer(
 	method='standard',
 	center=True,
 	max_length=None,
 	transformation=None,
 	method_kwargs={}
 ),
 NaNLabelEncoder(add_nan=True, warn=True)]

In [None]:
training.dropout_categoricals

[]

In [None]:
# create validation set (predict=True), which means predicting the last max_prediction_length points in time for each series
validation = TimeSeriesDataSet.from_dataset(training, data, predict=True, stop_randomization=True)
# create dataloaders for model
batch_size = 128  # set this between 32 and 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
# validation uses a 10x larger batch than training
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

## configure network and trainer

In [None]:
random_seed = 42

In [None]:
# Seed all random number generators so the run is repeatable.
pl.seed_everything(random_seed)
# This trainer is the one handed to the learning-rate finder below.
trainer = pl.Trainer(
    accelerator="cpu",
    # clipping gradients is a hyperparameter and important to prevent divergence
    # of the gradient for recurrent neural networks
    gradient_clip_val=0.1,
)

Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Model instance used by the learning-rate finder below.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=[7,13],  # one entry per target: 7 quantiles (default) for volume; 13 classes for month = 12 months + 1 (NaN class from add_nan=True)
    loss=MultiLoss([QuantileLoss(),CrossEntropy()]), # using MultiLoss for Multi-Target regression/classification
    log_interval=10,  # log every 10 batches
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Number of parameters in network: 29.3k


  rank_zero_warn(
  rank_zero_warn(


In [None]:
# tft.hparams

find optimal learning rate

In [None]:
from lightning.pytorch.tuner import Tuner

In [None]:
# Search for a learning rate between min_lr and max_lr; the result object is
# used by the next cell to print and plot the suggestion.
res = Tuner(trainer).lr_find( 
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    max_lr=10.0,
    min_lr=1e-6,
) 

  rank_zero_warn(
Missing logger folder: /workspaces/multiple_time_series_multitask_learning/lightning_logs


  f.tight_layout()


Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]



: 

: 

In [None]:
# Requires `res` from the lr_find cell; if that cell did not finish, this raises NameError.
print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()

NameError: name 'res' is not defined

## Training

In [None]:
# NOTE(review): patience=10 presumably can never trigger while the trainer below
# runs only max_epochs=2 — confirm this is intended for the quick test run.
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs_test")  # logging results to a tensorboard

In [None]:
# Trainer for the actual (short) training run; replaces the LR-finder trainer.
trainer = pl.Trainer(
    max_epochs=2,
    accelerator="cpu",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=50,  # use only 50 training batches per epoch to keep the run short
    # fast_dev_run=True,  # comment in to check that the network or dataset has no serious bugs
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Build a new model instance for training (rebinds `tft` from the LR-finder cell).
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=[7,13],  # one entry per target: 7 quantiles (default) for volume; 13 classes for month = 12 months + 1 (NaN class from add_nan=True)
    # loss=MultiLoss([QuantileLoss(),QuantileLoss()]), # using MultiLoss for Multi-Target regression/regression
    loss=MultiLoss([QuantileLoss(),CrossEntropy()]), # using MultiLoss for Multi-Target regression/classification
    log_interval=10,  # log every 10 batches
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Number of parameters in network: 29.3k


  rank_zero_warn(
  rank_zero_warn(


In [None]:
# Train the model on the training dataloader, validating on the validation dataloader.
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

Missing logger folder: lightning_logs_test/lightning_logs

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | MultiLoss                       | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1.2 K 
3  | prescalers                         | ModuleDict                      | 256   
4  | static_variable_selection          | VariableSelectionNetwork        | 3.4 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 7.7 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 2.6 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_in

Sanity Checking: 0it [00:00, ?it/s]



Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  f.tight_layout()


Epoch 0:   0%|          | 0/50 [00:00<?, ?it/s]                            



Epoch 0:   2%|▏         | 1/50 [00:00<00:22,  2.18it/s, v_num=0, train_loss_step=301.0]



Epoch 0:   4%|▍         | 2/50 [00:00<00:19,  2.43it/s, v_num=0, train_loss_step=289.0]



Epoch 0:   6%|▌         | 3/50 [00:01<00:19,  2.47it/s, v_num=0, train_loss_step=265.0]



Epoch 0:   8%|▊         | 4/50 [00:01<00:18,  2.53it/s, v_num=0, train_loss_step=364.0]



Epoch 0:  10%|█         | 5/50 [00:01<00:17,  2.63it/s, v_num=0, train_loss_step=343.0]



Epoch 0:  12%|█▏        | 6/50 [00:02<00:16,  2.62it/s, v_num=0, train_loss_step=409.0]



Epoch 0:  14%|█▍        | 7/50 [00:02<00:16,  2.67it/s, v_num=0, train_loss_step=217.0]



Epoch 0:  16%|█▌        | 8/50 [00:02<00:15,  2.69it/s, v_num=0, train_loss_step=232.0]



Epoch 0:  18%|█▊        | 9/50 [00:03<00:15,  2.72it/s, v_num=0, train_loss_step=331.0]



Epoch 0:  20%|██        | 10/50 [00:03<00:14,  2.73it/s, v_num=0, train_loss_step=258.0]



Epoch 0:  22%|██▏       | 11/50 [00:04<00:14,  2.74it/s, v_num=0, train_loss_step=246.0]



Epoch 0:  24%|██▍       | 12/50 [00:04<00:13,  2.73it/s, v_num=0, train_loss_step=278.0]



Epoch 0:  26%|██▌       | 13/50 [00:04<00:13,  2.72it/s, v_num=0, train_loss_step=221.0]



Epoch 0:  28%|██▊       | 14/50 [00:05<00:13,  2.73it/s, v_num=0, train_loss_step=207.0]



Epoch 0:  30%|███       | 15/50 [00:06<00:14,  2.35it/s, v_num=0, train_loss_step=220.0]



Epoch 0:  32%|███▏      | 16/50 [00:06<00:14,  2.36it/s, v_num=0, train_loss_step=220.0]



Epoch 0:  32%|███▏      | 16/50 [00:06<00:14,  2.36it/s, v_num=0, train_loss_step=243.0]



Epoch 0:  34%|███▍      | 17/50 [00:07<00:13,  2.39it/s, v_num=0, train_loss_step=284.0]



Epoch 0:  36%|███▌      | 18/50 [00:07<00:13,  2.40it/s, v_num=0, train_loss_step=261.0]



Epoch 0:  38%|███▊      | 19/50 [00:07<00:12,  2.42it/s, v_num=0, train_loss_step=221.0]



Epoch 0:  40%|████      | 20/50 [00:08<00:12,  2.44it/s, v_num=0, train_loss_step=209.0]



Epoch 0:  42%|████▏     | 21/50 [00:08<00:11,  2.42it/s, v_num=0, train_loss_step=307.0]



Epoch 0:  44%|████▍     | 22/50 [00:08<00:11,  2.45it/s, v_num=0, train_loss_step=293.0]



Epoch 0:  46%|████▌     | 23/50 [00:09<00:10,  2.46it/s, v_num=0, train_loss_step=176.0]



Epoch 0:  48%|████▊     | 24/50 [00:09<00:10,  2.45it/s, v_num=0, train_loss_step=166.0]



Epoch 0:  50%|█████     | 25/50 [00:10<00:10,  2.47it/s, v_num=0, train_loss_step=264.0]



Epoch 0:  52%|█████▏    | 26/50 [00:10<00:09,  2.49it/s, v_num=0, train_loss_step=173.0]



Epoch 0:  54%|█████▍    | 27/50 [00:10<00:09,  2.49it/s, v_num=0, train_loss_step=235.0]



Epoch 0:  56%|█████▌    | 28/50 [00:11<00:08,  2.51it/s, v_num=0, train_loss_step=250.0]



Epoch 0:  58%|█████▊    | 29/50 [00:11<00:08,  2.52it/s, v_num=0, train_loss_step=226.0]



Epoch 0:  60%|██████    | 30/50 [00:11<00:07,  2.51it/s, v_num=0, train_loss_step=125.0]



Epoch 0:  62%|██████▏   | 31/50 [00:12<00:07,  2.52it/s, v_num=0, train_loss_step=192.0]



Epoch 0:  64%|██████▍   | 32/50 [00:12<00:07,  2.53it/s, v_num=0, train_loss_step=133.0]



Epoch 0:  66%|██████▌   | 33/50 [00:13<00:06,  2.53it/s, v_num=0, train_loss_step=148.0]



Epoch 0:  68%|██████▊   | 34/50 [00:13<00:06,  2.54it/s, v_num=0, train_loss_step=166.0]



Epoch 0:  70%|███████   | 35/50 [00:13<00:05,  2.55it/s, v_num=0, train_loss_step=226.0]



Epoch 0:  72%|███████▏  | 36/50 [00:14<00:05,  2.56it/s, v_num=0, train_loss_step=170.0]



Epoch 0:  74%|███████▍  | 37/50 [00:14<00:05,  2.56it/s, v_num=0, train_loss_step=196.0]



Epoch 0:  76%|███████▌  | 38/50 [00:14<00:04,  2.54it/s, v_num=0, train_loss_step=236.0]



Epoch 0:  78%|███████▊  | 39/50 [00:15<00:04,  2.55it/s, v_num=0, train_loss_step=155.0]



Epoch 0:  80%|████████  | 40/50 [00:15<00:03,  2.55it/s, v_num=0, train_loss_step=194.0]



Epoch 0:  82%|████████▏ | 41/50 [00:16<00:03,  2.56it/s, v_num=0, train_loss_step=155.0]



Epoch 0:  84%|████████▍ | 42/50 [00:16<00:03,  2.55it/s, v_num=0, train_loss_step=167.0]



Epoch 0:  86%|████████▌ | 43/50 [00:16<00:02,  2.55it/s, v_num=0, train_loss_step=167.0]



Epoch 0:  88%|████████▊ | 44/50 [00:17<00:02,  2.55it/s, v_num=0, train_loss_step=187.0]



Epoch 0:  90%|█████████ | 45/50 [00:17<00:01,  2.55it/s, v_num=0, train_loss_step=138.0]



Epoch 0:  92%|█████████▏| 46/50 [00:17<00:01,  2.56it/s, v_num=0, train_loss_step=159.0]



Epoch 0:  94%|█████████▍| 47/50 [00:18<00:01,  2.57it/s, v_num=0, train_loss_step=87.60]

[rank: 0] Received SIGTERM: 15


Epoch 0:  96%|█████████▌| 48/50 [00:18<00:00,  2.55it/s, v_num=0, train_loss_step=158.0]

SIGTERMException: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


: 

Load the model checkpoint you want to evaluate

In [None]:
# Prefer the checkpoint recorded by this session's trainer, so the path always
# matches the run that was just trained. The hard-coded file name is kept only
# as a fallback for when the trainer recorded no checkpoint (e.g. training was
# interrupted before the first save).
best_model_path = (
    trainer.checkpoint_callback.best_model_path
    or 'lightning_logs_test/lightning_logs/version_0/checkpoints/epoch=1-step=6.ckpt'  # Classification
)
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

%%<br>
Calculate the root mean squared error on the validation set

In [None]:
# Collect the targets of every validation batch; y[0] is the list of target
# tensors (one per target), so `actuals` is a list of such lists.
actuals = [y[0] for x, y in val_dataloader]
# Predict with the model restored from the checkpoint (`best_tft`) rather than
# the in-memory `tft`, so the evaluation reflects the checkpoint that was loaded.
val_predictions = best_tft.predict(val_dataloader)

%%

In [None]:
actuals[0]

%%

In [None]:
val_predictions