In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, mean_squared_error
import torch
from transformers import TrainingArguments, Trainer
from transformers import EarlyStoppingCallback
import random
from datasets import ClassLabel
from pathlib import Path
from transformers import AutoModelForMaskedLM, AutoTokenizer
#from transformers import logging as hf_logging
#hf_logging.set_verbosity(hf_logging.ERROR)
#transformers.logging.set_verbosity_error()
from tqdm.auto import tqdm
from sklearn.metrics import *
from pytorch_forecasting import TimeSeriesDataSet
# import pytorch_lightning as pl
import lightning.pytorch as pl

from pytorch_forecasting.models.temporal_fusion_transformer import *
import time
# import ciso8601
from pytorch_forecasting.models.rnn import RecurrentNetwork
from pytorch_forecasting.models.deepar import DeepAR
from pytorch_forecasting.models import NBeats
from pytorch_forecasting.models.nhits import NHiTS
from pytorch_forecasting.metrics import *
from sklearn.model_selection import train_test_split

import os
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler
def get_scaler(scaler):
    scalers = {
        "minmax": MinMaxScaler,
        "standard": StandardScaler,
        "maxabs": MaxAbsScaler,
        "robust": RobustScaler,
    }
    return scalers.get(scaler.lower())()
scaler = get_scaler('minmax')

import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import warnings
warnings.filterwarnings('ignore')

In [13]:
ARGS = 'n'
        
if ARGS == '':
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument("--dataset", default=None, type=str)
    parser.add_argument("--model", default=None, type=str)
    parser.add_argument("--loss", default=None, type=str)
    parser.add_argument("--nlags", default=None, type=int)
    parser.add_argument("--predtype", default=None, type=str)
    parser.add_argument("--ahead", default=None, type=int)
    lineargs = parser.parse_args()
else:
    class Args:
        pass
    lineargs = Args()
    lineargs.dataset = 'Pods-Train2013:2017Pod0-Test2018Pod0' #'Pods-Train2014Pod0-Test2015Pod0'
    lineargs.model = 'LSTM' #  LSTM, GRU, DeepAR, NBeats, --NHits, TFT
    lineargs.loss = 'CrossEntropy' 
    lineargs.nlags = 45 
    lineargs.predtype = 'point'
    lineargs.ahead = 672 # 1 # 96 # 672
# '../../train-test-sets/Data_Pods-Train2014Pod0-Test2015Pod0/'
# '../../train-test-sets/Data_Pods-Train2014Pod0:100-Test2015Pod0:100/'

SAVEDIR = f'../../trained-models/Data_{lineargs.dataset}/Model_{lineargs.model}/Loss_{lineargs.loss}/Lags_{lineargs.nlags}/Prediction_{lineargs.predtype}_{lineargs.ahead}'
Path(SAVEDIR).mkdir(parents=True, exist_ok=True)

In [14]:
train = pd.read_parquet(f"{SAVEDIR.replace('trained-models','train-test-sets').replace(lineargs.model,'Others')}/train.pqt")#.head(1000)
train['year'] = [x.year for x in train['date']]
train['month'] = [x.month for x in train['date']]
train['day'] = [x.day for x in train['date']]
train['time_idx'] = range(len(train))
train['group'] = 0
# train[['value']] = train[['value']].fillna(value=0)
train.dropna(inplace=True)

train.reset_index(inplace=True)
train

Unnamed: 0,index,Pod,PuntoDispacciamento,Trattamento,Tensione,PotContrImp,PotDisp,PotMax,quarter,value,date,year,month,day,time_idx,group
0,182842,IT003E01001342,NORD,O,10000,200.0,560.0,141.36,1,16.695,2013-12-01,2013,12,1,0,0
1,180918,IT003E01001342,NORD,O,10000,200.0,560.0,141.36,2,17.685,2013-12-01,2013,12,1,1,0
2,178994,IT003E01001342,NORD,O,10000,200.0,560.0,141.36,3,17.055,2013-12-01,2013,12,1,2,0
3,177070,IT003E01001342,NORD,O,10000,200.0,560.0,141.36,4,17.565,2013-12-01,2013,12,1,3,0
4,175146,IT003E01001342,NORD,O,10000,200.0,560.0,141.36,5,16.920,2013-12-01,2013,12,1,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149087,9103,IT003E01001342,NORD,O,15000,200.0,560.0,263.88,92,21.510,2017-08-31,2017,8,31,149563,0
149088,7179,IT003E01001342,NORD,O,15000,200.0,560.0,263.88,93,21.345,2017-08-31,2017,8,31,149564,0
149089,5255,IT003E01001342,NORD,O,15000,200.0,560.0,263.88,94,21.660,2017-08-31,2017,8,31,149565,0
149090,3331,IT003E01001342,NORD,O,15000,200.0,560.0,263.88,95,21.330,2017-08-31,2017,8,31,149566,0


In [15]:
def train_val_split(data, train_percentage):
    train, val = train_test_split(data, train_size=train_percentage)
    return train, val
train, val = train_val_split(train, train_percentage=0.9)
print(len(train))
print(len(val))

134182
14910


In [16]:
prediction_length = lineargs.ahead
max_encoder_length = lineargs.nlags

# see https://pytorch-forecasting.readthedocs.io/en/stable/api/pytorch_forecasting.data.timeseries.TimeSeriesDataSet.html#pytorch_forecasting.data.timeseries.TimeSeriesDataSet
dataset_args = dict(
    min_encoder_length=max_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=prediction_length,
    max_prediction_length=prediction_length,
    time_varying_known_reals = ['Tensione','PotContrImp','PotDisp','PotMax','year','month','day'],
    time_varying_known_categoricals = ['Pod','PuntoDispacciamento','Trattamento'],
    time_varying_unknown_reals= ["value"],
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True
)

if lineargs.model == 'NBeats':
    dataset_args['add_relative_time_idx'] = False
    dataset_args['time_varying_known_reals'] = []
    dataset_args['time_varying_known_categoricals'] = []
    dataset_args['add_target_scales'] = False
    dataset_args['add_encoder_length'] = False
    
if lineargs.model == 'NHits':
    dataset_args['add_relative_time_idx'] = False
    
training = TimeSeriesDataSet(
    train,
    group_ids=["group"],
    target="value",
    time_idx="time_idx",
    **dataset_args   
)
validation = TimeSeriesDataSet.from_dataset(training, val, predict=False, stop_randomization=True)

batch_size = 16  # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size)
val_dataloader = validation.to_dataloader(train=True, batch_size=batch_size)

display(train.head())
print(train.shape, len(train_dataloader))

Unnamed: 0,index,Pod,PuntoDispacciamento,Trattamento,Tensione,PotContrImp,PotDisp,PotMax,quarter,value,date,year,month,day,time_idx,group
93772,29598,IT003E01001342,NORD,O,10000,200.0,560.0,207.0,81,21.99,2015-05-06,2015,5,6,94064,0
126269,127512,IT003E01001342,NORD,O,10000,200.0,560.0,264.72,30,44.64,2015-07-08,2015,7,8,126653,0
102107,62546,IT003E01001342,NORD,O,10000,200.0,560.0,215.94,64,39.525,2016-09-01,2016,9,1,102399,0
30147,653,IT003E01001342,NORD,O,10000,200.0,560.0,157.86,96,15.54,2015-02-10,2015,2,10,30335,0
125284,175937,IT003E01001342,NORD,O,10000,200.0,560.0,264.0,5,18.225,2016-07-29,2016,7,29,125668,0


(134182, 16) 7488


In [17]:
#early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
#lr_logger = LearningRateMonitor()  # log the learning rate
#logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

# configure network and trainer
pl.seed_everything(42)
trainer = pl.Trainer(
    max_epochs=10,
    #gpus=[0],
    gradient_clip_val=0.1, 
#     weights_summary="top",
    #callbacks=[early_stop_callback],
    #logger=logger,
)
if lineargs.model == 'TFT':
    model = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=0.03,
        hidden_size=16,  # most important hyperparameter apart from learning rate
        attention_head_size=1,
        dropout=0.1,  # between 0.1 and 0.3 are good values
        hidden_continuous_size=8,  # set to <= hidden_size
        output_size=7,  # 7 quantiles by default
        loss=QuantileLoss(),
        reduce_on_plateau_patience=4,
    )
if lineargs.model == 'LSTM':
    model = RecurrentNetwork.from_dataset(
    training,
    cell_type='LSTM',
    hidden_size=10,    # Defaults to 10
    rnn_layers=2,      # Defaults to 2
    dropout=0.1,       # Defaults to 0.1
    learning_rate = 1e-3,
    )
    
if lineargs.model == 'GRU':
    model = RecurrentNetwork.from_dataset(
    training,
    cell_type='GRU',
    hidden_size=10,    # Defaults to 10
    rnn_layers=2,      # Defaults to 2
    dropout=0.1,       # Defaults to 0.1
    learning_rate = 1e-3,
    )
    
if lineargs.model == 'DeepAR':
    model = DeepAR.from_dataset(
    training,
    )
    
if lineargs.model == 'NBeats':
    model = NBeats.from_dataset(
    training,
    )
if lineargs.model == 'NHits':
    model = NHiTS.from_dataset(
    training,
    pooling_sizes = [1,1,1]
    )

print(lineargs.model)
print(f"Number of parameters in network: {model.size()/1e3:.1f}k")

Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LSTM
Number of parameters in network: 2.0k


In [18]:
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

You are using a CUDA device ('GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type           | Params
----------------------------------------------------
0 | loss             | MAE            | 0     
1 | logging_metrics  | ModuleList     | 0     
2 | embeddings       | MultiEmbedding | 3     
3 | rnn              | LSTM           | 2.0 K 
4 | output_projector | Linear         | 11    
----------------------------------------------------
2.0 K     Trainable params
0         Non-trainable params
2.0 K     Total params
0.008     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

In [19]:
best_model_path = trainer.checkpoint_callback.best_model_path
#print(best_model_path)
import shutil
shutil.copyfile(best_model_path, SAVEDIR+"/model.ckpt")

FileNotFoundError: [Errno 2] No such file or directory: ''

In [None]:
# model_dict = {
#     'TFT': TemporalFusionTransformer,
#     'LSTM': RecurrentNetwork,
#     'GRU': RecurrentNetwork,
#     'DeepAR': DeepAR,
#     'NBeats': NBeats,
#     'NHits': NHiTS
# }

# best = model_dict[lineargs.model].load_from_checkpoint(SAVEDIR + "/model.ckpt")

In [None]:
# gold = torch.cat([y[0] for x, y in tqdm(iter(train_dataloader))])
# pred = best.predict(train_dataloader, trainer_kwargs=dict(accelerator="cpu"))
# print(pred.shape)
# print(pred[0])

In [None]:
# import matplotlib.pyplot as plt
# list1 = list([x[0] for x in gold.numpy()])
# list2 =  list([x[0] for x in pred.numpy()])

# fig, ax = plt.subplots()
# ax.plot(list1, label='List 1')
# ax.plot(list2, label='List 2')
# ax.set_title('Two Lists on the Same Plot')
# ax.set_xlabel('X-axis label')
# ax.set_ylabel('Y-axis label')
# ax.legend()
# plt.show()