In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, mean_squared_error
import torch
from transformers import TrainingArguments, Trainer
from transformers import EarlyStoppingCallback
import random
from datasets import ClassLabel
from pathlib import Path
from transformers import AutoModelForMaskedLM, AutoTokenizer
#from transformers import logging as hf_logging
#hf_logging.set_verbosity(hf_logging.ERROR)
#transformers.logging.set_verbosity_error()
from tqdm.auto import tqdm
from sklearn.metrics import *
from pytorch_forecasting import TimeSeriesDataSet
# import pytorch_lightning as pl
import lightning.pytorch as pl

from pytorch_forecasting.models.temporal_fusion_transformer import *
import time
# import ciso8601
from pytorch_forecasting.models.rnn import RecurrentNetwork
from pytorch_forecasting.models.deepar import DeepAR
from pytorch_forecasting.models import NBeats
from pytorch_forecasting.models.nhits import NHiTS
from pytorch_forecasting.metrics import *
import os
import calendar

from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler
def get_scaler(scaler):
    """Return a new, unfitted scikit-learn scaler selected by name.

    Args:
        scaler: Case-insensitive scaler name; one of "minmax", "standard",
            "maxabs" or "robust".

    Returns:
        A fresh instance of the matching scaler class.

    Raises:
        ValueError: If ``scaler`` is not one of the supported names.
    """
    scalers = {
        "minmax": MinMaxScaler,
        "standard": StandardScaler,
        "maxabs": MaxAbsScaler,
        "robust": RobustScaler,
    }
    key = scaler.lower()
    # Fail with a readable message instead of calling None for an unknown name.
    if key not in scalers:
        raise ValueError(
            f"Unknown scaler {scaler!r}; expected one of {sorted(scalers)}"
        )
    return scalers[key]()
# Module-level min-max scaler instance (not referenced by the cells visible here).
scaler = get_scaler('minmax')

import os
# Enumerate GPUs in PCI bus order and make only device 0 visible.  # see issue #152
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

import logging
# Raise the threshold of Lightning's rank-zero logger to WARNING.
logging.getLogger(
    "lightning.pytorch.utilities.rank_zero"
).setLevel(logging.WARNING)

import warnings
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings('ignore')

In [2]:
import plotly.graph_objs as go
from plotly.offline import iplot, plot
def plot_dataset(outdir, df, month, title='title'):
    """Write an HTML line chart of observed vs. predicted values.

    Args:
        outdir: Directory the HTML file is written into.
        df: DataFrame with 'target' and 'pred' columns; its index is used for
            the x axis and as the hover text of both traces.
        month: Label embedded in the output file name.
        title: Figure title. Defaults to 'title', the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        None. The chart is saved to "{outdir}/vistest-Month_{month}.html"
        with auto_open disabled.
    """
    # (column name, line colour) for each trace, in drawing order.
    series_colors = (("target", "black"), ("pred", "red"))
    data = [
        go.Scatter(
            x=df.index, y=df[column].values,
            mode="lines", name=column, marker=dict(), text=df.index,
            line=dict(color=color),
        )
        for column, color in series_colors
    ]

    layout = dict(
        title=title,
        xaxis=dict(title="Date", ticklen=5, zeroline=False),
        yaxis=dict(title="Value", ticklen=5, zeroline=False),
    )

    fig = dict(data=data, layout=layout)
    plot(fig, filename=f"{outdir}/vistest-Month_{month}.html", auto_open=False)
    
def get_end_day(month, year=2014):
    """Return the number of days in ``month`` of ``year``.

    Args:
        month: Month number, 1-12.
        year: Calendar year used to resolve February. Defaults to 2014 (a
            non-leap year), the value that was previously hard-coded; pass
            the actual year to get 29 for February in leap years.

    Returns:
        The last day of the month as an int (28-31).
    """
    return calendar.monthrange(year, month)[1]

In [None]:
# Month-by-month inference over every (dataset, model, forecast horizon) combination.
# For each combination the trained checkpoint and the matching test split are loaded;
# for each calendar month of the test year, the prediction at the last horizon step is
# aligned with the observed value and written out as a CSV plus an HTML plot.

# '../../train-test-sets/Data_Pods-Train2013:2017Pod0-Test2018Pod0/'
DATASETS = ['Pods-Train2013:2017Pod0-Test2018Pod0']
MODELS = ['LSTM', 'GRU', 'DeepAR', 'NBeats', 'TFT']
AHEADS = [1, 96, 672]
MONTHS = list(range(1, 13))
# Encoder (look-back) length in time steps.
# NOTE(review): the checkpoint path below contains "Lags_45" while nlags is 135 --
# confirm which look-back length the checkpoints were actually trained with.
nlags = 135#45
TEST_YEARS = {
    'Pods-Train2014Pod0-Test2015Pod0':2015,
    'Pods-Train2013:2017Pod0-Test2018Pod0':2018,
}

# Model name -> class used to restore its checkpoint (loop-invariant, so built once).
model_dict = {
    'TFT': TemporalFusionTransformer,
    'LSTM': RecurrentNetwork,
    'GRU': RecurrentNetwork,
    'DeepAR': DeepAR,
    'NBeats': NBeats,
    'NHits': NHiTS
}

for dataset in tqdm(DATASETS, desc='dataset', leave=True):
    test_year = TEST_YEARS[dataset]
    for modelname in tqdm(MODELS, desc='models', leave=False):
        for ahead in tqdm(AHEADS, desc='ahead', leave=False):
            SAVEDIR = f'../../trained-models/Data_{dataset}/Model_{modelname}/Loss_CrossEntropy/Lags_45/Prediction_point_{ahead}'

            # load model
            model = model_dict[modelname].load_from_checkpoint(SAVEDIR + "/model.ckpt")

            # load data: the test split lives under the same sub-path, with
            # "train-test-sets" as root and "Others" in place of the model name
            test = pd.read_parquet(f"{SAVEDIR.replace('trained-models','train-test-sets').replace(modelname,'Others')}/test.pqt")#.head(1000)
            test['date'] = pd.to_datetime(test['date'])
            test['year'] = test['date'].dt.year
            test['month'] = test['date'].dt.month
            test['day'] = test['date'].dt.day
            # time_idx is assigned before dropna, so dropped rows leave gaps in it
            test['time_idx'] = range(len(test))
            test['group'] = 0
            test = test.dropna()

            for month in tqdm(MONTHS, desc='month', leave=False):
                # Select the whole calendar month through the derived year/month columns.
                # Comparing 'date' against the string "<year>-<month>-<last day>" stops at
                # 00:00 of the last day, which drops the rest of that day for intraday data.
                in_month = (test['year'] == test_year) & (test['month'] == month)
                # Explicit copy: columns are assigned on this slice further down.
                sub = test.loc[in_month].copy()

                prediction_length = ahead
                max_encoder_length = nlags
                # Built fresh for every dataset so no list is shared between instances.
                dataset_args = dict(
                    min_encoder_length=max_encoder_length,
                    max_encoder_length=max_encoder_length,
                    min_prediction_length=prediction_length,
                    max_prediction_length=prediction_length,
                    time_varying_known_reals = ['Tensione','PotContrImp','PotDisp','PotMax','year','month','day'],
                    time_varying_known_categoricals = ['Pod','PuntoDispacciamento','Trattamento'],
                    time_varying_unknown_reals= ["value"],
                    add_relative_time_idx=True,
                    add_target_scales=True,
                    add_encoder_length=True,
                    allow_missing_timesteps=True
                )

                # NBeats is configured with the target series only: no covariates
                # and none of the automatically added features.
                if modelname == 'NBeats':
                    dataset_args['add_relative_time_idx'] = False
                    dataset_args['time_varying_known_reals'] = []
                    dataset_args['time_varying_known_categoricals'] = []
                    dataset_args['add_target_scales'] = False
                    dataset_args['add_encoder_length'] = False

                if modelname == 'NHits':
                    dataset_args['add_relative_time_idx'] = False

                # NOTE(review): the dataset is built from the monthly slice itself rather
                # than from the training dataset's parameters, so any encoders/normalizers
                # are fitted on this slice -- confirm this matches how the model was trained.
                testing = TimeSeriesDataSet(
                    sub,
                    group_ids=["group"],
                    target="value",
                    time_idx="time_idx",
                    **dataset_args
                )
                test_dataloader = testing.to_dataloader(train=False, batch_size=32)

                raw_pred = model.predict(test_dataloader, trainer_kwargs=dict(accelerator="cpu"))
                # keep only the forecast for the last step of the horizon
                raw_pred = [float(x[ahead-1]) for x in raw_pred]

                # Shift the target so each row holds the value observed `ahead` rows later,
                # drop rows that no longer have a target, and pair the last len(raw_pred)
                # rows with the predictions.
                # NOTE(review): the shift is row-based, so this alignment presumably relies
                # on the slice having no gaps in time_idx -- verify.
                sub['value'] = sub['value'].shift(-ahead)
                sub = sub[np.isfinite(sub['value'])].tail(len(raw_pred)).copy()

                sub['pred'] = raw_pred
                sub = sub.rename(columns={'value':'target'})
                sub['target'] = sub['target'].astype(float)
                sub['pred'] = sub['pred'].astype(float)
                sub = sub.reset_index(drop=True)

                # per-month CSV with the aligned target/pred columns
                OUTDIR = SAVEDIR.replace('trained-models','inference')
                Path(OUTDIR).mkdir(parents=True, exist_ok=True)
                sub.to_csv(f"{OUTDIR}/test-Month_{month}.csv", index=True, header=True)

                # per-month HTML plot
                OUTDIR = SAVEDIR.replace('trained-models','visual')
                Path(OUTDIR).mkdir(parents=True, exist_ok=True)
                plot_dataset(outdir=OUTDIR, df=sub, month=month)

dataset:   0%|          | 0/1 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

ahead:   0%|          | 0/3 [00:00<?, ?it/s]

month:   0%|          | 0/12 [00:00<?, ?it/s]

month:   0%|          | 0/12 [00:00<?, ?it/s]