In [1]:
import pandas as pd
import pytorch_lightning as pl
import numpy as np
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import GroupNormalizer
from pytorch_forecasting.models.temporal_fusion_transformer import TemporalFusionTransformer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_forecasting.metrics.quantile import QuantileLoss

2023-02-12 17:41:48.527682: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-12 17:41:48.527705: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
dir_dataset = '../Datasets/dataset_fusion_transfomers/LD2011_2014.txt'
data = pd.read_csv(dir_dataset, index_col=0, sep=';', decimal=',')
data.index = pd.to_datetime(data.index)
data.sort_index(inplace=True)

print(data.head(5))

# down sampling of the information
data = data.resample('1h').mean().replace(0., np.nan)
earliest_time = data.index.min()
#df = data[['MT_002', 'MT_004', 'MT_005', 'MT_006', 'MT_008']]
df = data[['MT_004']]

                     MT_001  MT_002  MT_003  MT_004  MT_005  MT_006  MT_007  \
2011-01-01 00:15:00     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2011-01-01 00:30:00     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2011-01-01 00:45:00     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2011-01-01 01:00:00     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2011-01-01 01:15:00     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

                     MT_008  MT_009  MT_010  ...  MT_361  MT_362  MT_363  \
2011-01-01 00:15:00     0.0     0.0     0.0  ...     0.0     0.0     0.0   
2011-01-01 00:30:00     0.0     0.0     0.0  ...     0.0     0.0     0.0   
2011-01-01 00:45:00     0.0     0.0     0.0  ...     0.0     0.0     0.0   
2011-01-01 01:00:00     0.0     0.0     0.0  ...     0.0     0.0     0.0   
2011-01-01 01:15:00     0.0     0.0     0.0  ...     0.0     0.0     0.0   

                     MT_364  MT_365  MT_366  MT_367  MT_368  MT_369 

In [3]:
for label in df:
    ts = df[label]

    start_date = min(ts.fillna(method='ffill').dropna().index)
    end_date = max(ts.fillna(method='bfill').dropna().index)

    active_range = (ts.index >= start_date) & (ts.index <= end_date)
    ts = ts[active_range].fillna(0.)

    tmp = pd.DataFrame({'power_usage': ts})
    date = tmp.index

    tmp['hours_from_start'] = (date - earliest_time).seconds / 60 / 60 + (date - earliest_time).days * 24
    tmp['hours_from_start'] = tmp['hours_from_start'].astype('int')

    tmp['days_from_start'] = (date - earliest_time).days
    tmp['date'] = date
    tmp['consumer_id'] = label
    tmp['hour'] = date.hour
    tmp['day'] = date.day
    tmp['day_of_week'] = date.dayofweek
    tmp['month'] = date.month

time_df = tmp


In [13]:
max_prediction_length = 24
max_encoder_length = 7 * 24
training_cutoff = time_df["hours_from_start"].max() - max_prediction_length

training = TimeSeriesDataSet(
    time_df[lambda x: x.hours_from_start <= training_cutoff],
    time_idx="hours_from_start",
    target="power_usage",
    group_ids=["consumer_id"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["consumer_id"],
    time_varying_known_reals=["hours_from_start", "day", "day_of_week", "month", 'hour'],
    time_varying_unknown_reals=['power_usage'],
    target_normalizer=GroupNormalizer(
        groups=["consumer_id"], transformation="softplus"
    ),  # we normalize by group
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

validation = TimeSeriesDataSet.from_dataset(training, time_df, predict=True, stop_randomization=True)

# create dataloaders for  our model
batch_size = 64
# if you have a strong GPU, feel free to increase the number of workers
val_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
#val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

In [14]:
import torch
from tqdm import tqdm

dir_model = "/mnt/arquivos_linux/wile_C/Predictive_Maintenance_free_dataset/lightning_logs/version_0/checkpoints/epoch=6-step=2877.ckpt"

best_tft = TemporalFusionTransformer.load_from_checkpoint(dir_model)

actuals = torch.cat([y[0] for x, y in tqdm(iter(val_dataloader))])
#print((val_dataloader.dataset[0][0]))

predictions = best_tft.predict(val_dataloader)

# average p50 loss overall
print((actuals - predictions).abs().mean().item())
# average p50 loss per time series
print((actuals - predictions).abs().mean(axis=1))

raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)

print(raw_predictions._fields)

print('\n')
print(raw_predictions['prediction'].shape)


best_tft.plot_prediction(x, raw_predictions, idx=0, add_loss_to_title=True)

  rank_zero_warn(
  rank_zero_warn(
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 411/411 [00:16<00:00, 24.97it/s]


KeyboardInterrupt: 

In [None]:
for idx in range(raw_predictions.prediction.shape[0]):
    best_tft.plot_prediction(x, raw_predictions, idx=idx, add_loss_to_title=True);

In [12]:
raw_predictions.prediction.shape

torch.Size([1, 24, 7])