In [1]:
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '../../src'))

In [2]:
import logging
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)

In [3]:
from data import load_target, load_covariates

In [9]:
import torch
from darts.models import TFTModel
from darts.dataprocessing.transformers.scaler import Scaler
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from tqdm import tqdm
from optuna.integration import PyTorchLightningPruningCallback
import optuna
import numpy as np 
from sklearn.preprocessing import MaxAbsScaler
from darts.metrics import smape, mse
from darts.dataprocessing.transformers.scaler import Scaler
from darts.utils.likelihood_models import GaussianLikelihood
from darts import concatenate

In [10]:
def train_val_test_split(series, train_split: float, val_split: float):
    val_len = int(len(series) * train_split)
    test_len = int(len(series) * val_split)
    train, val, test = series[:val_len], series[val_len:test_len], series[test_len:]
    return train, val, test

In [11]:
# Load Data
target_series = load_target('../../data/03_processed/on_forecourt_sessions.csv', group_cols='location_id',
                            time_col='date', value_cols='energy_delivered_kwh', static_cols=['num_evse'], freq='D')
covariates = load_covariates('../../data/03_processed/weather_ecad.csv', time_col='date',
                                value_cols=['temp_max', 'temp_min', 'sunshine', 'precip'], freq='D')

target_series = [series for series in target_series if len(series) == 1035]
# Cluster Time Series
series = concatenate(target_series, axis=1)

TRAIN_SPLIT = 0.7
VAL_SPLIT = 0.85

train_series, val_series, test_series = train_val_test_split(series, TRAIN_SPLIT, VAL_SPLIT)

# scale target
target_scaler = Scaler(MaxAbsScaler())
train_series = target_scaler.fit_transform(train_series)
val_series = target_scaler.transform(val_series)
series_transformed = target_scaler.transform(series)


train_covariates, val_covariates, test_covariates = train_val_test_split(covariates, TRAIN_SPLIT, VAL_SPLIT)
# scale covariate
covariate_scaler = Scaler(MaxAbsScaler())
train_covariates = covariate_scaler.fit_transform(train_covariates)
val_covariates = covariate_scaler.transform(val_covariates)
covariates_transformed = covariate_scaler.transform(covariates)

train_val_series = concatenate([train_series, val_series])

In [12]:
covariates

In [25]:
# define objective function
def objective(trial):
    # select input and output chunk lengths
    in_len = trial.suggest_int("in_len", 7, 64)
    out_len = trial.suggest_int("out_len", 1, in_len-1)

    # Other hyperparameters
    hidden_dim = trial.suggest_int("hidden_dim", 4, 32)
    lstm_layers = trial.suggest_int("lstm_layers", 1, 5)
    num_attention_heads = trial.suggest_int("num_attention_heads", 1, 8)
    full_attention = trial.suggest_categorical("full_attention", [False, True])
    dropout = trial.suggest_float("dropout", 0.0, 0.4)
    lr = trial.suggest_float("lr", 5e-5, 1e-3, log=True)
    include_day = trial.suggest_categorical("day", [False, True])

    # throughout training we'll monitor the validation loss for both pruning and early stopping
    pruner = PyTorchLightningPruningCallback(trial, monitor="val_loss")
    early_stopper = EarlyStopping("val_loss", min_delta=0.001, patience=5, verbose=False)

    pl_trainer_kwargs = {"callbacks": [ early_stopper, pruner]}

    num_workers = 0

    # optionally also add the (scaled) year value as a past covariate
    if include_day:
        encoders = {"datetime_attribute": {"future": ["day"]},
                    "transformer": Scaler()}
    else:
        encoders = None

    # reproducibility
    torch.manual_seed(42)

    # build the TCN model
    model = TFTModel(
        hidden_size=hidden_dim, 
        lstm_layers=lstm_layers,
        input_chunk_length=in_len,
        output_chunk_length=out_len,
        num_attention_heads=num_attention_heads,
        full_attention=full_attention,
        batch_size=32,
        n_epochs=100,
        add_encoders=encoders,
        nr_epochs_val_period=1,
        dropout=dropout,
        optimizer_kwargs={'lr': lr}, 
        random_state=0,
        model_name="tft_model",
        likelihood=GaussianLikelihood(),
        pl_trainer_kwargs=pl_trainer_kwargs,
        force_reset=True,
        save_checkpoints=True,
        add_relative_index=True,
    )


    # train the model
    model.fit(
        series=train_series,
        # past_covariates=covariates_transformed,
        val_series=val_series,
        # val_past_covariates=covariates_transformed,
        num_loader_workers=num_workers,
    )

    # reload best model over course of training
    model = TFTModel.load_from_checkpoint("tft_model")

    # Evaluate how good it is on the validation set, using sMAPE
    # preds = model.predict(series=train, n=VAL_LEN)

    smapes = model.backtest(
        train_val_series,
        start=val_series.start_time(),
        forecast_horizon=1,
        stride=1,
        last_points_only=False,
        retrain=False,
        verbose=True,
        metric=smape
    )
    
    smape_val = np.mean(smapes)

    return smape_val if smape_val != np.nan else float("inf")


# for convenience, print some optimization trials information
def print_callback(study, trial):
    print(f"Current value: {trial.value}, Current params: {trial.params}")
    print(f"Best value: {study.best_value}, Best params: {study.best_trial.params}")


# optimize hyperparameters by minimizing the sMAPE on the validation set
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50, callbacks=[print_callback])

[32m[I 2023-05-24 11:21:42,071][0m A new study created in memory with name: no-name-0ba649ce-a325-4d18-947b-c2b7cf1cd76c[0m
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params
----------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0     
1  | val_metrics                       | MetricCollection                 | 0     
2  | input_embeddings                  | _MultiEmbedding                  | 0     
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 2.1 K 
4  | encoder_vsn                       | _VariableSelectionNetwork        | 25.4 K
5  | decoder_vsn                       | _VariableSelectionNetwork        | 873   
6  | static_context_grn                | _GatedResidual

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

[33m[W 2023-05-24 11:21:48,489][0m Trial 0 failed with parameters: {'in_len': 43, 'out_len': 16, 'hidden_dim': 31, 'lstm_layers': 4, 'num_attention_heads': 6, 'full_attention': False, 'dropout': 0.07991211042119284, 'lr': 0.0008767835471428352, 'day': False} because of the following error: AttributeError("'Trainer' object has no attribute 'training_type_plugin'").[0m
Traceback (most recent call last):
  File "c:\Users\tijmen.vanetten\AppData\Local\miniconda3\envs\kedro\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\tijmen.vanetten\AppData\Local\Temp\ipykernel_12844\2018246492.py", line 59, in objective
    model.fit(
  File "c:\Users\tijmen.vanetten\AppData\Local\miniconda3\envs\kedro\lib\site-packages\darts\utils\torch.py", line 112, in decorator
    return decorated(self, *args, **kwargs)
  File "c:\Users\tijmen.vanetten\AppData\Local\miniconda3\envs\kedro\lib\site-packages\darts\models\forecasting\torch_for

AttributeError: 'Trainer' object has no attribute 'training_type_plugin'

In [None]:
results = study.trials_dataframe()
results[results['value'] == results['value'].min()]

In [None]:
covariates.has_same_time_as(target_series)

False

In [None]:
target_series.is_within_range(covariates)

AttributeError: 'list' object has no attribute 'is_within_range'

In [23]:
val_series.start_time().value

1640390400000000000