In [17]:
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '../../src'))

In [18]:
import logging
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)

In [19]:
from data import load_target, load_covariates

In [20]:
import torch
from darts.models import RNNModel
from darts.dataprocessing.transformers.scaler import Scaler
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from tqdm import tqdm
from optuna.integration import PyTorchLightningPruningCallback
import optuna
import numpy as np 
from sklearn.preprocessing import MaxAbsScaler
from darts.metrics import smape, mse
from darts.dataprocessing.transformers.scaler import Scaler
from darts.utils.likelihood_models import GaussianLikelihood
from darts import concatenate

In [21]:
def train_val_test_split(series, train_split: float, val_split: float):
    val_len = int(len(series) * train_split)
    test_len = int(len(series) * val_split)
    train, val, test = series[:val_len], series[val_len:test_len], series[test_len:]
    return train, val, test

In [22]:
# Load Data
target_series = load_target('../../data/03_processed/on_forecourt_sessions.csv', group_cols='location_id',
                            time_col='date', value_cols='energy_delivered_kwh', static_cols=['num_evse'], freq='D')
covariates = load_covariates('../../data/03_processed/weather_ecad.csv', time_col='date',
                                value_cols=['temp_max', 'temp_min', 'sunshine', 'precip'], freq='D')

target_series = [series for series in target_series if len(series) == 1035]
# Cluster Time Series
series = concatenate(target_series, axis=1)

TRAIN_SPLIT = 0.7
VAL_SPLIT = 0.85

train_series, val_series, test_series = train_val_test_split(series, TRAIN_SPLIT, VAL_SPLIT)

# scale target
target_scaler = Scaler(MaxAbsScaler())
train_series = target_scaler.fit_transform(train_series)
val_series = target_scaler.transform(val_series)
series_transformed = target_scaler.transform(series)


train_covariates, val_covariates, test_covariates = train_val_test_split(covariates, TRAIN_SPLIT, VAL_SPLIT)
# scale covariate
covariate_scaler = Scaler(MaxAbsScaler())
train_covariates = covariate_scaler.fit_transform(train_covariates)
val_covariates = covariate_scaler.transform(val_covariates)
covariates_transformed = covariate_scaler.transform(covariates)

train_val_series = concatenate([train_series, val_series])

In [25]:
covariates

In [26]:
# define objective function
def objective(trial):
    # select input and output chunk lengths
    in_len = trial.suggest_int("in_len", 7, 64)
    out_len = trial.suggest_int("out_len", 1, in_len-1)

    # Other hyperparameters
    hidden_dim = trial.suggest_int("hidden_dim", 4, 32)
    n_rnn_layers = trial.suggest_int("n_rnn_layers", 1, 5)
    dropout = trial.suggest_float("dropout", 0.0, 0.4)
    lr = trial.suggest_float("lr", 5e-5, 1e-3, log=True)
    include_day = trial.suggest_categorical("day", [False, True])

    # throughout training we'll monitor the validation loss for both pruning and early stopping
    # pruner = PyTorchLightningPruningCallback(trial, monitor="val_loss")
    early_stopper = EarlyStopping("val_loss", min_delta=0.001, patience=5, verbose=True)

    pl_trainer_kwargs = {"callbacks": [ early_stopper]}
    num_workers = 0

    # optionally also add the (scaled) year value as a past covariate
    if include_day:
        encoders = {"datetime_attribute": {"future": ["day"]},
                    "transformer": Scaler()}
    else:
        encoders = None

    # reproducibility
    torch.manual_seed(42)

    # build the TCN model
    model = RNNModel(
        model="LSTM",
        hidden_dim=hidden_dim, 
        n_rnn_layers=n_rnn_layers,
        input_chunk_length=in_len,
        output_chunk_length=out_len,
        batch_size=32,
        n_epochs=100,
        add_encoders=encoders,
        nr_epochs_val_period=1,
        dropout=dropout,
        optimizer_kwargs={'lr': lr}, 
        random_state=0,
        model_name="deepar_model",
        likelihood=None, 
        loss_fn=torch.nn.MSELoss(),
        pl_trainer_kwargs=pl_trainer_kwargs,
        force_reset=True,
        save_checkpoints=True,
    )


    # train the model
    model.fit(
        series=train_series,
        future_covariates=train_covariates,
        val_series=val_series,
        val_future_covariates=val_covariates,
        num_loader_workers=num_workers,
    )

    # reload best model over course of training
    model = RNNModel.load_from_checkpoint("deepar_model")

    # Evaluate how good it is on the validation set, using sMAPE
    # preds = model.predict(series=train, n=VAL_LEN)

    smapes = model.backtest(
        train_val_series,
        start=val_series.start_time(),
        forecast_horizon=1,
        stride=1,
        last_points_only=False,
        retrain=False,
        verbose=True,
        metric=smape
    )
    
    smape_val = np.mean(smapes)

    return smape_val if smape_val != np.nan else float("inf")


# for convenience, print some optimization trials information
def print_callback(study, trial):
    print(f"Current value: {trial.value}, Current params: {trial.params}")
    print(f"Best value: {study.best_value}, Best params: {study.best_trial.params}")


# optimize hyperparameters by minimizing the sMAPE on the validation set
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50, callbacks=[print_callback])

[32m[I 2023-05-24 11:02:37,898][0m A new study created in memory with name: no-name-6eade4f1-f8bc-485c-bd87-6c82cbf2063c[0m
ValueError: Missing covariates; could not find past covariates in index value range: 2021-11-30 00:00:00 - 2021-12-23 00:00:00.
[33m[W 2023-05-24 11:02:37,955][0m Trial 0 failed with parameters: {'in_len': 41, 'out_len': 12, 'hidden_dim': 15, 'n_rnn_layers': 3, 'dropout': 0.3263329712937948, 'lr': 0.000698855431431833, 'day': True} because of the following error: ValueError('Missing covariates; could not find past covariates in index value range: 2021-11-30 00:00:00 - 2021-12-23 00:00:00.').[0m
Traceback (most recent call last):
  File "c:\Users\tijmen.vanetten\AppData\Local\miniconda3\envs\kedro\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\tijmen.vanetten\AppData\Local\Temp\ipykernel_14424\1074577359.py", line 55, in objective
    model.fit(
  File "c:\Users\tijmen.vanetten\AppData\

ValueError: Missing covariates; could not find past covariates in index value range: 2021-11-30 00:00:00 - 2021-12-23 00:00:00.

In [None]:
results = study.trials_dataframe()
results[results['value'] == results['value'].min()]

In [36]:
covariates.has_same_time_as(target_series)

False

In [37]:
target_series.is_within_range(covariates)

AttributeError: 'list' object has no attribute 'is_within_range'