In [5]:
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from itertools import islice

from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.repository.datasets import get_dataset

from model import VQTrEstimator

In [6]:
from pytorch_lightning.utilities.model_summary import summarize
from gluonts.dataset.common import ListDataset


In [7]:
#Tuning GluonTS models with Optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import optuna
import torch
from gluonts.mx import Trainer


In [8]:
# Forecast horizon, in time steps of the dataset's frequency.
# NOTE(review): the Optuna study further down passes
# `dataset.metadata.prediction_length` to the objective rather than this
# constant -- confirm the two values agree (or drop one of them).
prediction_length  = 24

In [9]:
# Load the "electricity" dataset from the GluonTS dataset repository.
# The returned object's `.metadata`, `.train` and `.test` attributes are what
# the tuning objective below consumes.
# NOTE(review): presumably downloaded and cached on first use -- confirm
# before running offline.
dataset = get_dataset("electricity")

In [16]:
class VQTrTuningObjective:
    """Optuna objective: train a ``VQTrEstimator`` and score it on the test split.

    An instance is callable with an Optuna trial. Each call samples the
    hyperparameters from :meth:`get_params`, trains a fresh estimator on
    ``dataset.train``, forecasts ``dataset.test`` and returns one aggregate
    metric produced by the GluonTS ``Evaluator``.

    Args:
        dataset: Object exposing ``metadata.freq``, ``train`` and ``test``.
        prediction_length: Forecast horizon handed to the estimator. The
            context length is derived from it (12x the horizon).
        metric_type: Key of the aggregate metric returned to Optuna.
    """

    # How many horizons of history the model conditions on.
    _CONTEXT_LENGTH_MULTIPLE = 12

    def __init__(self, dataset, prediction_length, metric_type="mean_wQuantileLoss"):
        self.dataset = dataset
        self.prediction_length = prediction_length
        self.metric_type = metric_type

    def get_params(self, trial) -> dict:
        """Sample the hyperparameters for one trial.

        Both layer counts are drawn from {2, 6, 10, 14}. The upper bound is 14
        rather than 16 because Optuna requires ``high - low`` to be a multiple
        of ``step``; with 16 it warns on every trial and truncates the range
        to 14 anyway.

        Args:
            trial: Optuna trial (anything providing ``suggest_int``).

        Returns:
            Dict with ``num_encoder_layers`` and ``num_decoder_layers``.
        """
        # Further candidate dimensions that are currently held fixed:
        # context_length, dim_feedforward, dropout.
        return {
            "num_encoder_layers": trial.suggest_int("num_encoder_layers", 2, 14, step=4),
            "num_decoder_layers": trial.suggest_int("num_decoder_layers", 2, 14, step=4),
        }

    def _build_estimator(self, params):
        """Create the estimator for one trial from the sampled ``params``."""
        return VQTrEstimator(
            freq=self.dataset.metadata.freq,
            # Use the horizon given to the constructor, not a notebook-level
            # global, so the objective is independent of kernel state.
            prediction_length=self.prediction_length,
            context_length=self.prediction_length * self._CONTEXT_LENGTH_MULTIPLE,
            codebook_size=25,
            dim_head=8,
            nhead=2,
            depth=1,
            num_encoder_layers=params["num_encoder_layers"],
            num_decoder_layers=params["num_decoder_layers"],
            dim_feedforward=16,
            activation="gelu",
            num_feat_static_cat=1,
            # NOTE(review): hard-coded; presumably the number of series in the
            # electricity dataset -- confirm before reusing with other data.
            cardinality=[321],
            embedding_dimension=[3],
            scaling=True,
            batch_size=256,
            num_batches_per_epoch=300,
            trainer_kwargs=dict(max_epochs=1, accelerator="gpu", devices=1),
        )

    def __call__(self, trial):
        """Run one trial and return the configured aggregate metric.

        Args:
            trial: Optuna trial used to sample hyperparameters.

        Returns:
            The value stored under ``self.metric_type`` in the evaluator's
            aggregate metrics.

        Raises:
            KeyError: If ``self.metric_type`` is not among the aggregate
                metrics returned by the evaluator.
        """
        params = self.get_params(trial)
        estimator = self._build_estimator(params)

        predictor = estimator.train(
            training_data=self.dataset.train,
            num_workers=8,
            shuffle_buffer_length=512,
            cache_data=True,
        )

        forecast_it, ts_it = make_evaluation_predictions(
            dataset=self.dataset.test,
            predictor=predictor,
        )
        # Materialise forecasts first, then the matching ground-truth series.
        forecasts = list(forecast_it)
        tss = list(ts_it)

        evaluator = Evaluator()
        agg_metrics, _ = evaluator(iter(tss), iter(forecasts))

        if self.metric_type not in agg_metrics:
            raise KeyError(
                "metric {!r} not found in aggregate metrics; available: {}".format(
                    self.metric_type, sorted(agg_metrics)
                )
            )
        return agg_metrics[self.metric_type]

In [17]:
import time

# Wall-clock reference point for the whole tuning run.
start_time = time.time()

# Minimise the objective's metric over 10 trials; the forecast horizon comes
# from the dataset's own metadata.
study = optuna.create_study(direction="minimize")
study.optimize(
    VQTrTuningObjective(
        dataset,
        prediction_length=dataset.metadata.prediction_length,
    ),
    n_trials=10,
)

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial: its metric value followed by the sampled parameters.
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

# Total elapsed time in seconds.
print(time.time() - start_time)

[32m[I 2022-08-21 20:49:31,814][0m A new study created in memory with name: no-name-ccbe6759-cce4-4f94-953e-c2b6fcd18ae1[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type      | Params
------------------------------------
0 | model | VQTrModel | 365 K 
------------------------------------
365 K     Trainable params
0         Non-trainable params
365 K     Total params
1.460     Total estimated model param

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 9.48965 (best 9.48965), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_7/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 25057.43it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:00:09,566][0m Trial 0 finished with value: 0.25837099847978373 and parameters: {'num_encoder_layers': 6, 'num_decoder_layers': 14}. Best is trial 0 with value: 0.25837099847978373.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU avai

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 9.97407 (best 9.97407), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_8/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 25275.02it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:08:26,952][0m Trial 1 finished with value: 0.27443531041628977 and parameters: {'num_encoder_layers': 14, 'num_decoder_layers': 10}. Best is trial 0 with value: 0.25837099847978373.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU ava

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 8.42607 (best 8.42607), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_9/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 26151.63it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:13:42,097][0m Trial 2 finished with value: 0.09769980001494089 and parameters: {'num_encoder_layers': 2, 'num_decoder_layers': 6}. Best is trial 2 with value: 0.09769980001494089.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU avail

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 9.27679 (best 9.27679), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_10/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 23623.99it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:21:50,459][0m Trial 3 finished with value: 0.08734118106732694 and parameters: {'num_encoder_layers': 6, 'num_decoder_layers': 10}. Best is trial 3 with value: 0.08734118106732694.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU ava

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 8.52826 (best 8.52826), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_11/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 26689.74it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:26:20,190][0m Trial 4 finished with value: 0.13912399491520058 and parameters: {'num_encoder_layers': 2, 'num_decoder_layers': 2}. Best is trial 3 with value: 0.08734118106732694.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU avai

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 9.30981 (best 9.30981), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_12/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 24728.37it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:32:01,350][0m Trial 5 finished with value: 0.0900566609536304 and parameters: {'num_encoder_layers': 14, 'num_decoder_layers': 6}. Best is trial 3 with value: 0.08734118106732694.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU avai

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 9.38202 (best 9.38202), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_13/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 24035.18it/s]
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fafc8818680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _Mult

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 9.02387 (best 9.02387), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_14/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 27594.91it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:46:10,298][0m Trial 7 finished with value: 0.18313897216192548 and parameters: {'num_encoder_layers': 10, 'num_decoder_layers': 6}. Best is trial 3 with value: 0.08734118106732694.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU ava

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 12.73336 (best 12.73336), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_15/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 26017.34it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 21:51:41,484][0m Trial 8 finished with value: 0.09802857866215788 and parameters: {'num_encoder_layers': 10, 'num_decoder_layers': 6}. Best is trial 3 with value: 0.08734118106732694.[0m
  low=low, old_high=old_high, high=high, step=step
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  cpuset_checked))
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU a

Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 300: 'train_loss' reached 9.94036 (best 9.94036), saving model to '/content/drive/MyDrive/vq-tr-main/lightning_logs/version_16/checkpoints/epoch=0-step=300.ckpt' as top 1
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
Running evaluation: 2247it [00:00, 25550.55it/s]
  return arr.astype(dtype, copy=True)
[32m[I 2022-08-21 22:00:33,560][0m Trial 9 finished with value: 0.11801514944531141 and parameters: {'num_encoder_layers': 14, 'num_decoder_layers': 10}. Best is trial 3 with value: 0.08734118106732694.[0m


Number of finished trials: 10
Best trial:
  Value: 0.08734118106732694
  Params: 
    num_encoder_layers: 6
    num_decoder_layers: 10
4261.750772714615
