# Hyperparameter Tuning with Optuna

In [None]:
import os

from google.colab import drive

# Mount Google Drive inside the Colab VM (force_remount=True asks for a fresh mount)
drive.mount('/content/gdrive', force_remount=True)

# Work from the project folder so the relative 'data/' and 'models/' paths used below resolve
os.chdir('/content/gdrive/My Drive/MRP/')

In [None]:
!pip install -U optuna
!pip install optuna-integration
!pip install 'u8darts[all]'

In [None]:
import datetime
import logging
import time
import warnings

# Silence warnings and all log records up to CRITICAL *before* the heavy
# third-party libraries are imported, so their import-time noise is hidden too.
warnings.filterwarnings("ignore")
logging.disable(logging.CRITICAL)

import numpy as np
import pandas as pd
from project_code import processing_functions as pf

from darts.dataprocessing.transformers import Scaler
from darts.metrics import mae, rmse
from darts.models import (
    BlockRNNModel,
    ExponentialSmoothing,
    LightGBMModel,
    NBEATSModel,
    XGBModel,
)
from pytorch_lightning.callbacks import Callback, EarlyStopping
import torch

In [None]:
# Paths are relative to the project working directory set at the top of the notebook
file_path_models = 'models/'
data_file = 'data/bbm_data_incl_outliers.csv'
# Search results are written next to the saved models
hyperparam_file = file_path_models + 'optuna_results.json'

# Raw dataset, indexed by its 'date' column
bbm_data = pd.read_csv(data_file, index_col='date')

In [None]:
# Series to forecast
target = pf.create_timeseries(bbm_data, 'sunshine_hr')

# Past covariates: the exogenous variables stacked into one time series
past_covariates = pf.get_covariate_ts(bbm_data)

# Chronological split: 1994 to 2021 for training, 2022 for validation, 2023 held out for testing.
# NOTE: despite its name, `training_cutoff` is the last day of the train+validation period;
# `validation_cutoff` is the last day of the training period.
training_cutoff = pd.Timestamp(year=2022, month=12, day=31)
validation_cutoff = pd.Timestamp(year=2021, month=12, day=31)

# Drop everything after `training_cutoff` (test data is not used in this notebook, see
# mrp_experiments.ipynb), then cut the remainder into train / validation.
target_train, target_val = (
    target.split_after(training_cutoff)[0].split_after(validation_cutoff)
)

# Only the training slice of the covariates is needed explicitly (to fit the scaler);
# Darts matches up the required time slices of the full covariate series on its own.
covariates_train = (
    past_covariates.split_after(training_cutoff)[0].split_after(validation_cutoff)[0]
)

# Fit the scaler on training data only to avoid information leakage, then apply it to the full series
covariate_scaler = Scaler()
covariate_scaler.fit(covariates_train)
past_covariates_trf = covariate_scaler.transform(past_covariates)


# Hyperparameter Tuning

## GPU Details

In [None]:
# Record which GPU (if any) is attached to this runtime before the tuning runs start
!nvidia-smi

In [None]:
# --- Tuning configuration -------------------------------------------------
SEED = 0                                  # seed passed to torch / numpy in the search cells
N_TRIALS = 100                            # Optuna trials per model and forecast horizon
MAX_NUM_EPOCHS = 150                      # upper bound on training epochs for the neural models
FORECAST_HORIZONS = (1, 3, 7, 14, 28)     # forecast horizons to tune for

# Candidate look-back windows for the neural models: twice each horizon
INPUT_CHUNK_LENGTHS = [x*2 for x in FORECAST_HORIZONS]

# Candidate lag counts for the tree models: twice each horizon, except the
# 28-step horizon which is kept at 28. The set comprehension removes the
# resulting duplicate and `sorted` returns a list of built-in ints.
# (np.unique would yield np.int64 elements, which the stdlib JSON encoder
# cannot serialise and which Optuna flags as non-primitive categorical choices.)
LAGS = sorted({2*x if x < 28 else x for x in FORECAST_HORIZONS})

# Accumulates the search results of every model; rewritten to disk after each search
all_results = {}

**Note:** This notebook is structured for a single, uninterrupted run. In practice, Colab runtime disconnections interrupted the run several times, so the most recent results file had to be read back in to continue from, and certain key lines had to be revised accordingly (e.g. *for fh in FORECAST_HORIZONS[2:]:*). The following code can be used in such cases, with references to *all_results* updated to *current_results*.

In [None]:
# current_results = pf.read_json_file(file=hyperparam_file, output_type='dict')
# current_results

## N-BEATS

In [None]:
version = 'generic'

for fh in FORECAST_HORIZONS:

    model_name = f'optuna_nbeats_{version}_fh{fh}'

    print(f'\nRunning Experiment for {model_name}...\n')

    def objective_nbeats(trial):
        """Train one N-BEATS candidate and return its validation RMSE.

        Reads `fh`, `model_name` and `version` from the enclosing cell, so it
        must be used within the loop iteration that defined it.

        Args:
            trial: Optuna trial used to sample the hyperparameters.

        Returns:
            RMSE between the `fh`-step forecast made after training and the
            first `fh` points of `target_val`.
        """
        torch.manual_seed(SEED)

        pruner = pf.PyTorchLightningPruningCallback(trial, monitor='val_loss')
        early_stopper = EarlyStopping(
            monitor='val_loss',
            min_delta=0.001,  # min change required to qualify as an improvement
            patience=10,  # num validation epochs w/ no improvement before training is stopped
            verbose=True,
            mode='min',
        )

        # Callbacks are always attached; the GPU accelerator is requested only when one is available
        pl_trainer_kwargs = {'callbacks': [pruner, early_stopper]}
        if torch.cuda.is_available():
            pl_trainer_kwargs['accelerator'] = 'gpu'

        # Hyperparameter space (order of the suggest calls is kept stable on purpose)
        input_chunk_length = trial.suggest_categorical('input_chunk_length', INPUT_CHUNK_LENGTHS)
        num_stacks = trial.suggest_categorical('num_stacks', [10, 20, 30])  # only used in model if generic_architecture is set to True
        num_blocks = trial.suggest_categorical('num_blocks', [1, 2, 3])
        num_layers = trial.suggest_categorical('num_layers', [3, 4, 5])
        layer_widths = trial.suggest_categorical('layer_widths', [256, 512])
        dropout = trial.suggest_float('dropout', 0, 0.4)
        activation = trial.suggest_categorical('activation', ['ReLU', 'LeakyReLU'])
        learning_rate = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

        model = NBEATSModel(
            random_state=1,
            input_chunk_length=input_chunk_length,
            output_chunk_length=fh,
            num_stacks=num_stacks,
            num_blocks=num_blocks,
            num_layers=num_layers,
            layer_widths=layer_widths,
            dropout=dropout,
            activation=activation,
            batch_size=batch_size,
            n_epochs=MAX_NUM_EPOCHS,
            generic_architecture=(version == 'generic'),
            pl_trainer_kwargs=pl_trainer_kwargs,
            optimizer_kwargs={'lr': learning_rate},
            log_tensorboard=True,
            # timestamp suffix gives every trial its own model/log name
            model_name=f'{model_name}_{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}',
            save_checkpoints=True,
            force_reset=True,
        )

        model.fit(
            series=target_train,
            past_covariates=past_covariates,  # unscaled on purpose (author's note: N-BEATS does not require scaling)
            val_series=target_val,
            val_past_covariates=past_covariates,
        )

        # Forecast the `fh` steps that follow the training series and score them
        # against the start of the validation series. Darts metrics take
        # (actual_series, pred_series) in that order.
        y_pred = model.predict(n=fh)
        rmse_result = rmse(target_val[:fh], y_pred)

        return rmse_result

    # Run the search, add the epoch count reported by pf.get_best_num_epochs to the
    # best parameters, and pass the accumulated results to pf.post_results after every
    # horizon (the note above describes resuming after a Colab disconnection).
    hyp_search_results = pf.hyperparameter_search(objective_nbeats, n_trials=N_TRIALS, model_name=model_name)
    best_num_epochs = pf.get_best_num_epochs(model_name)
    hyp_search_results[model_name]['best_parameters']['n_epochs'] = best_num_epochs
    all_results.update(hyp_search_results)
    # current_results.update(hyp_search_results)
    pf.post_results(all_results, hyperparam_file, 'w')
    # pf.post_results(current_results, hyperparam_file, 'w')



In [None]:
# display(all_results)
# display(current_results)

## LSTM and GRU

In [None]:
for version in ['LSTM', 'GRU']:

    for fh in FORECAST_HORIZONS:

        model_name = f'optuna_{version.lower()}_fh{fh}'
        print(f'\nRunning Experiment for {model_name}...\n')

        def objective_rnn(trial):
            """Train one LSTM/GRU candidate and return its validation RMSE.

            Reads `version`, `fh` and `model_name` from the enclosing loops, so
            it must be used within the loop iteration that defined it.

            Args:
                trial: Optuna trial used to sample the hyperparameters.

            Returns:
                RMSE between the `fh`-step forecast made after training and the
                first `fh` points of `target_val`.
            """
            torch.manual_seed(SEED)

            pruner = pf.PyTorchLightningPruningCallback(trial, monitor='val_loss')
            early_stopper = EarlyStopping(
                monitor='val_loss',
                min_delta=0.001,  # min change required to qualify as an improvement
                patience=10,  # num validation epochs w/ no improvement before training is stopped
                verbose=True,
                mode='min',
            )

            # Callbacks are always attached; the GPU accelerator is requested only when one is available
            pl_trainer_kwargs = {'callbacks': [pruner, early_stopper]}
            if torch.cuda.is_available():
                pl_trainer_kwargs['accelerator'] = 'gpu'

            # Hyperparameter space (order of the suggest calls is kept stable on purpose)
            input_chunk_length = trial.suggest_categorical('input_chunk_length', INPUT_CHUNK_LENGTHS)
            batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
            hidden_dim = trial.suggest_int('hidden_dim', 15, 40)
            n_rnn_layers = trial.suggest_int('n_rnn_layers', 2, 5)
            dropout = trial.suggest_float('dropout', 0, 0.4)
            learning_rate = trial.suggest_float('lr', 1e-5, 1e-1, log=True)

            model = BlockRNNModel(
                random_state=1,
                input_chunk_length=input_chunk_length,
                output_chunk_length=fh,
                model=version,  # 'LSTM' or 'GRU'
                hidden_dim=hidden_dim,
                n_rnn_layers=n_rnn_layers,
                batch_size=batch_size,
                n_epochs=MAX_NUM_EPOCHS,
                dropout=dropout,
                pl_trainer_kwargs=pl_trainer_kwargs,
                optimizer_kwargs={'lr': learning_rate},
                log_tensorboard=True,
                # timestamp suffix gives every trial its own model/log name
                model_name=f'{model_name}_{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}',
                save_checkpoints=True,
                force_reset=True,
            )

            # Covariates scaled with the scaler that was fitted on the training slice only
            model.fit(
                series=target_train,
                past_covariates=past_covariates_trf,
                val_series=target_val,
                val_past_covariates=past_covariates_trf,
            )

            # Forecast the `fh` steps that follow the training series and score them
            # against the start of the validation series. Darts metrics take
            # (actual_series, pred_series) in that order.
            y_pred = model.predict(n=fh)
            rmse_result = rmse(target_val[:fh], y_pred)

            return rmse_result

        # Run the search, add the epoch count reported by pf.get_best_num_epochs to the
        # best parameters, and pass the accumulated results to pf.post_results after
        # every model/horizon (the note above describes resuming after a disconnection).
        hyp_search_results = pf.hyperparameter_search(objective_rnn, n_trials=N_TRIALS, model_name=model_name)
        best_num_epochs = pf.get_best_num_epochs(model_name)
        hyp_search_results[model_name]['best_parameters']['n_epochs'] = best_num_epochs
        all_results.update(hyp_search_results)
        # current_results.update(hyp_search_results)
        pf.post_results(all_results, hyperparam_file, 'w')
        # pf.post_results(current_results, hyperparam_file, 'w')



In [None]:
# display(all_results)
# display(current_results)

## XGBoost


In [None]:
np.random.seed(SEED)

for fh in FORECAST_HORIZONS:

    model_name = f'optuna_xgboost_fh{fh}'
    print(f'\nRunning Experiment for {model_name}...\n')

    def objective_xgb(trial):
        """Fit one XGBoost candidate and return its validation RMSE.

        Reads `fh` from the enclosing loop, so it must be used within the loop
        iteration that defined it.

        Args:
            trial: Optuna trial used to sample the lag settings.

        Returns:
            RMSE between the `fh`-step forecast made after fitting and the
            first `fh` points of `target_val`.
        """
        # hyperparameter space
        # LAGS is built with np.unique, so a sampled value may be a NumPy integer;
        # cast to the built-in int that Darts documents for its lag arguments.
        lags = int(trial.suggest_categorical('lags', LAGS))
        lags_past_covariates = int(trial.suggest_categorical('lags_past_covariates', LAGS))

        model = XGBModel(
            lags=lags,
            lags_past_covariates=lags_past_covariates,
            output_chunk_length=fh,
        )

        # Covariates scaled with the scaler that was fitted on the training slice only
        model.fit(
            series=target_train,
            past_covariates=past_covariates_trf,
            val_series=target_val,
            val_past_covariates=past_covariates_trf,
            verbose=False,
        )

        # Darts metrics take (actual_series, pred_series) in that order
        y_pred = model.predict(n=fh)
        rmse_result = rmse(target_val[:fh], y_pred)

        return rmse_result

    # Run the search and pass the accumulated results to pf.post_results after every horizon
    hyp_search_results = pf.hyperparameter_search(objective_xgb, n_trials=N_TRIALS, model_name=model_name)
    all_results.update(hyp_search_results)
    # current_results.update(hyp_search_results)
    pf.post_results(all_results, hyperparam_file, 'w')
    # pf.post_results(current_results, hyperparam_file, 'w')



In [None]:
# display(all_results)
# display(current_results)

## LightGBM

In [None]:
np.random.seed(SEED)

for fh in FORECAST_HORIZONS:

    model_name = f'optuna_lgbm_fh{fh}'
    print(f'\nRunning Experiment for {model_name}...\n')

    def objective_lgbm(trial):
        """Fit one LightGBM candidate and return its validation RMSE.

        Reads `fh` from the enclosing loop, so it must be used within the loop
        iteration that defined it.

        Args:
            trial: Optuna trial used to sample the lag settings.

        Returns:
            RMSE between the `fh`-step forecast made after fitting and the
            first `fh` points of `target_val`.
        """
        # hyperparameter space
        # LAGS is built with np.unique, so a sampled value may be a NumPy integer;
        # cast to the built-in int that Darts documents for its lag arguments.
        lags = int(trial.suggest_categorical('lags', LAGS))
        lags_past_covariates = int(trial.suggest_categorical('lags_past_covariates', LAGS))

        model = LightGBMModel(
            lags=lags,
            lags_past_covariates=lags_past_covariates,
            output_chunk_length=fh,
            verbose=-1,
        )

        # Covariates scaled with the scaler that was fitted on the training slice only
        model.fit(
            series=target_train,
            past_covariates=past_covariates_trf,
            val_series=target_val,
            val_past_covariates=past_covariates_trf,
        )

        # Darts metrics take (actual_series, pred_series) in that order
        y_pred = model.predict(n=fh)
        rmse_result = rmse(target_val[:fh], y_pred)

        return rmse_result

    # Run the search and pass the accumulated results to pf.post_results after every horizon
    hyp_search_results = pf.hyperparameter_search(objective_lgbm, n_trials=N_TRIALS, model_name=model_name)
    all_results.update(hyp_search_results)
    # current_results.update(hyp_search_results)
    pf.post_results(all_results, hyperparam_file, 'w')
    # pf.post_results(current_results, hyperparam_file, 'w')



In [None]:
# display(all_results)
# display(current_results)