In [0]:
%run "./Forecasting_Models"

In [0]:
%run "./General_Functions"

In [0]:
%run "./Performance_Metrics"

#### Notebook with functions to perform hyperparameter tuning of forecasting models

This notebook contains all the functions related to the hyperparameter tuning process of the solution, where in this
context tuning involves the training, validation and selection steps. This process is done using the HyperOpt
optimization framework, which uses a Bayesian optimization approach to perform an informed search over the solution
space in order to find the best possible set of hyperparameters.

As the modelling stage is done at the product (SKU) level, the hyperparameter optimization process happens independently
for each product.

The functions included are:

| Function | Description |
| -------- | ----------- |
| `objective_function` | Objective function used within the hyperparameter optimization process. This function defines, trains and evaluates a forecasting model as part of the procedure that happens during tuning |
| `tune_ts_model` | Performs hyperparameter tuning of a forecasting model using the Bayesian optimization framework "HyperOpt" |

###### Definition of functions

In [0]:
def objective_function(params, data, model_name, start_val, end_val, holidays=False, df_frds=None):
    """
    Defines, trains and validates a forecasting model defined by the algorithm in "model_name" and the hyperparameters
    given in "params". First the input series is split into train and validation windows, then the model is trained with
    the train portion and validated with the validation portion using the WAPE metric.

    This function serves as the objective function to optimize during tuning, where the optimized metric is the
    validation WAPE.

    Parameters
    __________
        params (dict): Dictionary with the hyperparameters to use to define the model.
        data (pd.DataFrame): Dataset with the time series. The column "y" is read as the target; the column "ds" is
            read for the "prophet" algorithm and the column "holiday" is read for "sarimax" when "holidays" is True.
        model_name (str): Algorithm to use to define the model. Supported values are "sarimax" and "prophet".
        start_val (str): Start of validation set (included).
        end_val (str): End of validation set (included).
        holidays (bool, defaults to False): Flag to indicate whether the holidays are included or not.
        df_frds (pd.DataFrame, defaults to None): Dataset with holidays. Only used by the "prophet" algorithm, where
            it is required whenever "holidays" is True.

    Returns
    _______
        dict : Dictionary with the results of the model defined by "params". This includes:
            - "loss": Validation WAPE (objective metric)
            - "train_wape": Train WAPE
            - "params": The hyperparameters that were evaluated
            - "status": HyperOpt status flag (STATUS_OK)

    Raises
    ______
        RuntimeError: If "model_name" is not one of the supported algorithms.
        ValueError: If holidays are requested for "prophet" but "df_frds" was not provided.
    """
    # Failing fast on unsupported algorithms, before any data is split or any model is trained
    if model_name not in ("sarimax", "prophet"):
        raise RuntimeError("Specified algorithm is not supported or it wasnt entered correctly")

    # Prophet needs the holidays dataset whenever holidays are requested
    if holidays and (model_name == "prophet") and (df_frds is None):
        raise ValueError('"df_frds" must be provided when "holidays" is True and "model_name" is "prophet"')

    # Splitting the series
    df_train, df_val = split_series(data, start_val, end_val)

    if model_name == "sarimax":
        # Holiday indicator of the validation window, used as exogenous regressor when forecasting
        exog = df_val["holiday"].values if holidays else None

        # Obtaining trained SARIMAX model
        model = obtain_sarimax(df_train, params, holidays=holidays)

        # Retrieving fitted values for train set
        train_fcsts = model.fittedvalues

        # Generating forecast for validation set (positions right after the last training observation)
        val_fcsts = model.predict(
            start=len(df_train),
            end=len(df_train) + len(df_val) - 1,
            exog=exog,
            typ="levels"
        )

        # Calculating performance metrics
        train_wape = wape(df_train["y"].values, train_fcsts.values)
        val_wape = wape(df_val["y"].values, val_fcsts.values)

    else:
        # Working on a copy so the caller's holidays dataset is never modified
        df_holidays = df_frds.copy() if holidays else None

        # Obtaining trained Prophet model
        model = obtain_prophet(df_train, params, df_holidays=df_holidays)

        # Generating forecast for the entire series
        all_fcsts = model.predict(data)[["ds", "yhat"]]

        # Splitting into fitted values and forecasts. The validation window is bounded on both sides so that
        # observations after "end_val" (if "data" contains any) are not compared against the validation set.
        # NOTE(review): assumes split_series treats "end_val" as inclusive, as documented above - confirm.
        train_fcsts = all_fcsts[all_fcsts["ds"] < start_val]
        val_fcsts = all_fcsts[(all_fcsts["ds"] >= start_val) & (all_fcsts["ds"] <= end_val)]

        # Calculating performance metrics
        train_wape = wape(df_train["y"].values, train_fcsts["yhat"].values)
        val_wape = wape(df_val["y"].values, val_fcsts["yhat"].values)

    return {"loss": val_wape, "train_wape": train_wape, "params": params, "status": STATUS_OK}

In [0]:
def tune_ts_model(model_name, search_space, num_evals, data, start_val, end_val, holidays=False, df_frds=None):
    """
    Performs hyperparameter tuning of a forecasting model based on native time series techniques using the bayesian
    optimization framework "HyperOpt".

    The metric to optimize during tuning is the WAPE on the validation set, which is done by means of a common single
    validation set approach.

    Parameters
    __________
        model_name (str): Algorithm to use to define the model.
        search_space (dict): Dictionary with the hyperparameter search space defined in hyperopt.
        num_evals (int): Number of evaluations to perform over the search space during tuning.
        data (pd.DataFrame): Dataset with the time series.
        start_val (str): Start of validation set (included).
        end_val (str): End of validation set (included).
        holidays (bool, defaults to False): Flag to indicate whether the holidays are included or not.
        df_frds (pd.DataFrame, defaults to None): Dataset with holidays.

    Returns
    _______
        dict : Dictionary with the best results found after the optimization. This includes:
            - "train_wape": Train WAPE
            - "val_wape": Validation WAPE
            - "params": Set of the best hyperparameters that produce the above metrics.
    """
    # Results object that records the outcome of every evaluation
    trials = Trials()

    # Defining objective function with additional arguments, leaving only the hyperparameters free
    mod_function = partial(
        objective_function,
        data=data,
        model_name=model_name,
        start_val=start_val,
        end_val=end_val,
        holidays=holidays,
        df_frds=df_frds
    )

    # Performing optimization for model tuning. The return value of fmin is intentionally discarded: the best
    # evaluation is recovered below from "trials", where each result carries its own "params" and "train_wape".
    fmin(
        fn=mod_function,
        space=search_space,
        algo=tpe.suggest,
        max_evals=num_evals,
        trials=trials,
        verbose=False
    )

    # Extracting optimization results and sorting them according to validation results (best first)
    df_results = pd.DataFrame(trials.results).sort_values(by="loss", ascending=True)

    return {
        "train_wape": df_results["train_wape"].iloc[0],
        "val_wape": df_results["loss"].iloc[0],
        "params": df_results["params"].iloc[0],
    }
