In [1]:
import numpy as np
from sktime.forecasting.naive import NaiveForecaster
from sktime.transformations.series.outlier_detection import HampelFilter

from app.data_managers.namespaces import data_ns
from app.modelling.metrics import mae, rmse, maxae
from app.modelling.splitters import ExpandingWindowSplitter, SlidingWindowSplitter
from app.modelling.transformers import (
    CompletnessFilter,
    DormantFilter,
    ImputerBackTime,
    ImputerPandas,
    NanDropper,
)
from app.tuner.tuner import Tuner, BEST_MODEL

# Number of future steps produced by every forecast run.
FORECAST_PERIOD = 48
# Relative forecasting horizon: the integer steps 1, 2, ..., FORECAST_PERIOD.
forecast_horizon = np.arange(FORECAST_PERIOD) + 1

  from .autonotebook import tqdm as notebook_tqdm
2024-02-04 21:46:53,179	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
# Candidate baseline forecasters, grouped into named packs.
# The outer keys are pack labels; the inner keys are labels for each model.
forecasters = {
    "PACK_1": {
        # Drift strategy over the training window.
        "NAIVE_DRIFT": NaiveForecaster(strategy="drift"),
        # Repeat the last observed season, seasonal periodicity 24.
        "NAIVE_LAST_24H": NaiveForecaster(sp=24, strategy="last"),
    },
    "PACK_2": {
        # Repeat the last observed season, seasonal periodicity 48.
        "NAIVE_LAST_48H": NaiveForecaster(sp=48, strategy="last"),
        # Seasonal mean (sp=24) over the last 30 * 24 = 720 observations.
        # NOTE(review): the label suggests hourly data (30 days) -- confirm.
        "NAIVE_MEAN_30D_SP24H": NaiveForecaster(
            sp=24,
            strategy="mean",
            window_length=30 * 24,
        ),
    },
}

In [3]:
# Cross-validation splitters, keyed by a label matching the class name.
# Both share the same horizon and advance by one full forecast period per fold.
splitters = {
    "ExpandingWindowSplitter": ExpandingWindowSplitter(
        step_length=FORECAST_PERIOD,
        fh=forecast_horizon,
    ),
    "SlidingWindowSplitter": SlidingWindowSplitter(
        # Training window of 60 forecast periods (60 * 48 = 2880 steps).
        window_length=60 * FORECAST_PERIOD,
        step_length=FORECAST_PERIOD,
        fh=forecast_horizon,
    ),
}

In [4]:
# Preprocessing steps handed to the Tuner, in list order.
# NOTE(review): presumably applied sequentially in this order -- confirm in Tuner.
transformers = [
    DormantFilter(period=FORECAST_PERIOD + 48),
    CompletnessFilter(0.5),
    # Disabled alternatives, kept for reference:
    # ImputerBackTime(period_to_take_value_from=24),
    # HampelFilter(window_length=72),
    ImputerPandas(method="linear"),
    NanDropper(),
]

In [5]:
# Assemble the tuner from the models, splitters and preprocessing defined above.
tuner = Tuner(
    # What to search over.
    forecasters=forecasters,
    splitters=splitters,
    transformers=transformers,
    forecast_horizon=forecast_horizon,
    # How candidates are scored and combined.
    metrics={"mae": mae, "rmse": rmse, "maxae": maxae},
    # Each strategy is registered under its own name (key == value).
    strategies={name: name for name in (BEST_MODEL, "mean", "median")},
    # NOTE(review): the traceback recorded under the tune cell shows a parse
    # against "%Y-%m-%d %H" failing on "2023-05-31"; keep the hour component
    # on every entry and confirm how Tuner parses these strings.
    dates=["2023-05-31 00", "2023-09-30 00", "2023-12-20 00", "2024-01-20 00"],
    max_forecasts=4,
    # Where data is read from and results are written to.
    input_dir=data_ns.TRANSFORMED_DATA_DIR,
    result_dir=data_ns.FORECAST_RESULT_DIR,
    selection_dir=data_ns.SELECTED_DATA_DIR,
    # Execution settings.
    parallel_batch=20,
    engine="nsga_3",
)

In [7]:
# Run the tuning search.
# NOTE(review): n_gen / n_pop look like the generation count and population
# size of the configured search engine -- confirm against Tuner.tune.
tuner.tune(
    n_gen=2,
    n_pop=10,
    ev_metrics={"mae": mae, "rmse": rmse, "maxae": maxae},
    result_dir="FIRST_RESULTS",
)

ValueError: time data "2023-05-31" doesn't match format "%Y-%m-%d %H", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.