In [1]:
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.sarimax import SARIMAX
from sktime.forecasting.croston import Croston
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.arch import StatsForecastGARCH
from sktime.transformations.series.outlier_detection import HampelFilter

from app.data_managers.namespaces import data_ns
from app.modelling.metrics import mae, rmse, maxae
from app.modelling.models import LinReg, RandomForrest, RegressionTree
from app.modelling.splitters import ExpandingWindowSplitter, SlidingWindowSplitter
from app.modelling.transformers import (
    CompletnessFilter,
    DormantFilter,
    ImputerBackTime,
    ImputerPandas,
    NanDropper,
)
from app.tuner.tuner import Tuner, BEST_MODEL

# Number of steps predicted per forecast.
# NOTE(review): presumably hours (Prophet is configured with freq="H" below) — confirm.
FORECAST_PERIOD = 24

# Relative forecasting horizon: the integer steps 1, 2, ..., FORECAST_PERIOD.
forecast_horizon = np.arange(FORECAST_PERIOD) + 1

  from .autonotebook import tqdm as notebook_tqdm
2024-02-13 16:56:05,125	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
# Candidate forecasters handed to the Tuner, grouped into named packs.
# Outer keys are pack labels; inner keys are the labels under which each
# forecaster instance is registered.
# NOTE(review): how the Tuner interprets the pack grouping is not visible in
# this notebook — confirm against app.tuner.tuner.
#
# Prophet note: sktime's `add_seasonality` parameter takes a dict of keyword
# arguments for `Prophet.add_seasonality` (name, period, fourier_order, ...),
# not a boolean. The previous `add_seasonality=True` was therefore invalid
# (ignored or raising at fit time, depending on the sktime version) and has
# been dropped; the built-in seasonal components are switched on through the
# `daily_/weekly_/yearly_seasonality` flags instead.
forecasters = {
    "PACK_1": {
        "PROPHET": Prophet(),
        "CROSTON_0.01": Croston(smoothing=0.01),
        "NAIVE_DRIFT": NaiveForecaster(strategy="drift"),
    },
    "PACK_2": {
        "PROPHET_DEFAULT": Prophet(
            freq="H",
            daily_seasonality=True,
            weekly_seasonality=True,
            yearly_seasonality=True,
        ),
        "CROSTON_0.1": Croston(smoothing=0.1),
        "NAIVE_LAST_24H": NaiveForecaster(strategy="last", sp=24),
    },
    "PACK_3": {
        # Same as above but multiplicative, and weekly seasonality is left at
        # the library default rather than forced on.
        "PROPHET_MULT_SEAS": Prophet(
            freq="H",
            daily_seasonality=True,
            yearly_seasonality=True,
            seasonality_mode="multiplicative",
        ),
        "CROSTON_0.2": Croston(smoothing=0.2),
        "NAIVE_MEAN_30D_SP24H": NaiveForecaster(
            strategy="mean", window_length=30 * 24, sp=24
        ),
    },
    "PACK_4": {
        "ARIMA_1": ARIMA(),
        "CROSTON_0.5": Croston(smoothing=0.5),
        "NAIVE_MEAN_30D": NaiveForecaster(strategy="mean", window_length=30 * 24),
    },
    "PACK_5": {
        "ARIMA_2": ARIMA((1, 1, 0)),
        "CROSTON_0.8": Croston(smoothing=0.8),
        "NAIVE_MEAN_3D": NaiveForecaster(strategy="mean", window_length=3 * 24),
    },
    "PACK_6": {
        "LinReg": LinReg(),
        "NAIVE_LAST_48H": NaiveForecaster(strategy="last", sp=48),
        "NAIVE_MEAN_7D": NaiveForecaster(strategy="mean", window_length=7 * 24),
    },
    "PACK_7": {
        "RandomForrest": RandomForrest(),
        "NAIVE_MEAN_7D_SP24H": NaiveForecaster(
            strategy="mean", window_length=7 * 24, sp=24
        ),
        "StatsForecastGARCH": StatsForecastGARCH(),
    },
    "PACK_8": {
        "RegressionTree": RegressionTree(),
        "ARIMA_3": ARIMA((1, 1, 1)),
        "SARIMAX_D": SARIMAX((1, 0, 0), (1, 0, 0, 24)),
    },
}

Importing plotly failed. Interactive plots will not work.


In [4]:
# Cross-validation splitters offered to the Tuner. All of them share the same
# forecasting horizon and advance by one FORECAST_PERIOD per fold; the sliding
# variants differ only in how many FORECAST_PERIODs the training window spans.
# Insertion order is kept identical to the original literal.
splitters = {
    "ExpandingWindowSplitter": ExpandingWindowSplitter(
        step_length=FORECAST_PERIOD,
        fh=forecast_horizon,
    ),
    **{
        label: SlidingWindowSplitter(
            fh=forecast_horizon,
            step_length=FORECAST_PERIOD,
            window_length=FORECAST_PERIOD * n_periods,
        )
        for label, n_periods in (
            ("SlidingWindowSplitter_FP*60", 60),
            ("SlidingWindowSplitter_FP", 1),
            ("SlidingWindowSplitter_FP*7", 7),
            ("SlidingWindowSplitter_FP*30", 30),
        )
    },
}

In [5]:
# Pre-processing steps for the target series, passed to the Tuner as a list.
# NOTE(review): the traceback in the run output shows each transformer being
# called via `fit_transform(y)` in the pipeline task, so list order presumably
# is application order — confirm before reordering.
transformers = [
    DormantFilter(period=FORECAST_PERIOD + 48),
    CompletnessFilter(0.5),
    ImputerBackTime(period_to_take_value_from=24),
    HampelFilter(window_length=72),
    ImputerPandas(method="linear"),
    NanDropper(),
]

In [6]:
# Build the tuner from the candidate forecasters, splitters and transformers
# defined in the cells above, plus scoring, I/O and run settings.
tuner = Tuner(
    # --- candidates and pre-processing ---
    forecasters=forecasters,
    splitters=splitters,
    transformers=transformers,
    forecast_horizon=forecast_horizon,
    # --- scoring ---
    metrics={"mae": mae, "rmse": rmse, "maxae": maxae},
    # Each strategy is registered under its own name (identity mapping).
    strategies={name: name for name in (BEST_MODEL, "mean", "median")},
    # --- data locations and evaluation dates ---
    dates=["2023-04-20 00", "2023-09-20 00", "2023-12-07 00"],
    input_dir=data_ns.TRANSFORMED_DATA_DIR,
    result_dir=data_ns.FORECAST_RESULT_DIR,
    selection_dir=data_ns.SELECTED_DATA_DIR,
    # --- run settings ---
    max_forecasts=4,
    parallel_batch=10,
    engine="nsga_2",
)

In [8]:
# Run the search, evaluating candidates on RMSE only.
# NOTE(review): n_gen / n_pop are presumably the generation count and
# population size of the NSGA-II engine selected above, and time_constraint's
# unit is not visible here — confirm against Tuner.tune.
tuner.tune(
    n_gen=8,
    n_pop=8,
    ev_metrics={"rmse": rmse},
    result_dir="24h_8gen_8pop_nsga2",
    time_constraint=0.75,
)

  return reduction(axis=axis, out=out, **passkwargs)
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  return float(self._compared)
ERROR:root:Y_TRANSFORMER_ERROR
Traceback (most recent call last):
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\forecasting_pipeline_task.py", line 127, in _process_data
    y = transformer.fit_transform(y)
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\transformers.py", line 25, in fit_transform
    return self.fit(y).transform(y)
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\transformers.py", line 150, in transform
    raise ValueError("There is too much missing data.")
ValueError: There is too much missing data.
ERROR:root:Y_TRANSFORMER_ERROR
Traceback (most recent call last):
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\forecasting_pipeline_task.py"

[array([17.91063755,  7.41302831, 13.62564345,  0.33333333, -0.9537037 ]), array([17.91063755,  7.41302831, 13.62564345,  0.        , -0.9537037 ]), array([17.91063755,  7.41302831, 13.62564345,  0.        , -0.9537037 ]), array([17.91063755,  7.41302831, 13.62564345,  0.33333333, -0.9537037 ])]


  return reduction(axis=axis, out=out, **passkwargs)
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  return float(self._compared)
ERROR:root:Y_TRANSFORMER_ERROR
Traceback (most recent call last):
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\forecasting_pipeline_task.py", line 127, in _process_data
    y = transformer.fit_transform(y)
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\transformers.py", line 25, in fit_transform
    return self.fit(y).transform(y)
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\transformers.py", line 150, in transform
    raise ValueError("There is too much missing data.")
ValueError: There is too much missing data.
ERROR:root:Y_TRANSFORMER_ERROR
Traceback (most recent call last):
  File "c:\Users\Mambo\Desktop\bachelor_migration\app\modelling\forecasting_pipeline_task.py"

[array([17.91063755,  7.41302831, 13.62564345,  0.66666667, -0.9537037 ]), array([17.91063755,  7.41302831, 13.62564345,  0.66666667, -0.9537037 ]), array([17.91063755,  7.41302831, 13.62564345,  0.66666667, -0.9537037 ]), array([17.91063755,  7.41302831, 13.62564345,  0.        , -0.9537037 ])]


  return reduction(axis=axis, out=out, **passkwargs)
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  evaluation[f"{dependent_variable}_{str(metric)}"] = float(
  return float(self._compared)
  pickle.dump(res.F, open(os.path.join(result_dir, "final_F.pickle"), "wb"))
  pickle.dump(res.X, open(os.path.join(result_dir, "final_X.pickle"), "wb"))
