In [17]:
import pandas as pd
import warnings
import os
from app.data_managers.namespaces import data_ns, files_ns

# Silence every warning category for the remainder of the kernel session.
# NOTE(review): this is a blanket filter, so warnings raised by later cells
# (deprecations, convergence issues, ...) are hidden as well.
warnings.filterwarnings("ignore")

# Location of the energy price CSV, assembled from the project's path constants.
path = os.path.join(
    files_ns.DATA_FOLDER,
    files_ns.CURATED_FOLDER,
    files_ns.ENERGY_PRICE,
)

# Read the CSV with the time column parsed into a datetime index, then conform
# that index to an hourly frequency (asfreq inserts NaN rows for absent hours).
data = pd.read_csv(
    path,
    parse_dates=[data_ns.TIME],
    index_col=data_ns.TIME,
).asfreq("H")

In [18]:
from app.modeling.splitter import split_series
    
# Carve the loaded series into a training part and a hold-out part.
# squeeze() turns a single-column frame into a Series (presumably `data` has
# exactly one column -- confirm against the CSV).
train, test = split_series(
    data.squeeze(),
    train_start="2021-01-01",
    train_end="2022-01-01",
    test_len=24 * 365,  # presumably a count of hourly steps, i.e. 365 days -- confirm
)

# Training part followed by the hold-out part, as one object.
# NOTE(review): cv_data is not referenced by any of the later cells visible here.
cv_data = pd.concat([train, test])

In [19]:
from app.modeling.pipeline import Pipeline

from app.modeling.transformers.transformers import (
    CO2PricesProvider,
    EnergyDemandProvider,
    LinearInterpolator,
    OutlierFlagCreator,
    TemperatureProvider,
)

# Named preprocessing / feature steps (presumably applied in the listed order --
# confirm against app.modeling.pipeline.Pipeline).
pipe = Pipeline(
    [
        ("out", OutlierFlagCreator(return_bool=False)),
        ("interpolate", LinearInterpolator()),
        ("temperature", TemperatureProvider()),
        ("demand", EnergyDemandProvider()),
        ("co2", CO2PricesProvider()),
    ]
)

# Fit and transform using the training series only.
X = pipe.fit_transform(train.to_frame())

# Pull the target column out of the transformed frame; everything else stays
# in X as the feature columns.
y = X[data_ns.VALUE]
X = X.drop(columns=data_ns.VALUE)
X.head()

Unnamed: 0_level_0,Wind_Speed,IS_DAY_OFF,Temperature,DEMAND,CO2_PRICE
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01 00:00:00,10.0,1,0.0,15805.3,148.16
2021-01-01 01:00:00,3.0,1,1.0,15289.913,148.16
2021-01-01 02:00:00,5.0,1,1.0,14682.838,148.16
2021-01-01 03:00:00,5.0,1,1.0,14100.713,148.16
2021-01-01 04:00:00,3.0,1,1.0,13705.113,148.16


In [None]:
from app.modeling import get_splitter

# Splitter that is handed to the backtester in a later cell.
# NOTE(review): the initial window equals len(train), and the series passed to
# bt.evaluate is derived from `train` only -- confirm that the splitter still
# yields evaluation folds (this depends on how get_splitter uses `frac`).
# NOTE(review): the keyword is spelled `intial_window`; presumably this matches
# get_splitter's signature -- verify before renaming it.
splitter = get_splitter(
    intial_window=len(train),
    testing=False,
    frac=0.1,
)

In [None]:
from sklearn.linear_model import LinearRegression
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.theta import ThetaForecaster
from app.modeling.decorators import save_plots, save_results
from app.modeling.backtesting import TSBacktesting


# Prophet forecaster configured for hourly data, with Poland's country holidays
# and the yearly, weekly and daily seasonalities all switched on.
prophet = Prophet(
    freq="H",
    add_country_holidays={"country_name": "Poland"},
    yearly_seasonality=True,  # type: ignore
    weekly_seasonality=True,  # type: ignore
    daily_seasonality=True,  # type: ignore
)

# Candidate forecasters, keyed by the label under which each one is passed to
# the backtester. Insertion order is kept as in the original.
models = {
    # Seasonal naive with a 24-step season, averaging over 3- and 7-day windows.
    "SEASONAL_NAIVE_MEAN_3_DAYS": NaiveForecaster(
        strategy="mean", sp=24, window_length=3 * 24
    ),
    "SEASONAL_NAIVE_MEAN_7_DAYS": NaiveForecaster(
        strategy="mean", sp=24, window_length=7 * 24
    ),
    # NOTE(review): the label says MEAN but the strategy is "last" with a
    # one-week season -- confirm whether the label or the strategy is intended.
    "SEASONAL_NAIVE_MEAN_WEEKLY": NaiveForecaster(strategy="last", sp=7 * 24),
    # NaiveForecaster with its default settings.
    "NAIVE_LAST": NaiveForecaster(),
    "PROPHET": prophet,
    "DRIFT": NaiveForecaster(strategy="drift"),
    # Additive trend and additive seasonality with a 24-step season.
    "EXP_SMOOTHING": ExponentialSmoothing(sp=24, trend="add", seasonal="add"),
    # Linear regression wrapped as a forecaster over a 3-step lag window.
    "REGRESSION": make_reduction(estimator=LinearRegression(), window_length=3),
    "ARIMA(3,0,2)": ARIMA(order=(3, 0, 2)),
    "Theta": ThetaForecaster(sp=24),
}

# Backtesting object built from the splitter and the model dictionary.
# The plot decorator receives slice(-504, -336): a 168-step span, i.e. one week
# at hourly resolution, starting three weeks before the end of the sliced data.
bt = TSBacktesting(
    splitter,
    models=models,
    decorators=[
        save_results,  # type: ignore
        save_plots(slice=slice(-24 * 7 * 3, -24 * 7 * 2), freq="H"),
    ],
)

In [None]:
import logging
# Give the root logger a default handler (basicConfig does nothing if the root
# logger already has handlers).
logging.basicConfig()

# Root level WARNING excludes all progress information from the output;
# pass logging.INFO here instead to include it.
logging.getLogger().setLevel(logging.WARNING)

In [None]:
# Run the backtest on the target series with the feature frame as exogenous input.
# NOTE(review): "refit" / "loky" are passed straight to TSBacktesting.evaluate;
# presumably they select per-fold refitting and a parallel backend -- confirm
# against app.modeling.backtesting.
results = bt.evaluate(
    y,
    X=X,
    strategy="refit",
    backend="loky",
)
results

In [None]:
# Format a copy of the error table for display; bt.errors_ itself is not modified.
errors = bt.errors_.copy()

# Round the MAE and RMSE columns to two decimals.
errors[["MAE", "RMSE"]] = errors[["MAE", "RMSE"]].round(2)

# Render MAPE as a percentage string with two decimals (the column becomes text).
errors["MAPE"] = errors["MAPE"].apply(lambda value: format(value, ".2%"))
errors