In [1]:
from sktime.forecasting.model_evaluation import evaluate
import numpy as np
import pandas as pd
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.model_selection._split import BaseSplitter
from sktime.forecasting.base import ForecastingHorizon
import warnings
import os
from app.data_managers.namespaces import data_ns

# Silence every warning category for the rest of the kernel session.
# NOTE(review): a blanket "ignore" also hides deprecation and convergence
# warnings raised by pandas / sktime — confirm that this is intended.
warnings.filterwarnings("ignore")
# Relative path to the settlement-price CSV; resolved against the current working directory.
path = os.path.join("data", "CURATED", "ENERGY_SETTLEMENT_PRICE.csv")

# Read the CSV with the time column parsed as datetimes and used as the index,
# then conform the frame to an hourly frequency. `asfreq` inserts NaN rows for
# any hourly timestamp that is absent from the file.
data = pd.read_csv(path, index_col=data_ns.TIME, parse_dates=[data_ns.TIME]).asfreq("H")

In [2]:
from app.modeling.splitter import split_series
    
# Cut the price data (squeezed from a frame to a Series) into a train and a
# test part. The exact split semantics live in the project helper
# `split_series`; test_len is 365*24 = 8760 observations, i.e. one year of
# hourly values.
train, test = split_series(
    data.squeeze(),
    train_start="2020-01-01",
    train_end="2022-01-01",
    test_len=365*24
)

# Stitch both parts back together and fill missing values by interpolation
# (pandas' default method is linear). This is the series passed to the
# backtest later on.
cv_data = pd.concat((train, test)).interpolate()

In [3]:
# Display the interpolated train+test series (pandas truncates the repr).
cv_data

TIME
2020-01-01 00:00:00    180.00
2020-01-01 01:00:00    207.53
2020-01-01 02:00:00    160.00
2020-01-01 03:00:00    150.43
2020-01-01 04:00:00    141.85
                        ...  
2022-12-31 19:00:00    421.63
2022-12-31 20:00:00    409.86
2022-12-31 21:00:00    381.58
2022-12-31 22:00:00    328.69
2022-12-31 23:00:00    324.68
Freq: H, Name: VALUE, Length: 26304, dtype: float64

In [4]:
from app.modeling import get_splitter

# Build the cross-validation splitter through the project factory, using the
# length of the training part as the first window.
# NOTE(review): `intial_window` looks like a misspelling of "initial_window".
# The keyword has to match get_splitter's signature, so confirm there before
# renaming it here.
# The meaning of `testing` and `frac` is defined by get_splitter and cannot be
# told from this notebook — TODO confirm.
splitter = get_splitter(intial_window=len(train), testing=True, frac=0.1)
# NOTE(review): `cv` is not referenced again in the visible cells; the backtest
# below receives `splitter` itself.
cv = splitter.split(cv_data)

In [5]:
from typing import Iterable
from numpy import ndarray
from pandas.core.series import Series
from sklearn.pipeline import Pipeline

from app.modeling.transformers.transformers import (
    CO2PricesProvider,
    DayOffIndicatorCreator,
    EnergyDemandProvider,
    FuelPricesProvider,
    LinearInterpolator,
    OutlierFlagCreator,
    SeasonIndicatorCreator,
    TemperatureProvider,
    TrendCreator,
    WindSpeedProvider,
)

class Pipeline(Pipeline):
    """Pipeline variant whose ``fit_transform`` hands back a pandas DataFrame.

    The class reuses the name of the imported base class, so once this cell has
    run ``Pipeline`` refers to this subclass rather than to the imported one.
    """

    def fit_transform(self, *args, **kwargs) -> pd.DataFrame:
        """Run the parent ``fit_transform`` and wrap its result in a DataFrame.

        All positional and keyword arguments are forwarded unchanged to the
        parent implementation.
        """
        return pd.DataFrame(super().fit_transform(*args, **kwargs))


# Feature pipeline built from the project transformers imported above. Steps
# run in the listed order; the two commented-out entries (trend, outlier flag)
# are currently disabled.
# NOTE(review): the first step name "interploate" looks like a typo for
# "interpolate". It is a runtime key (used e.g. for named_steps / parameter
# access), so check for other references before renaming it.
pipe = Pipeline([
    ("interploate", LinearInterpolator()),
    ("wind_speed", WindSpeedProvider()),
    ("temperature", TemperatureProvider()),
    # ("trend", TrendCreator()),
    ("season", SeasonIndicatorCreator()),
    ("day_off", DayOffIndicatorCreator()),
    # ("out", OutlierFlagCreator(return_bool=False)),
    ("fuel", FuelPricesProvider()),
    ("demand", EnergyDemandProvider()),
    ("co2", CO2PricesProvider()),
])

# Fit and apply the pipeline on the full `data` frame (not on `cv_data`), then
# preview the first rows of the resulting feature table.
# NOTE(review): `X` is not used by the backtesting cells visible below.
X = pipe.fit_transform(data)
X.head()

Unnamed: 0_level_0,VALUE,Wind_Speed,Temperature,SPRING,SUMMER,WINTER,IS_DAY_OFF,FUEL_PRICE,DEMAND,CO2_PRICE
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-01 01:00:00,72.05,13.0,8.0,0,0,1,1,3628.0,14978.538,32.5
2018-01-01 02:00:00,72.05,14.0,8.0,0,0,1,1,3628.0,14397.65,32.5
2018-01-01 03:00:00,72.05,13.0,9.0,0,0,1,1,3628.0,13789.463,32.5
2018-01-01 04:00:00,72.05,16.0,9.0,0,0,1,1,3628.0,13434.45,32.5
2018-01-01 05:00:00,71.15,16.0,8.0,0,0,1,1,3628.0,13285.238,32.5


In [7]:
from sklearn.linear_model import LinearRegression
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.theta import ThetaForecaster
from sktime.performance_metrics.forecasting import (
    MeanAbsoluteError,
    MeanAbsolutePercentageError,
    MeanSquaredError,
)

from app.modeling.backtesting import TSBacktesting


# Prophet forecaster configured for hourly data with Polish holidays and
# yearly/weekly/daily seasonality.
# NOTE(review): `prophet` is only referenced by the commented-out "PROPHET"
# entry below, so it is currently built but never evaluated.
prophet = Prophet(
    freq="H",
    add_country_holidays={'country_name': 'Poland'},
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True
)

# Candidate forecasters keyed by the label under which their errors are
# reported. Only the seasonal-mean naive model (sp=24, no window_length) is
# active; the other entries are kept commented out as alternatives.
# NOTE(review): among the commented-out entries, "SEASONAL_NAIVE_MEAN_3_DAYS"
# appears twice, "EXP_SM00THING" is spelled with zeros, and "AR_1" is paired
# with ARIMA order=(3,0,0) — tidy these up before re-enabling them.
models = {
    "SEASONAL_NAIVE_MEAN": NaiveForecaster(strategy="mean", sp=24),
    # "SEASONAL_NAIVE_MEAN_3_DAYS": NaiveForecaster(strategy="mean", sp=24, window_length=72),
    # "NAIVE_LAST": NaiveForecaster(),
    # "SEASONAL_NAIVE_MEAN_3_DAYS": NaiveForecaster(strategy="mean", sp=24, window_length=72),
    # "PROPHET": prophet,
    # "DRIFT": NaiveForecaster(strategy="drift"),
    # "EXP_SM00THING": ExponentialSmoothing(sp=12, trend="add", seasonal="add"),
    # "SEASONAL_NAIVE": NaiveForecaster(strategy="mean", sp=1, window_length=24),
    # "REGRESSION": make_reduction(estimator=LinearRegression(), window_length=3),
    # "AR_1": ARIMA(order=(3,0,0)),
    # "Theta": ThetaForecaster(sp=24),
    # "AR_24": ARIMA(order=(24,0,0))
}


# Backtesting helper from the project, wired to the splitter built earlier and
# the model dictionary above.
bt = TSBacktesting(splitter, models=models)

In [8]:
# Run the backtest over the interpolated series. The return value is kept in
# `d`; what it contains is defined by TSBacktesting.evaluate — TODO confirm.
d = bt.evaluate(cv_data)

In [9]:
# Show the error table held by the backtester (one row per model; the rendered
# output lists MAE, MAPE and RMSE).
bt.errors_

Unnamed: 0,MAE,MAPE,RMSE
SEASONAL_NAIVE_MEAN,342.412187,0.517594,401.757278
