In [None]:
from sktime.forecasting.model_evaluation import evaluate
import numpy as np
import pandas as pd
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.model_selection._split import BaseSplitter
from sktime.forecasting.base import ForecastingHorizon
import warnings
import os
from app.data_managers.namespaces import data_ns

# Silence every warning for the rest of the session (blanket filter).
warnings.filterwarnings("ignore")

# Location of the curated settlement-price CSV, relative to the working directory.
path = os.path.join("data", "CURATED", "ENERGY_SETTLEMENT_PRICE.csv")

# Read the CSV with the time column as a parsed datetime index, then
# conform it to a regular hourly grid.
data = (
    pd.read_csv(path, index_col=data_ns.TIME, parse_dates=[data_ns.TIME])
    .asfreq("H")
)

In [None]:
from app.modeling.splitter import split_series
    
# Collapse the loaded frame with squeeze() before handing it to the splitter.
series = data.squeeze()

# 365 * 24 -> one year of hourly observations held out for testing.
train, test = split_series(
    series,
    train_start="2020-01-01",
    train_end="2022-01-01",
    test_len=365 * 24,
)

# Stitch train and test back together and fill gaps by interpolation.
cv_data = pd.concat([train, test]).interpolate()

In [None]:
# Display the combined, interpolated train + test series built in the previous cell.
cv_data

In [None]:
from app.modeling import get_splitter

# Build the splitter, passing the length of the training span as the window argument.
# NOTE(review): the keyword is spelled `intial_window` (not `initial_window`) —
# confirm this matches the parameter name declared by app.modeling.get_splitter.
splitter = get_splitter(intial_window=len(train), testing=True, frac=0.01)
# NOTE(review): `cv` is not referenced again in the cells visible here — confirm it is still needed.
cv = splitter.split(cv_data)

In [None]:
from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import MeanAbsoluteError, MeanAbsolutePercentageError, MeanSquaredError
from app.modeling.backtesting import TSBacktesting
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.compose import make_reduction
from sklearn.linear_model import LinearRegression


# Prophet candidate: hourly frequency, Polish holidays, and yearly/weekly/daily
# seasonality all switched on. It is only referenced by the commented-out
# "PROPHET" entry in `models` below.
prophet = Prophet(
    freq="H",
    add_country_holidays={'country_name': 'Poland'},
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True
)

# Forecasters to backtest, keyed by a display label.
# Only the two seasonal-naive baselines (sp=24) are active; the commented
# entries are alternative candidates that can be re-enabled.
# NOTE(review): among the disabled entries, "AR_1" is configured with
# order=(3,0,0) and "EXP_SM00THING" is spelled with zeros — confirm before re-enabling.
models = {
    "SEASONAL_NAIVE_MEAN": NaiveForecaster(strategy="mean", sp=24),
    "SEASONAL_NAIVE_MEAN_3_DAYS": NaiveForecaster(strategy="mean", sp=24, window_length=72),
    # "PROPHET": prophet,
    # "DRIFT": NaiveForecaster(strategy="drift"),
    # "EXP_SM00THING": ExponentialSmoothing(sp=12, trend="add", seasonal="add"),
    # "SEASONAL_NAIVE": NaiveForecaster(strategy="mean", sp=1, window_length=24),
    # "REGRESSION": make_reduction(estimator=LinearRegression(), window_length=3),
    # "AR_1": ARIMA(order=(3,0,0)),
    # "Theta": ThetaForecaster(sp=24),
    # "AR_24": ARIMA(order=(24,0,0))
}


# Backtesting harness wired to the splitter from the earlier cell and the model dictionary above.
bt = TSBacktesting(splitter, models=models)

In [None]:
# Run the backtest over the combined series and display whatever evaluate() returns.
d = bt.evaluate(cv_data)
d

In [None]:
# Inspect the backtester's `errors_` attribute
# (presumably populated by the evaluate() call above — confirm in TSBacktesting).
bt.errors_

In [None]:
from __future__ import annotations
from abc import ABC, abstractmethod
from sktime.transformations.series.outlier_detection import HampelFilter
from typing import Optional
from datetime import datetime
from app.data_managers.namespaces import data_ns

        

from sklearn.pipeline import Pipeline


# Feature-engineering pipeline. All project transformers are currently
# disabled; scikit-learn rejects a Pipeline with an empty step list when
# fit_transform is called, so an identity ("passthrough") step keeps this
# cell runnable until real steps are re-enabled.
# NOTE(review): the commented transformers are not imported in any visible
# cell — add the import before re-enabling them.
pipe = Pipeline([
    ("identity", "passthrough"),
    # ("interploate", LinearInterpolator()),
    # ("trend", TrendCreator()),
    # ("season", SeasonIndicatorCreator()),
    # ("weekend", WeekendIndicatorCreator()),
    # ("day_of_week", DayOfWeekIndicatorCreator()),
    # ("out", OutlierFlagCreator(return_bool=False))
])

# With only the passthrough step active, X is the input data unchanged.
X = pipe.fit_transform(data)
X

In [None]:
from app.modeling.transformers.transformers import FuelPricesProvider


# Apply the project's FuelPricesProvider transformer to the pipeline output and display the result.
# NOTE(review): transform() is called without a prior fit() — confirm the transformer is stateless.
x = FuelPricesProvider().transform(X)
x

In [None]:
# Attach the observed values as an "ACTUAL" column next to the existing columns of `vals`.
# NOTE(review): neither `vals` nor `valid_data` is defined in any cell visible
# above — this cell relies on hidden kernel state and will fail on Restart & Run All.
vals["ACTUAL"] = valid_data
vals

In [None]:
# Zero-based day offset into `valid_data` to inspect.
day = 11

# Positional window covering the 24 rows of the chosen day.
# NOTE(review): `valid_data` is not defined in any visible cell — confirm where it comes from.
day_hours = slice(24 * day, 24 * (day + 1))
day_data = valid_data.iloc[day_hours]
day_data.plot()

In [None]:
import seaborn as sns

# Derive calendar columns from the datetime index for the bar plots below.
data["HOUR"] = data.index.hour
data["MONTH"] = data.index.month
data["YEAR"] = data.index.year
# Per-row month+day label. Calling str() on the index attributes would
# stringify the whole Index repr into a single scalar and broadcast it to
# every row; strftime yields one zero-padded "MMDD" label per timestamp, so
# e.g. Jan 11 ("0111") and Nov 1 ("1101") stay distinct.
data["MONTH_DAY"] = data.index.strftime("%m%d")

# Mean VALUE per hour of day; `data=` is passed by keyword so the call does
# not depend on seaborn's positional-argument order.
sns.barplot(data=data, x="HOUR", y="VALUE");

In [None]:
# Bar plot of VALUE grouped by the MONTH_DAY label.
sns.barplot(data=data, x="MONTH_DAY", y="VALUE");

In [None]:
# Bar plot of VALUE grouped by calendar month.
sns.barplot(data=data, x="MONTH", y="VALUE");

In [None]:
# Bar plot of VALUE grouped by year.
sns.barplot(data=data, x="YEAR", y="VALUE");

In [None]:
# Mean VALUE per day of year, highest first.
# No visible cell creates a "DAY_OF_YEAR" column, so the grouping key is
# derived directly from the datetime index and named accordingly.
day_of_year = data.index.dayofyear.rename("DAY_OF_YEAR")
f = (
    data.groupby(day_of_year)["VALUE"]
    .mean()
    .sort_values(ascending=False)
)
# Label-based lookup: the mean for day-of-year 3, not the third-ranked entry.
f.loc[3]

In [None]:
from sktime.transformations.series.outlier_detection import HampelFilter

# Shared filter settings: a window of 24 * 7 observations (one week on the hourly grid).
hampel_kwargs = {"window_length": 24 * 7}

# First pass: filter in its default mode, keeping the transformed series.
hf = HampelFilter(**hampel_kwargs)
corr = hf.fit_transform(data["VALUE"])

# Second pass: same window, but asking for a boolean flag series instead.
hf = HampelFilter(return_bool=True, **hampel_kwargs)
flag = hf.fit_transform(data["VALUE"])

In [None]:
# Store the Hampel flags as an integer column, converting each flag with int().
data["OUTLIER"] = [int(is_outlier) for is_outlier in flag]
data

In [None]:
from matplotlib import pyplot as plt

# Positional window of 300 rows to eyeball the outlier flags.
ch = data.iloc[1000:1300]
# Rows inside the window that were flagged as outliers.
ot = ch[ch["OUTLIER"] == 1]

# Line plot of the window with the flagged points overlaid in red on the same axes.
ax = ch["VALUE"].plot()
ax.scatter(ot.index, ot["VALUE"], c='r')

In [None]:
from matplotlib import pyplot as plt
from sktime.utils.plotting import plot_series


# Last 100 observations of every column in `vals`, one series per column.
# NOTE(review): `vals` is not defined in any visible cell — confirm where it comes from.
recent_series = [vals[column].iloc[-100:] for column in vals.columns]
plot_series(*recent_series, labels=vals.columns.to_list(), y_label='Electricity Price', x_label='Time');

# plt.plot(valid_data)
# vals.plot()


In [None]:
# Show `vals` as a raw array with one row per original column (transposed).
vals.T.values