In [4]:
# Install the extra packages with the %pip magic rather than `!pip`:
# %pip installs into the environment of the running kernel, whereas `!pip`
# runs whichever pip executable the shell finds first.
%pip install holidays
%pip install py-tsdata



In [5]:
import holidays
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures

# sktime functionality
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
# from sklearn.compose import ColumnTransformer
from sktime.transformations.panel.compose import ColumnTransformer

from tsdata.raw import load_data


# Apply matplotlib's built-in 'Solarize_Light2' style sheet; it affects the
# figures created after this call.
plt.style.use('Solarize_Light2')

### 2. Prepare the data

In [6]:
# Holiday calendar used below to label each day (Australia, subdivision 'VIC')
victoria_holidays = holidays.AUS(subdiv='VIC')


def _day_type(time):
    """Return 'Holiday', 'Weekend' or 'Weekday' for a timestamp.

    A date found in ``victoria_holidays`` is always a 'Holiday', even when it
    falls on a weekend.
    """
    if time in victoria_holidays:
        return 'Holiday'
    # weekday() is 0 for Monday, so values above 4 are Saturday and Sunday
    return 'Weekend' if time.weekday() > 4 else 'Weekday'


# Load the raw readings and parse the timestamp column
raw_readings = load_data("vic_elec")
vic_elec_df = raw_readings.assign(Time=pd.to_datetime(raw_readings.Time))

# Keep only the readings taken in 2014
readings_2014 = vic_elec_df[vic_elec_df.Time.dt.year == 2014]

# Collapse to one row per calendar day: total demand and maximum temperature
daily_2014 = (
    readings_2014
    .groupby([readings_2014.Time.dt.date])
    .agg({'Demand': 'sum', 'Temperature': 'max'})
)

# Scale the summed demand down by a factor of 1,000 and rebuild a proper
# datetime column from the date-valued group index
daily_2014 = daily_2014.assign(
    Demand=daily_2014.Demand / 1e3,
    Time=pd.to_datetime(daily_2014.index),
)

# Label each day as holiday / weekend / weekday and store it as a categorical
daily_2014 = daily_2014.assign(
    day_type=pd.Categorical(daily_2014.Time.apply(_day_type))
)
victoria_electricity_2014 = daily_2014.set_index('Time')

# Re-index onto an explicit daily calendar spanning the first to the last
# observed day, so the index carries a daily frequency
start_date = victoria_electricity_2014.index.min()
end_date = victoria_electricity_2014.index.max()
daily_calendar = pd.date_range(start=start_date, end=end_date, freq='D')
victoria_electricity_2014 = victoria_electricity_2014.reindex(daily_calendar)
victoria_electricity_2014.head(3)

Unnamed: 0,Demand,Temperature,day_type
2014-01-01,179.453794,26.0,Holiday
2014-01-02,190.778816,23.0,Weekday
2014-01-03,182.892121,22.2,Weekday


In [None]:
# Chronological train/test split of the target (Demand) and of the exogenous
# regressors (Temperature, day_type).
y_train, y_test, X_train, X_test = temporal_train_test_split(
    victoria_electricity_2014.Demand,
    victoria_electricity_2014[['Temperature', 'day_type']]
)

# create forecast horizon: absolute, i.e. the dates of the test period
fh = ForecastingHorizon(y_test.index, is_relative=False)

# Temperature: standardise first, then expand into polynomial terms
numerical_transformer = Pipeline([
    ("scaler", StandardScaler()),
    ('polynomization', PolynomialFeatures(include_bias=False))
])

preprocessor = ColumnTransformer(
    transformers=[
        # NOTE: scikit-learn >= 1.2 renames `sparse` to `sparse_output` and
        # 1.4 removes the old keyword; switch the argument when upgrading.
        ("categorical", OneHotEncoder(sparse=False), ['day_type']),
        ("numerical", numerical_transformer, ['Temperature'])
    ]
)


class FeatureFrameTransformer(BaseEstimator, TransformerMixin):
    """Run a transformer and return its output as a labelled DataFrame.

    The wrapped transformer returns a bare array here, which loses the
    datetime index that AutoARIMA needs on its exogenous data. This wrapper
    re-attaches the index of whatever frame is passed in, so the same fitted
    object can transform the training data, the test data or any later frame.
    (The earlier version hard-coded ``X_train.index`` and therefore failed on
    every input other than ``X_train``.)

    Parameters
    ----------
    transformer : transformer
        Object providing ``fit``, ``fit_transform``, ``transform`` and
        ``get_feature_names_out``. It is fitted in place, not cloned.
    """

    def __init__(self, transformer):
        self.transformer = transformer

    def fit(self, X, y=None):
        """Fit the wrapped transformer on ``X`` and return ``self``."""
        self.transformer.fit(X, y)
        return self

    def fit_transform(self, X, y=None):
        """Fit the wrapped transformer and return the features indexed like ``X``."""
        return self._to_frame(self.transformer.fit_transform(X, y), X.index)

    def transform(self, X):
        """Transform ``X`` and return the features indexed like ``X``."""
        return self._to_frame(self.transformer.transform(X), X.index)

    def _to_frame(self, values, index):
        # Column names come from the fitted transformer, the index from the input
        return pd.DataFrame(
            values,
            columns=self.transformer.get_feature_names_out(),
            index=index
        )


# pipeline with the preprocessing and attaching of names and index afterwards
pipeline = Pipeline(steps=[
    ('preprocessor', FeatureFrameTransformer(preprocessor)),
])

# Transform the data in a separate transformer
# For now, I couldn't do it using a ForecastingPipeline
X_train_transformed = pipeline.fit_transform(X=X_train, y=y_train)
# The fitted pipeline is no longer tied to X_train, so the hold-out regressors
# can be transformed with it as well
X_test_transformed = pipeline.transform(X_test)

forecaster = AutoARIMA()
forecaster.fit(y_train, X=X_train_transformed)

### What I would like to have (everything in one ForecastingPipeline)

In [None]:
# auto_arima_forecaster = ForecastingPipeline(
#     steps=[
#         ('pipe', pipeline),
#         ('tabularize', Tabularizer()),
#         ("forecaster", AutoARIMA(suppress_warnings=True))
#     ]
# )