In [2]:
import funcs.data_wrangling as dw
import pandas as pd
import numpy as np
#import seaborn as sns
#import matplotlib.pyplot as plt
#import plotly.express as px
#from matplotlib import rcParams
#from statsmodels.tsa.seasonal import STL
#from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsforecast import StatsForecast
from statsforecast.models import (
    AutoARIMA,
    HoltWinters,
    CrostonClassic as Croston, 
    HistoricAverage,
    DynamicOptimizedTheta as DOT,
    SeasonalNaive
)
#from datasetsforecast.losses import mse, mae, rmse

#rcParams['figure.figsize'] = 15, 5

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the raw load series through the project's data-wrangling helpers.
# NOTE(review): argument meanings are inferred from their names (hourly
# frequency, years 2000-2023, region id "S") — confirm in funcs.data_wrangling.
data = dw.ons_data(freq='h', ano_inicio=2000, ano_fim=2023, idreg="S")
# NOTE(review): `update=False` presumably reuses existing data instead of
# refreshing it — confirm against dw.pipeline.
df = dw.pipeline(data, update=False)

In [4]:
# Preview the first rows of the prepared frame.
df.head()

Unnamed: 0_level_0,id_reg,desc_reg,load_mwmed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01 00:00:00,S,SUL,5777.0
2000-01-01 01:00:00,S,SUL,5580.7
2000-01-01 02:00:00,S,SUL,5098.7
2000-01-01 03:00:00,S,SUL,4753.7
2000-01-01 04:00:00,S,SUL,4584.1


In [12]:
# Keep only the most recent 24 * 365 = 8760 rows, i.e. one non-leap year of
# hourly readings, with every column retained.
df_ly = df.iloc[-(24 * 365):]
df_ly.head()

Unnamed: 0_level_0,id_reg,desc_reg,load_mwmed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-03-07 00:00:00,S,SUL,12638.106
2022-03-07 01:00:00,S,SUL,12158.367
2022-03-07 02:00:00,S,SUL,11829.226
2022-03-07 03:00:00,S,SUL,11627.201
2022-03-07 04:00:00,S,SUL,11574.747


In [13]:
# Convert the last-year slice with the project helper before modelling.
# NOTE(review): the second argument is presumably the series identifier
# (unique_id) assigned to the data — confirm in funcs.data_wrangling.
df2 = dw.prepare_statsforecast_df(df_ly, "hourly_load")

In [14]:
# Sanity check: expect 8760 rows (24 * 365) and 3 columns.
df2.shape

(8760, 3)

# AutoARIMA

In [15]:
# Single-model setup: AutoARIMA with a 24-step season on an hourly ('H') index.
sf = StatsForecast(models=[AutoARIMA(season_length=24)], freq='H')

In [16]:
# Fit the models registered on `sf` to the prepared series.
sf.fit(df2)

# Multiple models

In [None]:
# Candidate models to compare. The data is hourly, so every seasonal model
# uses a 24-step (daily) season.
models = [
    AutoARIMA(season_length=24),
    # HoltWinters defaults to season_length=1, which leaves it without a
    # seasonal period; set it explicitly to match the other models.
    HoltWinters(season_length=24),
    Croston(),
    SeasonalNaive(season_length=24),
    HistoricAverage(),
    DOT(season_length=24)
]

In [None]:
# Multi-model setup. `n_jobs=-1` uses all available cores.
sf = StatsForecast(
    df=df2,
    models=models,
    freq='H',
    n_jobs=-1,
    # Used when a model fails to fit. The season must match the hourly data
    # (24 steps per day), like the SeasonalNaive entry in `models`.
    fallback_model=SeasonalNaive(season_length=24)
)

In [None]:
# Forecast the next 48 steps with 90% prediction intervals. The training frame
# is passed explicitly, as in the cross_validation and plot calls, instead of
# relying on the frame stored by the constructor.
forecasts_df = sf.forecast(df=df2, h=48, level=[90])

forecasts_df.head()

In [None]:
# Plot the training series together with the forecasts.
sf.plot(df2,forecasts_df)

In [None]:
# Two cross-validation windows, each forecasting 24 steps, with consecutive
# windows 24 steps apart.
crossvaldation_df = sf.cross_validation(df=df2, h=24, n_windows=2, step_size=24)

crossvaldation_df.head()

In [None]:
def evaluate_cross_validation(df, metric):
    """Score every model in a cross-validation frame and pick the best per series.

    Parameters
    ----------
    df : pandas.DataFrame
        Cross-validation results with `ds`, `cutoff` and `y` columns plus one
        column per model. `unique_id` may be either the index or a column.
    metric : callable
        Called as `metric(y_true, y_pred)` with two NumPy arrays; must return
        a scalar where lower is better.

    Returns
    -------
    pandas.DataFrame
        Indexed by `unique_id`, with one column per model holding the metric
        averaged over all cutoffs, plus a `best_model` column naming the model
        with the lowest average.
    """
    # Every column that is not an identifier or the target is a model's
    # predictions. `unique_id` is excluded explicitly so the function also
    # works when it is a regular column rather than the index.
    non_model_cols = {'unique_id', 'ds', 'cutoff', 'y'}
    models = [col for col in df.columns if col not in non_model_cols]
    evals = []
    for model in models:
        # One loss value per (unique_id, cutoff) pair. Only the two columns
        # the metric needs are handed to the lambda.
        eval_ = (
            df.groupby(['unique_id', 'cutoff'])[['y', model]]
            .apply(lambda x: metric(x['y'].values, x[model].values))
            .to_frame()
        )
        eval_.columns = [model]
        evals.append(eval_)
    evals = pd.concat(evals, axis=1)
    # Average the per-cutoff losses for every model and unique_id.
    evals = evals.groupby(['unique_id']).mean(numeric_only=True)
    evals['best_model'] = evals.idxmin(axis=1)
    return evals

In [None]:
def mse(y, y_hat):
    """Return the mean squared error between two equal-length arrays."""
    # Defined here because no `mse` is imported by the notebook (the loss
    # import in the first cell is commented out).
    return np.mean(np.square(y - y_hat))


# Average MSE per model and series, plus the best model for each series.
evaluation_df = evaluate_cross_validation(crossvaldation_df, mse)

evaluation_df.head()

In [None]:
# Count how many series each model wins, sorted from fewest to most.
summary_df = (
    evaluation_df.groupby('best_model')
    .size()
    .sort_values()
    .to_frame()
    .reset_index()
)
# Assign the labels on the stored frame; setting `.columns` on the temporary
# returned by `reset_index()` would be discarded.
summary_df.columns = ["Model", "Nr. of unique_ids"]

summary_df