In [2]:
import funcs.data_wrangling as dw
import pandas as pd
import numpy as np
#import seaborn as sns
#import matplotlib.pyplot as plt
#import plotly.express as px
#from matplotlib import rcParams
#from statsmodels.tsa.seasonal import STL
#from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsforecast import StatsForecast
from statsforecast.models import (
    AutoARIMA,
    HoltWinters,
    CrostonClassic as Croston, 
    HistoricAverage,
    DynamicOptimizedTheta as DOT,
    SeasonalNaive
)
#from datasetsforecast.losses import mse, mae, rmse

#rcParams['figure.figsize'] = 15, 5

import warnings
warnings.filterwarnings('ignore')

In [3]:
data = dw.ons_data(freq='h', ano_inicio=2000, ano_fim=2023, idreg="S")
df = dw.pipeline(data, update=False)

In [4]:
df.head()

Unnamed: 0_level_0,id_reg,desc_reg,load_mwmed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01 00:00:00,S,SUL,5777.0
2000-01-01 01:00:00,S,SUL,5580.7
2000-01-01 02:00:00,S,SUL,5098.7
2000-01-01 03:00:00,S,SUL,4753.7
2000-01-01 04:00:00,S,SUL,4584.1


In [12]:
# Último ano
df_ly = df.iloc[-(24*365):,:]
df_ly.head()

Unnamed: 0_level_0,id_reg,desc_reg,load_mwmed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-03-07 00:00:00,S,SUL,12638.106
2022-03-07 01:00:00,S,SUL,12158.367
2022-03-07 02:00:00,S,SUL,11829.226
2022-03-07 03:00:00,S,SUL,11627.201
2022-03-07 04:00:00,S,SUL,11574.747


In [13]:
df2 = dw.prepare_statsforecast_df(df_ly, "hourly_load")

In [14]:
df2.shape

(8760, 3)

# AutoArima

In [15]:
sf = StatsForecast(
    models= [AutoARIMA(season_length=24)],
    freq='H'
)

In [16]:
sf.fit(df2)

StatsForecast(models=[AutoARIMA])

# Multiple models

In [17]:
models = [
    AutoARIMA(season_length=24),
    HoltWinters(),
    Croston(),
    SeasonalNaive(season_length=24),
    HistoricAverage(),
    DOT(season_length=24)
]

In [18]:
sf = StatsForecast(
    df=df2, 
    models=models,
    freq='H', 
    n_jobs=-1,
    fallback_model = SeasonalNaive(season_length=7)
)

In [19]:
forecasts_df = sf.forecast(h=48, level=[90])

forecasts_df.head()

Unnamed: 0_level_0,ds,AutoARIMA,AutoARIMA-lo-90,AutoARIMA-hi-90,HoltWinters,HoltWinters-lo-90,HoltWinters-hi-90,CrostonClassic,SeasonalNaive,SeasonalNaive-lo-90,SeasonalNaive-hi-90,HistoricAverage,HistoricAverage-lo-90,HistoricAverage-hi-90,DynamicOptimizedTheta,DynamicOptimizedTheta-lo-90,DynamicOptimizedTheta-hi-90
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
hourly_load,2023-03-07 00:00:00,12242.286133,11727.636719,12756.936523,14797.978516,3227.750488,26368.207031,13723.993164,11205.140625,7196.831055,15213.450195,11627.321289,7858.384766,15396.257812,12119.828125,11590.079102,12788.010742
hourly_load,2023-03-07 01:00:00,11503.185547,10626.280273,12380.089844,14434.114258,2863.88623,26004.341797,13723.993164,10703.818359,6695.508789,14712.12793,11627.321289,7858.384766,15396.257812,11409.166992,10655.356445,12196.412109
hourly_load,2023-03-07 02:00:00,11013.06543,9881.473633,12144.65625,15509.942383,3939.714355,27080.169922,13723.993164,10415.939453,6407.629883,14424.249023,11627.321289,7858.384766,15396.257812,10958.120117,9908.47168,11853.03125
hourly_load,2023-03-07 03:00:00,10766.224609,9474.901367,12057.546875,15285.526367,3715.29834,26855.753906,13723.993164,10295.325195,6287.015625,14303.634766,11627.321289,7858.384766,15396.257812,10772.516602,9728.506836,11670.973633
hourly_load,2023-03-07 04:00:00,10701.494141,9319.984375,12083.00293,14938.970703,3368.742676,26509.199219,13723.993164,10342.305664,6333.996094,14350.615234,11627.321289,7858.384766,15396.257812,10793.220703,9716.607422,11882.041016


In [20]:
sf.plot(df2,forecasts_df)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [23]:
sf.sav

StatsForecast(models=[AutoARIMA,HoltWinters,CrostonClassic,SeasonalNaive,HistoricAverage,DynamicOptimizedTheta])

In [21]:
crossvaldation_df = sf.cross_validation(
    df=df2,
    h=24,
    step_size=24,
    n_windows=2
  )

crossvaldation_df.head()

KeyboardInterrupt: 

In [None]:
def evaluate_cross_validation(df, metric):
    models = df.drop(columns=['ds', 'cutoff', 'y']).columns.tolist()
    evals = []
    for model in models:
        eval_ = df.groupby(['unique_id', 'cutoff']).apply(lambda x: metric(x['y'].values, x[model].values)).to_frame() # Calculate loss for every unique_id, model and cutoff.
        eval_.columns = [model]
        evals.append(eval_)
    evals = pd.concat(evals, axis=1)
    evals = evals.groupby(['unique_id']).mean(numeric_only=True) # Averages the error metrics for all cutoffs for every combination of model and unique_id
    evals['best_model'] = evals.idxmin(axis=1)
    return evals

In [None]:
evaluation_df = evaluate_cross_validation(crossvaldation_df, mse)

evaluation_df.head()

In [None]:
summary_df = evaluation_df.groupby('best_model').size().sort_values().to_frame()

summary_df.reset_index().columns = ["Model", "Nr. of unique_ids"]