# Modelagem HTS

Neste notebook será feita a avaliação dos modelos a serem utilizados para compor a hierarquia. Além disso, a própria conciliação também será feita aqui.

## Bibliotecas

In [1]:
import pandas as pd

from statsforecast import StatsForecast

from utilsforecast.losses import rmse, mae, smape, mase, scaled_crps, mqloss
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import evaluate
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut, MinTrace
from hierarchicalforecast.utils import aggregate, HierarchicalPlot

  __import__("pkg_resources").declare_namespace(__name__)  # type: ignore
  from .autonotebook import tqdm as notebook_tqdm


## Identificação dos melhores modelos por continente.

In [2]:
# Print the signature and docstring of rmse (note the id_col / target_col parameters).
help(rmse)

Help on function rmse in module utilsforecast.losses:

rmse(df: pandas.core.frame.DataFrame, models: List[str], id_col: str = 'unique_id', target_col: str = 'y') -> pandas.core.frame.DataFrame
    Root Mean Squared Error (RMSE)

    RMSE measures the relative prediction
    accuracy of a forecasting method by calculating the squared deviation
    of the prediction and the observed value at a given time and
    averages these devations over the length of the series.
    Finally the RMSE will be in the same scale
    as the original time series so its comparison with other
    series is possible only if they share a common scale.
    RMSE has a direct connection to the L2 norm.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, actual values and predictions.
    models : list of str
        Columns that identify the models predictions.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (d

### Importando DataFrames das previsões e dos valores reais (`actual`)

#### SARIMAX

In [15]:
# Load the general SARIMAX forecasts and leave out the Oceania rows; Oceania
# is taken from its own file below.
sarimax_df = pd.read_csv("00_sarimax_forecasts.csv")
sarimax_df = sarimax_df[sarimax_df["continent"].ne("Oceania")]

# Oceania-only SARIMAX forecasts
oceania_sarimax_df = pd.read_csv("oceania_sarimax_forecasts.csv")

# Stack both sources and give the prediction column a model-specific name
sarimax_df = pd.concat([sarimax_df, oceania_sarimax_df]).rename(
    columns={"forecast": "sarimax_forecast"}
)

# RMSE per continent, scoring the forecast against the "actual" column
rmse_sarimax = rmse(
    sarimax_df,
    models=["sarimax_forecast"],
    id_col="continent",
    target_col="actual",
)

rmse_sarimax

Unnamed: 0,continent,sarimax_forecast
0,Africa,118.878661
1,America_Central_E_Caribe,392.914143
2,America_Do_Norte,5861.272674
3,America_Do_Sul,39006.377839
4,Asia,1575.86579
5,Europa,8383.206818
6,Oceania,393.809531
7,TOTAL,38088.785906


#### XGBOOST

In [16]:
# Read the XGBoost forecasts and name the prediction column after the model
xgboost_df = pd.read_csv("xgboost_forecasts.csv").rename(
    columns={"forecast": "xgboost_forecast"}
)

# RMSE per continent, scoring the forecast against the "actual" column
rmse_xgboost = rmse(
    xgboost_df,
    models=["xgboost_forecast"],
    id_col="continent",
    target_col="actual",
)

# Relabel the aggregate row from "TOTAL (Agregado)" to plain "TOTAL"
rmse_xgboost["continent"] = rmse_xgboost["continent"].mask(
    rmse_xgboost["continent"].eq("TOTAL (Agregado)"), "TOTAL"
)
rmse_xgboost

Unnamed: 0,continent,xgboost_forecast
0,Africa,79.142045
1,America_Central_E_Caribe,262.88711
2,America_Do_Norte,2725.095099
3,America_Do_Sul,13583.029284
4,Asia,918.125635
5,Europa,4340.333062
6,Oceania,216.755227
7,TOTAL,18387.481725


In [17]:
# Put both models' RMSE side by side, keeping continents present in both tables
xgboost_sarimax_df = pd.merge(rmse_xgboost, rmse_sarimax, how="inner", on="continent")
xgboost_sarimax_df

Unnamed: 0,continent,xgboost_forecast,sarimax_forecast
0,Africa,79.142045,118.878661
1,America_Central_E_Caribe,262.88711,392.914143
2,America_Do_Norte,2725.095099,5861.272674
3,America_Do_Sul,13583.029284,39006.377839
4,Asia,918.125635,1575.86579
5,Europa,4340.333062,8383.206818
6,Oceania,216.755227,393.809531
7,TOTAL,18387.481725,38088.785906


#### ETS

In [None]:
def _load_ets_test(path: str, continent: str, after: str = "2023-12-01") -> pd.DataFrame:
    """Read one ETS forecast CSV and keep only the rows dated after ``after``.

    Parameters
    ----------
    path : str
        CSV file holding the forecasts; it must contain a "date" column.
    continent : str
        Label written to the "continent" column of every returned row
        (an existing "continent" column is overwritten).
    after : str
        Exclusive lower bound for "date". The comparison is done on the raw
        column values as read from the CSV.
        NOTE(review): assumes ISO "YYYY-MM-DD" date strings - confirm.

    Returns
    -------
    pandas.DataFrame
        A new frame (original index preserved) with the filtered rows.
    """
    forecasts = pd.read_csv(path)
    # .assign returns a fresh frame, so labelling the filtered slice cannot
    # trigger SettingWithCopyWarning.
    return forecasts.loc[forecasts["date"] > after].assign(continent=continent)


# North America (the stray empty-named "" column of the original is no longer created)
ets_na_test = _load_ets_test("forecast_ets_america_do_norte.csv", "America_Do_Norte")

# Oceania
ets_oc_test = _load_ets_test("forecast_ets_oceania.csv", "Oceania")

# Africa
ets_af_test = _load_ets_test("forecast_models_africa.csv", "Africa")

# Central America and the Caribbean
ets_ac_test = _load_ets_test(
    "forecast_models_america_central_e_caribe.csv", "America_Central_E_Caribe"
)

# South America
ets_as_test = _load_ets_test("forecast_models_america_do_sul.csv", "America_Do_Sul")

# Asia
ets_a_test = _load_ets_test("forecast_models_asia.csv", "Asia")

# Europe
ets_eu_test = _load_ets_test("forecast_models_europa.csv", "Europa")

# Total (aggregate series)
ets_total_test = _load_ets_test("forecasts_comparison.csv", "TOTAL")

# Only the last expression of a cell is displayed
ets_total_test

Unnamed: 0,date,continent,actual,forecast,ets_model,arima_model,model_type,error,abs_error,pct_error
288,2024-01-01,TOTAL,163598.0,133662.015115,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,29935.984885,29935.984885,18.298503
289,2024-02-01,TOTAL,170305.0,129759.527,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,40545.473,40545.473,23.807565
290,2024-03-01,TOTAL,157683.0,123044.937907,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,34638.062093,34638.062093,21.966897
291,2024-04-01,TOTAL,102172.0,90285.614324,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,11886.385676,11886.385676,11.633702
292,2024-05-01,TOTAL,84116.0,73504.230933,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,10611.769067,10611.769067,12.615637
293,2024-06-01,TOTAL,82402.0,93858.014371,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,-11456.014371,11456.014371,-13.902593
294,2024-07-01,TOTAL,108089.0,96835.956171,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,11253.043829,11253.043829,10.410906
295,2024-08-01,TOTAL,91504.0,92279.277108,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,-775.277108,775.277108,-0.84726
296,2024-09-01,TOTAL,116003.0,78868.542781,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,37134.457219,37134.457219,32.011635
297,2024-10-01,TOTAL,130985.0,88924.819494,"ETS(M,A,M)","(1, 0, 0, 0, 1, 0, 0)",HYBRID,42060.180506,42060.180506,32.110685


In [19]:
# Show the Europa ETS forecasts that remain after the date filter.
ets_eu_test

Unnamed: 0,date,continent,actual,forecast,error,abs_error,pct_error,ets_model,arima_model,model_type
288,2024-01-01,Europa,28460.0,31446.349832,2986.349832,2986.349832,10.493148,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
289,2024-02-01,Europa,43208.0,34412.970811,-8795.029189,8795.029189,-20.355094,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
290,2024-03-01,Europa,39531.0,32191.378591,-7339.621409,7339.621409,-18.566749,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
291,2024-04-01,Europa,28563.0,24225.701114,-4337.298886,4337.298886,-15.185026,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
292,2024-05-01,Europa,19597.0,16058.418073,-3538.581927,3538.581927,-18.056753,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
293,2024-06-01,Europa,15600.0,16279.466939,679.466939,679.466939,4.355557,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
294,2024-07-01,Europa,27062.0,23906.166377,-3155.833623,3155.833623,-11.661494,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
295,2024-08-01,Europa,24719.0,24266.246572,-452.753428,452.753428,-1.831601,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
296,2024-09-01,Europa,22025.0,18274.542567,-3750.457433,3750.457433,-17.028184,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
297,2024-10-01,Europa,28376.0,23508.461584,-4867.538416,4867.538416,-17.153716,"ETS(M,N,A)","(2, 2, 2, 0, 12, 0, 0)",Hybrid_ETS+ARIMA
