# Modelagem HTS

Neste notebook é feita a avaliação dos modelos que serão utilizados para compor a hierarquia. A reconciliação hierárquica das previsões também é feita aqui.

## Bibliotecas

In [2]:
import pandas as pd

from statsforecast import StatsForecast

from utilsforecast.losses import rmse, mae, smape, mase, scaled_crps, mqloss
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import evaluate
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut, MinTrace
from hierarchicalforecast.utils import aggregate, HierarchicalPlot

  __import__("pkg_resources").declare_namespace(__name__)  # type: ignore
  from .autonotebook import tqdm as notebook_tqdm


## Identificação dos melhores modelos por continente

In [4]:
# Print the docstring of utilsforecast's rmse to check its arguments
# (models, id_col, target_col) before it is called further down.
help(rmse)

Help on function rmse in module utilsforecast.losses:

rmse(df: pandas.core.frame.DataFrame, models: List[str], id_col: str = 'unique_id', target_col: str = 'y') -> pandas.core.frame.DataFrame
    Root Mean Squared Error (RMSE)

    RMSE measures the relative prediction
    accuracy of a forecasting method by calculating the squared deviation
    of the prediction and the observed value at a given time and
    averages these devations over the length of the series.
    Finally the RMSE will be in the same scale
    as the original time series so its comparison with other
    series is possible only if they share a common scale.
    RMSE has a direct connection to the L2 norm.

    Parameters
    ----------
    df : pandas or polars DataFrame
        Input dataframe with id, actual values and predictions.
    models : list of str
        Columns that identify the models predictions.
    id_col : str (default='unique_id')
        Column that identifies each serie.
    target_col : str (d

### Importando DataFrames das previsões e dos valores reais (`actual`)

#### SARIMAX

In [None]:
# Oceania-specific SARIMAX forecasts, read from their own file
oceania_sarimax_df = pd.read_csv("oceania_sarimax_forecasts.csv")

# Build the full SARIMAX table:
#   1. read the general forecasts and drop their Oceania rows, since the
#      Oceania forecasts come from the dedicated file above;
#   2. stack both sources. ignore_index=True rebuilds a unique 0..n-1 index;
#      without it each frame keeps its own row labels and the result can
#      carry repeated labels;
#   3. rename the prediction column so it identifies the model.
sarimax_df = (
    pd.concat(
        [
            pd.read_csv("00_sarimax_forecasts.csv").loc[
                lambda df: df["continent"] != "Oceania"
            ],
            oceania_sarimax_df,
        ],
        ignore_index=True,
    )
    .rename(columns={"forecast": "sarimax_forecast"})
)

# RMSE of the SARIMAX forecasts against the "actual" column, one row per continent
rmse_sarimax = rmse(
    sarimax_df,
    models=["sarimax_forecast"],
    target_col="actual",
    id_col="continent",
)



#### XGBOOST

In [22]:
# Load the XGBoost forecasts and rename the prediction column after the model
xgboost_df = (
    pd.read_csv("xgboost_forecasts.csv")
    .rename(columns={"forecast": "xgboost_forecast"})
)

# RMSE of the XGBoost forecasts against the "actual" column, one row per continent
rmse_xgboost = rmse(
    xgboost_df,
    models=["xgboost_forecast"],
    id_col="continent",
    target_col="actual",
)

rmse_xgboost

Unnamed: 0,continent,xgboost_forecast
0,Africa,79.142045
1,America_Central_E_Caribe,262.88711
2,America_Do_Norte,2725.095099
3,America_Do_Sul,13583.029284
4,Asia,918.125635
5,Europa,4340.333062
6,Oceania,216.755227
7,TOTAL (Agregado),18387.481725


In [25]:
# Put the RMSE of both models side by side, keyed by continent. This is an
# inner join (the pandas default), so only ids present in both tables are kept.
# NOTE(review): in the saved outputs the "TOTAL (Agregado)" row of the XGBoost
# table does not appear in the merged table - confirm that dropping it is intended.
xgboost_sarimax_df = pd.merge(
    rmse_xgboost,
    rmse_sarimax,
    on="continent",
    how="inner",
)
xgboost_sarimax_df

Unnamed: 0,continent,xgboost_forecast,sarimax_forecast
0,Africa,79.142045,118.878661
1,America_Central_E_Caribe,262.88711,392.914143
2,America_Do_Norte,2725.095099,5861.272674
3,America_Do_Sul,13583.029284,39006.377839
4,Asia,918.125635,1575.86579
5,Europa,4340.333062,8383.206818
6,Oceania,216.755227,393.809531
