In [None]:
# ---------------------------------------------------------
# 0. Imports
# ---------------------------------------------------------
import pandas as pd

from hierarchicalforecast.utils import aggregate
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.methods import MinTrace
from hierarchicalforecast.evaluation import evaluate
from utilsforecast.losses import rmse

# ---------------------------------------------------------
# 1. Carregar dados de treino e teste
#    (antes de 2024 = treino, 2024 = teste)
# ---------------------------------------------------------
train_path = "xgboost_forecasts_TRAIN.csv"  # ajuste para seu caminho real
test_path  = "xgboost_forecasts.csv"

train = pd.read_csv(train_path)
test  = pd.read_csv(test_path)

# Converte data
for df in (train, test):
    df["date"] = pd.to_datetime(df["date"])

# Se você tiver um único CSV e quiser separar por data, seria algo como:
# data = pd.read_csv("todos_os_dados.csv")
# data["date"] = pd.to_datetime(data["date"])
# train = data[data["date"] < "2024-01-01"].copy()
# test  = data[data["date"] >= "2024-01-01"].copy()

# ---------------------------------------------------------
# 2. Preparar dados no formato esperado pelo HierarchicalForecast
#    - Ignoramos linhas onde continent == 'TOTAL' e deixamos o pacote
#      gerar o TOTAL pela agregação.
#    - Criamos uma coluna de nível superior ("World") constante.
#    - Renomeamos colunas para o padrão: ds (tempo), y (target)
#      e BaseModel (previsão do modelo base).
# ---------------------------------------------------------
def prepare_bottom_level(df: pd.DataFrame) -> pd.DataFrame:
    # Mantém apenas continentes (bottom level)
    df = df[df["continent"] != "TOTAL (Agregado)"].copy()

    df["World"] = "WORLD"  # nível mais agregado

    df = df.rename(
        columns={
            "date": "ds",
            "continent": "Continent",
            "actual": "y",          # alvo
            "forecast": "BaseModel" # previsões base do seu modelo
        }
    )
    return df[["World", "Continent", "ds", "y", "BaseModel"]]

bottom_train = prepare_bottom_level(train)
bottom_test  = prepare_bottom_level(test)

# ---------------------------------------------------------
# 3. Construir a hierarquia (Total -> Continente) com aggregate
#    spec define os níveis:
#    - ["World"]                        -> série TOTAL
#    - ["World", "Continent"]          -> séries por continente
# ---------------------------------------------------------
hierarchy_levels = [
    ["World"],
    ["World", "Continent"]
]

# Treino: contém y (actual) + BaseModel (previsão 1-step-ahead/insample)
Y_train_df, S_df, tags = aggregate(
    df=bottom_train,
    spec=hierarchy_levels,
    target_cols=["y", "BaseModel"]  # agrega ambos
)

# Teste: contém y (valor observado no teste) + BaseModel (previsão out-of-sample)
Y_test_df, _, _ = aggregate(
    df=bottom_test,
    spec=hierarchy_levels,
    target_cols=["y", "BaseModel"]
)

# ---------------------------------------------------------
# 4. Separar dataframes no formato esperado pela reconciliação
#    - Y_df: dados "históricos" + previsões insample para estimar covariância
#            (aqui estamos usando treino, com colunas y e BaseModel)
#    - Y_hat_df: previsões base no período de teste (futuro)
#            (coluna BaseModel será reconciliada)
# ---------------------------------------------------------
Y_df = Y_train_df.copy()  # contém: unique_id, ds, y, BaseModel

# Y_hat_df: previsões para o horizonte futuro (teste).
# O HierarchicalForecast só precisa das colunas: unique_id, ds e modelos (BaseModel).
Y_hat_df = Y_test_df[["unique_id", "ds", "BaseModel"]].copy()

# ---------------------------------------------------------
# 5. Definir o reconciliador MinT (MinTrace com mint_shrink)
#    Isso é o equivalente ao MinT do FPP3.
# ---------------------------------------------------------
reconcilers = [
    MinTrace(method="mint_shrink")  # MinT(Shrink),
    MinTrace(method="ols")
]

hrec = HierarchicalReconciliation(reconcilers=reconcilers)

# ---------------------------------------------------------
# 6. Rodar a reconciliação
#    - Y_hat_df: previsões base (teste)
#    - Y_df: dados com y e BaseModel no treino (para estimar matriz de covariância)
#    - S_df, tags: estrutura da hierarquia
# ---------------------------------------------------------
Y_rec_df = hrec.reconcile(
    Y_hat_df=Y_hat_df,
    Y_df=Y_df,
    S=S_df,
    tags=tags
)

# O resultado terá colunas:
#   - "BaseModel" (previsão base incoerente)
#   - "BaseModel/MinTrace_method-mint_shrink" (previsão reconciliada MinT)

print(Y_rec_df.head(15))

# ---------------------------------------------------------
# 7. (Opcional) Juntar com os valores reais de teste e avaliar RMSE
# ---------------------------------------------------------
# Juntando previsões reconciliadas com os valores reais do período de teste
# (Y_test_df tem a coluna "y" com os valores observados nesse período)
Y_eval = Y_rec_df.merge(
    Y_test_df[["unique_id", "ds", "y"]],
    on=["unique_id", "ds"],
    how="left"
)

# Definir tags de avaliação por nível (Total e Continentes)
eval_tags = {
    "Total":     tags["World"],               # nível agregado (WORLD)
    "Continents": tags["World/Continent"],    # nível por continente
}

evaluation = evaluate(
    df=Y_eval,
    metrics=[rmse],
    tags=eval_tags,
    train_df=Y_df,  # opcional, útil p/ métricas tipo MASE
    id_col="unique_id",
    time_col="ds",
    target_col="y"
)

#print(evaluation)


       unique_id         ds     BaseModel  \
0          WORLD 2024-01-01  141301.56578   
1          WORLD 2024-02-01  147725.31209   
2          WORLD 2024-03-01  139848.66964   
3          WORLD 2024-04-01  101127.15315   
4          WORLD 2024-05-01   84400.96647   
5          WORLD 2024-06-01   78341.32233   
6          WORLD 2024-07-01   90442.25192   
7          WORLD 2024-08-01  102752.61480   
8          WORLD 2024-09-01   95703.30306   
9          WORLD 2024-10-01  119333.71560   
10         WORLD 2024-11-01  141446.27665   
11         WORLD 2024-12-01  146871.80812   
12  WORLD/Africa 2024-01-01     365.55118   
13  WORLD/Africa 2024-02-01     355.60855   
14  WORLD/Africa 2024-03-01     382.53174   

    BaseModel/MinTrace_method-mint_shrink  
0                            141301.56578  
1                            147725.31209  
2                            139848.66964  
3                            101127.15315  
4                             84400.96647  
5              

In [12]:
(Y_rec_df["BaseModel"] - Y_rec_df["BaseModel/MinTrace_method-mint_shrink"])

0     -5.820766e-11
1      5.820766e-11
2      0.000000e+00
3      0.000000e+00
4      0.000000e+00
           ...     
103    0.000000e+00
104    0.000000e+00
105   -1.455192e-11
106    0.000000e+00
107    0.000000e+00
Length: 108, dtype: float64

In [18]:
Y_rec_df

Unnamed: 0,unique_id,ds,BaseModel,BaseModel/MinTrace_method-mint_shrink
0,WORLD,2024-01-01,141301.56578,141301.56578
1,WORLD,2024-02-01,147725.31209,147725.31209
2,WORLD,2024-03-01,139848.66964,139848.66964
3,WORLD,2024-04-01,101127.15315,101127.15315
4,WORLD,2024-05-01,84400.96647,84400.96647
...,...,...,...,...
91,WORLD/Oceania,2024-08-01,871.67633,871.67633
92,WORLD/Oceania,2024-09-01,964.49270,964.49270
93,WORLD/Oceania,2024-10-01,1297.83150,1297.83150
94,WORLD/Oceania,2024-11-01,973.06555,973.06555


In [20]:
train

Unnamed: 0,date,continent,actual,forecast,error,abs_error,pct_error
0,2002-01-01,TOTAL (Agregado),73318.0,79952.92000,-6634.921875,6634.921875,-9.049513
1,2002-02-01,TOTAL (Agregado),66973.0,77671.92000,-10698.921875,10698.921875,-15.974978
2,2002-03-01,TOTAL (Agregado),61748.0,75710.47000,-13962.468750,13962.468750,-22.612018
3,2002-04-01,TOTAL (Agregado),47035.0,54464.22000,-7429.218750,7429.218750,-15.795086
4,2002-05-01,TOTAL (Agregado),49495.0,47087.81000,2407.191406,2407.191406,4.863504
...,...,...,...,...,...,...,...
2107,2023-08-01,Oceania,729.0,677.66205,51.337952,51.337952,7.042243
2108,2023-09-01,Oceania,744.0,715.67220,28.327820,28.327820,3.807503
2109,2023-10-01,Oceania,804.0,769.83704,34.162964,34.162964,4.249125
2110,2023-11-01,Oceania,763.0,770.43330,-7.433289,7.433289,-0.974219


In [21]:
train = train[train["continent"] != "TOTAL (Agregado)"].copy()
train

Unnamed: 0,date,continent,actual,forecast,error,abs_error,pct_error
264,2002-01-01,Africa,1598.0,1481.17590,116.824097,116.824097,7.310644
265,2002-02-01,Africa,1479.0,1405.05200,73.947998,73.947998,4.999865
266,2002-03-01,Africa,1372.0,1409.86220,-37.862183,37.862183,-2.759634
267,2002-04-01,Africa,1046.0,1311.02820,-265.028198,265.028198,-25.337304
268,2002-05-01,Africa,1101.0,1117.00720,-16.007202,16.007202,-1.453878
...,...,...,...,...,...,...,...
2107,2023-08-01,Oceania,729.0,677.66205,51.337952,51.337952,7.042243
2108,2023-09-01,Oceania,744.0,715.67220,28.327820,28.327820,3.807503
2109,2023-10-01,Oceania,804.0,769.83704,34.162964,34.162964,4.249125
2110,2023-11-01,Oceania,763.0,770.43330,-7.433289,7.433289,-0.974219
