In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell is executed, so changes to project code are picked up live.
%load_ext autoreload
%autoreload 2

In [181]:
import numpy as np
import pandas as pd
from cpilib import HICP


# Conference sentiment covariates: read the CSV and make sure "Date" is a
# proper datetime column before resampling on it.
covariates = pd.read_csv('data/covariates.csv', parse_dates=True)
covariates["Date"] = pd.to_datetime(covariates['Date'])

# Average within each month-start ("MS") bucket and forward-fill months that
# have no observations.
_monthly_mean = covariates.resample("MS", on="Date").mean().ffill()

# Rescale with (x - 4.5) / 4.5.
# NOTE(review): 4.5 looks like the midpoint of the raw score scale — confirm.
_monthly_scaled = _monthly_mean.sub(4.5).div(4.5)

# Column names are captured while "Date" is still the index, so the list
# contains the covariate columns only.
covariate_names = _monthly_scaled.columns
monthly_covariates = _monthly_scaled.reset_index()

# HICP data loaded through the cache helper.
# NOTE(review): the original comment says this forces a reload; presumably
# time_limit=1 makes the cache count as stale — confirm against cpilib.
hicp = HICP.from_cache(time_limit=1)


In [232]:
# Modelling imports; the plotly plotting backend is configured right after them.
import pandas as pd
from darts import TimeSeries
from llm_inflation.model import MultivariateForecaster
from llm_inflation.utils import lapeyres_aggregation

# Route pandas' .plot() calls through plotly for the rest of the session.
pd.options.plotting.backend = "plotly"

EURO_AREA = "EA"
COICOP_SUBSET = ["FOOD", "NRG", "SERV", "IGD_NNRG"]


def _component_series(frame):
    """Wrap ``frame`` (a "date" column plus the COICOP_SUBSET columns) as a darts TimeSeries."""
    return TimeSeries.from_dataframe(frame, value_cols=COICOP_SUBSET, time_col="date")


# Price indices of the selected components, with the index exposed as a
# "date" column and rows containing any missing value removed.
prices_df = (
    hicp.prices[EURO_AREA][COICOP_SUBSET]
    .rename_axis("date")
    .reset_index()
    .dropna()
)

# Component weights, re-keyed by calendar year.
weights_df = hicp.weights[EURO_AREA][COICOP_SUBSET]
weights_df.index = weights_df.index.year

# Levels, month-on-month log returns and 12-period percentage changes.
# NOTE(review): `log_returns` is not a stock pandas DataFrame method —
# presumably registered by one of the project imports; confirm.
prices_ts = _component_series(prices_df)
prices_mom_ts = _component_series(
    prices_df.set_index("date").log_returns().dropna().reset_index()
)
prices_yoy_ts = _component_series(
    prices_df.set_index("date").pct_change(12).dropna().reset_index()
)

# Monthly sentiment covariates as a TimeSeries keyed on their "Date" column.
covariates_ts = TimeSeries.from_dataframe(
    monthly_covariates,
    value_cols=covariate_names,
    time_col="Date",
)



def _build_forecaster(algorithm):
    """Create a MultivariateForecaster for ``algorithm`` with the settings shared by every model here.

    The arguments are rebuilt on each call, so every forecaster receives its
    own ``lags`` list.
    """
    return MultivariateForecaster(
        algorithm,
        horizon=6,
        lags=[-12, -9, -6, -3, -2, -1],
        prices=prices_yoy_ts,
        weights=None,
        covariates=covariates_ts,
        lags_covariates=6,
    )


# All three models are trained on the year-on-year series with the covariates.
linreg = _build_forecaster("LinearRegression")
random_forest = _build_forecaster("RandomForest")
xgboost = _build_forecaster("XGBoost")

# Only the linear-regression and random-forest models are fitted in this cell;
# `xgboost` is constructed but left unfitted.
linreg.fit()
random_forest.fit()

In [193]:
# Disabled cell: backtests for each forecaster (kept for reference, uncomment to run).
# print(f"Backtest for LinearRegression: {linreg.backtest(3)}")
# print(f"Backtest for RandomForest: {random_forest.backtest(3)}")
# #print(f"Backtest for XGBoost: {xgboost.backtest(3)}")

In [192]:
# Disabled cell: SHAP explanation of the random-forest model (kept for reference, uncomment to run).
# from darts.explainability.shap_explainer import ShapExplainer

# shap_explainer = ShapExplainer(random_forest.model)

# results = shap_explainer.explain()
# shap_explainer.summary_plot()
# shap_explainer.force_plot_from_ts()

In [233]:
# Forecast origin and number of steps to predict.
forecast_start = "2019-02-01"
horizon = 6

# make_training_data returns the training target, the training covariates and
# a third frame that is kept as `true_index`.
_training_bundle = linreg.make_training_data(
    forecast_start=forecast_start,
    prices_df=prices_df,
)
training_data, training_covariates, true_index = _training_bundle

# Re-fit the wrapped model on that training window, then predict `horizon`
# steps ahead and convert the forecast to a DataFrame.
# NOTE(review): recent darts releases rename `pd_dataframe` to
# `to_dataframe` — confirm against the pinned darts version.
linreg.model.fit(training_data, past_covariates=training_covariates)
_forecast_ts = linreg.model.predict(horizon)
prediction_yoy = _forecast_ts.pd_dataframe()

In [234]:
# Work on a copy so `true_index` itself is left untouched.
reconstructed_components_df = true_index.copy()

# Each forecast row is written as: value one year earlier * (1 + predicted
# year-on-year change). Rows are filled in forecast order, so a later date can
# build on a row written by an earlier iteration.
_one_year = pd.DateOffset(years=1)
for forecast_date in prediction_yoy.index:
    _base_row = reconstructed_components_df.loc[forecast_date - _one_year]
    _growth_factor = 1 + prediction_yoy.loc[forecast_date]
    reconstructed_components_df.loc[forecast_date] = _base_row * _growth_factor

In [235]:
# Combine the reconstructed component rows with the yearly weights for the
# euro area via the project's aggregation helper.
final_result = lapeyres_aggregation(
    reconstructed_components_df,
    weights_df,
    hicp,
    country=EURO_AREA,
)

In [236]:
# Aggregated forecast next to the published "CP00" series on the same dates.
# NOTE(review): "CP00" is presumably the all-items HICP — confirm.
_predicted_series = final_result.rename("Predicted")
_actual_series = (
    hicp.prices[EURO_AREA]["CP00"]
    .loc[final_result.index]
    .rename("Actual")
)

# The plot is the last expression so the notebook renders the figure.
pd.concat([_predicted_series, _actual_series], axis=1).plot()