In [None]:
import numpy as np
import pandas as pd
import pathlib
import plotly.graph_objects as go
from neuralprophet import NeuralProphet

from sktime.forecasting.base import ForecastingHorizon

# Multivariate neural prophet
Use NeuralProphet to generate baseline quantile forecasts. 

In [None]:
# Name of the experiment variant to run. It is used to build the output
# directories (./output/experiments/<EXPERIMENT_NAME>/<cutoff_date>/...) and the
# main experiment loop selects the covariate set by substring:
#   * contains "onlyclimate" -> columns of climate_vars.csv, 6 regressor lags
#   * contains "noclimate"   -> columns of all_data.csv, 12 regressor lags
# Keep exactly one assignment active.
# NOTE(review): the plain "neuralprophet_covariates" name matches neither
# substring check in the main loop - confirm how that variant was produced.

# EXPERIMENT_NAME = "neuralprophet_covariates"

# EXPERIMENT_NAME = "neuralprophet_covariates_onlyclimate"

EXPERIMENT_NAME = "neuralprophet_covariates_noclimate"

In [None]:
# FRED-related series: the first CSV column is parsed into a datetime index.
all_data = pd.read_csv("./datasets/all_data.csv", index_col=0)
all_data = all_data.set_axis(pd.to_datetime(all_data.index), axis="index")

# Climate covariates: parse the 'date' column and promote it to the index.
climate_data = (
    pd.read_csv('./data_files/climate_vars.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Left join keeps every row of all_data and attaches climate values by date.
covariates_all = all_data.join(climate_data, how='left')

covariates_all


## Forecasting function

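NeuralProphet is fitted here with lagged covariates. The forecasting function expects that `df` has a time series index (monthly samples) and that the target data column is the 0th column in the dataframe. `covariates` is expected to have the same kind of index and to hold the covariate columns together with the target column (the main experiment loop adds the target to it before calling the function).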

In [None]:
def reformat_outputs(df, target=None):
    """Convert a NeuralProphet-style frame back to the layout of the other experiments.

    The ``ds`` column is renamed to ``REF_DATE`` and made the index, and the
    ``y`` column is renamed to the target category name.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``ds`` column (and normally a ``y`` column).
    target : str, optional
        Name to give the ``y`` column. When omitted, the notebook-level
        variable ``category`` is used, which is how this helper worked before
        the argument existed.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``REF_DATE``; ``df`` itself is not modified.
    """
    if target is None:
        # Legacy path: rely on the loop variable set by the main experiment cell.
        target = category
    return df.rename(columns={'ds': 'REF_DATE', 'y': target}).set_index('REF_DATE')

def format_neuralprophet_inputs(df, target=None):
    """Reshape a date-indexed frame into the ``ds`` / ``y`` layout NeuralProphet takes.

    The index is moved into a column called ``ds`` and the target column is
    renamed to ``y``; all other columns are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the timestamps.
    target : str, optional
        Name of the column to rename to ``y``. When omitted, the notebook-level
        variable ``category`` is used, which is how this helper worked before
        the argument existed.

    Returns
    -------
    pandas.DataFrame
        A new frame with a default integer index and ``ds`` as first column.
    """
    if target is None:
        # Legacy path: rely on the loop variable set by the main experiment cell.
        target = category
    # Naming the index first makes the result independent of the original
    # index name; a bare reset_index() only yields 'index' for unnamed indexes.
    return df.rename_axis('ds').reset_index().rename(columns={target: 'y'})

def get_forecast_NP_monthly(
    df,
    covariates,
    context_length,
    prediction_length,
    cutoff_date,
    regressor_lags=None,
):
    """Fit NeuralProphet with lagged covariates and return monthly quantile forecasts.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Target series with a datetime index. The 0th column (or the Series
        name) identifies the target. Its last index value is the cutoff when
        ``cutoff_date`` is None.
    covariates : pandas.DataFrame
        Datetime-indexed frame of covariates. Every column other than the
        target is registered as a lagged regressor. If the target column is
        not already present it is left-joined in from ``df``.
    context_length : int
        Number of autoregressive lags passed to NeuralProphet; also the number
        of months of history kept in the returned context frame.
    prediction_length : int
        Number of monthly steps to forecast.
    cutoff_date : str, datetime-like or None
        Last date used for fitting.
    regressor_lags : int, optional
        Lags per lagged regressor. When omitted, the notebook-level variable
        ``n_lags`` is used, which is how the function worked before this
        argument existed.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``context_df``: the fitted history (target plus covariates) for the
        last ``context_length`` months, indexed by ``REF_DATE``.
        ``forecast_df``: one row per forecast month (month-start index) with
        sorted columns ``q_<quantile>``, where ``q_0.5`` is the point forecast.
    """
    # The target name comes from the input itself instead of a notebook global.
    if isinstance(df, pd.Series):
        target = df.name
        target_frame = df.to_frame()
    else:
        target = df.columns[0]
        target_frame = df[[target]]

    # Establish cutoff date
    if cutoff_date is None:
        date = df.index[-1]
    else:
        date = pd.to_datetime(cutoff_date)
        print('cutoff date is {}'.format(date))

    if regressor_lags is None:
        # Legacy path: lag count set by the main experiment cell.
        regressor_lags = n_lags

    # The covariate frame normally already carries the target column.
    data = covariates
    if target not in data.columns:
        data = data.join(target_frame, how='left')

    # Resample to month-start frequency and fill gaps by interpolation.
    data = data.resample('MS').mean().interpolate()
    quantiles = [0.01, 0.05, 0.1, 0.25, 0.75, 0.9, 0.95, 0.99]

    context_df = data.loc[data.index <= date]
    if 'ds' not in data:
        # NeuralProphet layout: timestamps in 'ds', target in 'y'.
        context_df = (
            context_df.rename_axis('ds').reset_index().rename(columns={target: 'y'})
        )

    # Initialize the neural prophet model
    model = NeuralProphet(
        n_lags=context_length,
        n_forecasts=prediction_length,
        quantiles=quantiles,
    )

    # Add each column (except 'ds' and 'y') as a lagged regressor
    for covariate in context_df.columns:
        if covariate not in ('ds', 'y'):
            model.add_lagged_regressor(covariate, n_lags=regressor_lags)

    # NOTE(review): the data is resampled to month start ('MS') but the model
    # is told freq="M" (month end). Kept as in the original; confirm intent.
    model.fit(context_df, freq="M")

    # Make predictions using quantiles
    np_future_df = model.make_future_dataframe(context_df, periods=prediction_length)
    fc_series = model.predict(np_future_df, decompose=False, raw=False)

    # For each step k the k-step-ahead columns are collected and only the rows
    # where all of them are populated are kept. The column suffix is rounded
    # so float noise in q * 100 cannot change the label.
    per_step = []
    for step in range(1, prediction_length + 1):
        step_quantiles = {
            f"q_{q}": fc_series[f"yhat{step} {round(q * 100, 1)}%"] for q in quantiles
        }
        step_quantiles["q_0.5"] = fc_series[f"yhat{step}"]
        step_df = pd.DataFrame(step_quantiles)
        per_step.append(step_df[step_df.notna().all(axis=1)])
    # Single concat instead of DataFrame.append (removed in pandas 2.0).
    all_pred = pd.concat(per_step, ignore_index=True)

    # Month-start index beginning the month after the cutoff. Using `periods`
    # keeps the length right even when the cutoff is not a month start.
    first_month = (date + pd.DateOffset(months=1)).to_period('M').to_timestamp()
    forecast_date_index = pd.date_range(
        first_month, periods=prediction_length, freq="MS"
    )

    forecast_df = all_pred.set_index(forecast_date_index)
    forecast_df = forecast_df[sorted(forecast_df.columns)]

    context_df = context_df.loc[
        context_df.ds >= (date - pd.DateOffset(months=context_length))
    ]

    if 'ds' in context_df.columns:  # Reformat outputs to match other models
        context_df = context_df.rename(
            columns={'ds': 'REF_DATE', 'y': target}
        ).set_index('REF_DATE')

    return context_df, forecast_df

## Load data

The covariates were loaded above; here we load the 'target' food CPI variables, i.e. the series that will be forecast.

In [None]:
# Target CPI series: the first CSV column becomes a datetime index.
foodprice_df = pd.read_csv("./datasets/food_CPI.csv", index_col=0)
foodprice_df = foodprice_df.set_axis(pd.to_datetime(foodprice_df.index), axis="index")
foodprice_df.tail()

In [None]:
# foodprice_df = foodprice_df.merge(all_data, left_index=True, right_index=True, how='left')
# foodprice_df.tail()

In [None]:
# Every column of the food CPI frame is a forecasting target.
target_categories = list(foodprice_df.columns)
target_categories

## Define experiment cutoff dates

Our experiment design uses 6 annual cutoff dates that simulate the generation of a forecast once per year over the last 6 years. We'll consume data up to each cutoff date to fit/train models, and then evaluate over the next 18 months. In this notebook, we're only concerned with producing the retrospective forecasts; the analysis is done all together in another notebook.

In [None]:
# Whitespace-separated cutoff dates. Path.read_text() opens and closes the
# file itself, so no file handle is left open.
report_sim_dates = pathlib.Path("./data_files/experiment_cutoff_dates.txt").read_text().split()
report_sim_dates

## Plot forecast range with context

We could add different elements to plots including some error analysis, emphasis on different quantiles, etc.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def plot_quantile_forecast(category, context_df, forecast_df, actual_df, cutoff_date, save_path=None, show_plots=True):
    """Plot the historical series, the median forecast and the 0.05-0.95 quantile band.

    Parameters
    ----------
    category : str
        Target category name; used in the title and to pick the target column
        when ``context_df`` also carries covariate columns.
    context_df : pandas.DataFrame or pandas.Series
        Date-indexed history up to the cutoff.
    forecast_df : pandas.DataFrame
        Date-indexed quantile forecasts with columns ``q_0.05``, ``q_0.5`` and
        ``q_0.95``.
    actual_df : pandas.Series or pandas.DataFrame
        Observed values after the cutoff, appended to the history line.
    cutoff_date : str or datetime-like
        Drawn as a vertical dashed line and shown in the title.
    save_path : str or path-like, optional
        Where to write the figure; paths ending in "png" are saved at 300 dpi.
    show_plots : bool
        Whether to call ``plt.show()``.

    Returns
    -------
    None
        The figure is closed before returning so repeated calls in a loop do
        not accumulate open figures.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # History: plot only the target column, not every covariate in the frame.
    history = context_df
    if isinstance(history, pd.DataFrame) and category in history.columns:
        history = history[category]
    history = pd.concat((history, actual_df))
    ax.plot(history.index, history.values, color='black', label='Historical CPI')

    # Band between the 0.05 and 0.95 quantiles, i.e. a central 90% interval.
    ax.fill_between(
        forecast_df.index,
        forecast_df["q_0.05"],
        forecast_df["q_0.95"],
        facecolor='purple',
        alpha=0.5,
        label='90% Confidence'
    )

    # Median forecast line
    ax.plot(forecast_df.index, forecast_df["q_0.5"], color='purple', label='Median Forecast')

    # Titles, labels, cutoff marker and yearly ticks
    ax.set_title(f'{category}\nRetrospective Forecast - {cutoff_date}')
    ax.set_xlabel('Date')
    ax.set_ylabel('CPI (% 2002 Prices)')
    ax.axvline(pd.to_datetime(cutoff_date), label="Cutoff Date", color='black', ls='--', ms=1, alpha=0.5)
    ax.legend()
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    ax.tick_params(axis='x', labelrotation=45)
    ax.grid(axis='y')

    # Save before showing so the written file never depends on what the
    # backend does to the figure during show().
    if save_path:
        target_path = str(save_path)  # accept pathlib.Path as well as str
        fig.savefig(target_path, dpi=300 if target_path.endswith("png") else None)  # High res for png

    if show_plots:
        plt.show()

    # Release the figure; otherwise each call leaves one more open figure.
    plt.close(fig)


## Main experiment loop

In [None]:
# Choose the covariate columns and the regressor lag count once; both depend
# only on EXPERIMENT_NAME. 'noclimate' is checked first so that a name
# containing both substrings resolves the same way as before.
# `n_lags` stays a notebook-level variable because get_forecast_NP_monthly
# reads it when no explicit lag count is passed.
if 'noclimate' in EXPERIMENT_NAME:
    covariate_columns = list(all_data.columns)
    n_lags = 12
elif 'onlyclimate' in EXPERIMENT_NAME:
    covariate_columns = list(climate_data.columns)
    n_lags = 6
else:
    # Previously this case fell through to undefined (or stale) `covariates`
    # and `n_lags`; fail early with an explicit message instead.
    raise ValueError(
        f"EXPERIMENT_NAME={EXPERIMENT_NAME!r} selects no covariate set; "
        "expected a name containing 'onlyclimate' or 'noclimate'."
    )

for cutoff_date in report_sim_dates:
    forecast_output_dir = f"./output/experiments/{EXPERIMENT_NAME}/{cutoff_date}/forecasts"
    plot_output_dir = f"./output/experiments/{EXPERIMENT_NAME}/{cutoff_date}/plots"
    pathlib.Path(forecast_output_dir).mkdir(parents=True, exist_ok=True)
    pathlib.Path(plot_output_dir).mkdir(parents=True, exist_ok=True)

    for category in target_categories:
        # Work on a copy so adding the target column never writes into a
        # slice of covariates_all.
        covariates = covariates_all[covariate_columns].copy()
        covariates[category] = covariates_all[category]

        context_df, forecast_df = get_forecast_NP_monthly(
            foodprice_df[[category]],  # Indexing this way to get a dataframe with 1 column as opposed to a series
            covariates,                # Context df with covariates
            context_length=12*15,      # Context length
            prediction_length=18,
            cutoff_date=cutoff_date    # Training / fitting cutoff date
        )
        forecast_df.to_csv(f"{forecast_output_dir}/{category}.csv")

        # Forecast dates for which an observed value exists, in forecast order.
        observed_dates = forecast_df.index[forecast_df.index.isin(foodprice_df.index)]
        plot_quantile_forecast(
            category=category,         # The target category name
            context_df=context_df,     # Historical data to plot
            forecast_df=forecast_df,   # Quantile forecast dataframe
            actual_df=foodprice_df[category].loc[observed_dates],  # 'actual' data to plot against forecast
            save_path=f"{plot_output_dir}/{category}.svg",
            cutoff_date=cutoff_date,
            show_plots=False
        )