In [None]:
import json
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from modularprophet.containers import Model, Sequential, Ensemble
from modularprophet.compositions import Additive, Stationary
from modularprophet.components import Trend, Regressor
from modularprophet.modules.ar_net import LaggedNet
from modularprophet.modules.seasonality import FourierSeasonality
from modularprophet.forecaster import ModularProphet

In [None]:
# Air-passengers example dataset hosted in the neuralprophet-data repository.
AIR_PASSENGERS_URL = "https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/air_passengers.csv"
df = pd.read_csv(AIR_PASSENGERS_URL)

In [None]:
# Demo model: a trend plus a yearly Fourier seasonality, chained in a Sequential container.
# Optional autoregressive component (currently disabled): LaggedNet(n_lags=5)
trend_component = Trend()
yearly_component = FourierSeasonality(
    "yearly",
    period=365.25,
    series_order=5,
    growth="linear",
)
m = ModularProphet(Sequential(trend_component, yearly_component))
m

In [None]:
# Fit the demo model on the full dataset with a 10-step forecast horizon.
m.fit(
    df,
    n_forecasts=10,
    learning_rate=0.1,
)

In [None]:
# Predict on the first 15 rows only, which keeps the staircase tables below small.
x, y, y_hat, components = m.predict(df.head(15))

In [None]:
# Transposing turns every row of `y` into its own column, so each row is drawn as one line.
# NOTE(review): presumably one row per forecast origin — confirm against m.predict's return shape.
pd.DataFrame(y).T.plot()

In [None]:
# Same view for the predictions: each row of `y_hat` becomes a column and is drawn as one line.
pd.DataFrame(y_hat.T).plot()

### Convert Predictions to Readable Format

In [None]:
def array_to_df(name, array):
    """Wrap ``array`` in a DataFrame with columns labelled ``<name>_(<k>)``.

    ``k`` is the default column label of ``pd.DataFrame(array)`` plus one, so
    the labels are 1-based: ``<name>_(1)``, ``<name>_(2)``, ...
    """
    frame = pd.DataFrame(array)
    labels = []
    for position in frame.columns:
        labels.append(f"{name}_({position + 1})")
    frame.columns = labels
    return frame

def long_to_staircase_df(df):
    """Shift every forecast-step column down so that rows line up in time.

    The forecast step ``k`` of a column is read from the LAST run of digits in
    its name (e.g. ``"y_hat_(3)"`` -> 3, ``"Trend2_(3)"`` -> 3) and the column
    is shifted down by ``k - 1`` rows. Columns whose name contains no digits
    are not shifted. The frame is first extended with NaN rows so that the
    largest shift does not push values off the end.

    Args:
        df: DataFrame with one column per (component, forecast step).

    Returns:
        A new DataFrame with a fresh 0..n-1 index and
        ``len(df) + max_shift`` rows. The input frame is not modified.
    """
    n_shifts = []
    for col in df.columns:
        # Only the last digit run is the step index; digits earlier in the
        # name belong to the component name and must not be concatenated in.
        digit_runs = re.findall(r"\d+", str(col))
        n_shifts.append(int(digit_runs[-1]) - 1 if digit_runs else 0)

    # One reindex adds all padding rows at once and also works when no
    # column needs shifting (single-step forecasts) or the frame is empty.
    extra_rows = max(max(n_shifts, default=0), 0)
    staircase = df.reset_index(drop=True).reindex(range(len(df) + extra_rows))
    for col, shift in zip(staircase.columns, n_shifts):
        staircase[col] = staircase[col].shift(shift)
    return staircase

In [None]:
# One labelled frame per component, placed side by side and then shifted into staircase layout.
component_frames = [
    array_to_df(component_name, values)
    for component_name, values in components.items()
]
prediction_df = pd.concat(component_frames, axis=1)
staircase_df = long_to_staircase_df(prediction_df)

In [None]:
def _first_steps_plus_last_window(windows, name):
    """Flatten windows into one column: entry 0 of every window, then the rest of the last window."""
    first_steps = pd.DataFrame(windows[:, 0], columns=[name], dtype=np.float32)
    last_window_rest = pd.DataFrame(windows[-1][1:], columns=[name])
    return pd.concat([first_steps, last_window_rest], axis=0, ignore_index=True)

y_df = _first_steps_plus_last_window(y, "y")
x_df = _first_steps_plus_last_window(x, "x")

# Drop any y/x columns left over from a previous execution of this cell, so that
# re-running it does not append duplicate columns (which would make
# staircase_df["y"] return a DataFrame instead of a Series).
staircase_df = pd.concat([
    staircase_df.drop(columns=["y", "x"], errors="ignore"),
    y_df,
    x_df,
], axis=1)

In [None]:
# Show the aligned table: shifted component columns plus the "y" and "x" columns.
staircase_df

In [None]:
# Plot every remaining column against "x", using the reversed viridis colormap.
staircase_df.plot(x="x", colormap="viridis_r")

### Evaluate Mean Squared Error per Forecast Step

In [None]:
# Squared deviation of every column (except "x") from the actuals in "y".
value_columns = [name for name in staircase_df.columns if name != "x"]
squared_errors = {
    name: (staircase_df[name] - staircase_df["y"]) ** 2
    for name in value_columns
}
error_df = staircase_df.assign(**squared_errors)

# Column-wise mean gives one average squared error per column; plot only the y_hat ones.
res = error_df.mean()
y_hat_columns = [name for name in error_df.columns if "y_hat" in name]
res[y_hat_columns].plot.bar()

### Backtesting with rolling origin and re-fitting

In [None]:
def calculate_metrics(y, y_hat, naive, seasonal_naive):
    """Compute point-forecast error metrics against two baseline forecasts.

    Args:
        y: Actual values (1-D array-like).
        y_hat: Model forecasts, same length as ``y``.
        naive: Naive baseline forecasts, same length as ``y``.
        seasonal_naive: Seasonal-naive baseline forecasts, same length as ``y``.

    Returns:
        dict with keys ``"MAE"``, ``"RMSE"``, ``"MASE"``, ``"sMASE"`` and ``"MAPE"``.
        MASE / sMASE are the model MAE divided by the MAE of the naive /
        seasonal-naive forecast over the same points. MAPE is a fraction, not
        a percentage.

    Note:
        A baseline with zero error, or a zero in ``y``, yields ``inf``/``nan``
        for the affected ratio (NumPy division semantics); no exception is raised.
    """
    # Accept lists and other array-likes, not only ndarrays.
    y = np.asarray(y, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)
    naive = np.asarray(naive, dtype=float)
    seasonal_naive = np.asarray(seasonal_naive, dtype=float)

    abs_error = np.abs(y - y_hat)
    mae = np.mean(abs_error)
    rmse = np.sqrt(np.mean((y - y_hat) ** 2))
    mase = mae / np.mean(np.abs(y - naive))
    smase = mae / np.mean(np.abs(y - seasonal_naive))
    # Divide by |y|: with a signed denominator, errors on negative targets
    # would count negatively and cancel out errors on positive ones.
    mape = np.mean(abs_error / np.abs(y))
    return {"MAE": mae, "RMSE": rmse, "MASE": mase, "sMASE": smase, "MAPE": mape}

def naive_forecasts(train_df, seasonal_cycle, n_forecasts):
    """Build the naive and seasonal-naive baseline forecasts from the training data.

    Args:
        train_df: DataFrame with a ``"y"`` column.
        seasonal_cycle: Number of trailing observations that form one season.
        n_forecasts: Number of forecast steps to produce.

    Returns:
        Tuple ``(naive, seasonal_naive)`` of arrays with ``n_forecasts`` entries each.
    """
    target = train_df["y"]

    # Naive: the last observed value, repeated for every forecast step.
    last_value = target.tail(1).values
    naive = np.ones(n_forecasts) * last_value

    # Seasonal naive: repeat the last cycle often enough to cover the horizon, then cut to length.
    last_cycle = target.tail(seasonal_cycle).values
    repetitions = n_forecasts // seasonal_cycle + 1
    seasonal_naive = np.tile(last_cycle, repetitions)[:n_forecasts]

    return naive, seasonal_naive

def split_data(df, i, holdout, n_forecasts, n_lags):
    """Cut one rolling-origin train/evaluation split out of ``df``.

    The training window is rows ``i`` up to (not including)
    ``len(df) - holdout + n_forecasts + i``. The evaluation window is the last
    ``n_lags`` training rows followed by the next ``n_forecasts`` rows.

    Args:
        df: DataFrame with a ``"y"`` column, ordered in time.
        i: Index of the split; shifts both windows forward by ``i`` rows.
        holdout: Number of trailing rows reserved for backtesting.
        n_forecasts: Forecast horizon (at least 1).
        n_lags: Number of history rows prepended to the evaluation window.

    Returns:
        Tuple ``(train_df, eval_df, y)`` where ``y`` holds the last
        ``n_forecasts`` values of ``eval_df["y"]``.

    Raises:
        ValueError: If the requested windows do not fit inside ``df``.
    """
    n_rows = len(df)
    # Work with absolute positions: negative slice offsets silently turn into
    # "empty" or "from the start" once they reach or cross zero.
    train_end = n_rows - holdout + n_forecasts + i
    eval_start = train_end - n_lags
    eval_end = train_end + n_forecasts
    if (
        n_forecasts < 1
        or i < 0
        or train_end <= i
        or eval_start < 0
        or eval_end > n_rows
    ):
        raise ValueError(
            f"split {i} does not fit: train rows [{i}, {train_end}), "
            f"eval rows [{eval_start}, {eval_end}) of {n_rows} available rows"
        )

    train_df = df.iloc[i:train_end]
    eval_df = df.iloc[eval_start:eval_end]
    y = eval_df["y"].values[-n_forecasts:]
    return train_df, eval_df, y

In [None]:
# Backtest configuration
n_forecasts = 10      # forecast horizon used for fitting, evaluation and the baselines
n_lags = 6            # history rows fed to LaggedNet and prepended to each evaluation window
holdout = 44          # trailing rows of df reserved for the rolling evaluation
seasonal_cycle = 12   # cycle length for the seasonal-naive baseline
num_splits = (holdout - 2 * n_forecasts) + 1
metrics = {}
predictions = []

for i in range(0, num_splits):
    train_df, eval_df, y = split_data(df, i, holdout, n_forecasts, n_lags)
    # A fresh model is built and re-fitted for every forecast origin.
    m = ModularProphet(
        Additive(
            Trend(),
            FourierSeasonality("yearly", period=365.25, series_order=5, growth="linear"),  # alternative: multiply_with="Trend"
            LaggedNet(n_lags=n_lags)
        )
    )
    # Use the configured horizon rather than a second hard-coded 10, so changing
    # n_forecasts above keeps training, evaluation and padding consistent.
    m.fit(train_df, n_forecasts=n_forecasts, learning_rate=0.1)  # alternative: optimizer="adam"
    # Evaluation
    _, _, y_hat, _ = m.predict(eval_df)
    prediction = y_hat.squeeze(0)
    # NaN-pad so that forecast i starts at row i; all padded rows share the same length.
    predictions.append(np.pad(prediction, (i, num_splits-i), mode="constant", constant_values=(np.nan)))
    # Metrics
    naive, seasonal_naive = naive_forecasts(train_df, seasonal_cycle, n_forecasts)
    metrics[i] = calculate_metrics(y, prediction, naive, seasonal_naive)

In [None]:
# One column per split, one row per time step. The transpose and the trim of the
# last (all-padding) row happen in NumPy, so the DataFrame is a fresh object rather
# than a slice of another frame; adding the "y" column to a slice is what raises
# SettingWithCopyWarning.
forecast_matrix = np.array(predictions).T[:-1]
predictions_combined = pd.DataFrame(forecast_matrix)
# Actuals for the same trailing rows of the dataset.
predictions_combined["y"] = df["y"].values[-len(predictions_combined):]
predictions_combined.plot(colormap="Blues")

In [None]:
# One row per split, one column per metric; compare the absolute error metrics.
metrics_by_split = pd.DataFrame.from_dict(metrics, orient="index")
metrics_by_split[["MAE", "RMSE"]].plot.bar()

In [None]:
# Relative error (MAPE) per split.
mape_by_split = pd.DataFrame.from_dict(metrics, orient="index")[["MAPE"]]
mape_by_split.plot.bar()

In [None]:
# Errors scaled by the naive and seasonal-naive baselines, per split.
scaled_by_split = pd.DataFrame.from_dict(metrics, orient="index")[["MASE", "sMASE"]]
scaled_by_split.plot.bar()