In [1]:
from neuralprophet import NeuralProphet
import numpy as np
import pandas as pd
import os

In [2]:
# %%capture
# # create temporary data folder and download data
# !rm -r tmp-data
# !mkdir tmp-data
# !wget -P tmp-data https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/air_passengers.csv
# os.listdir('tmp-data')

In [4]:
# Alternative source: use the copy downloaded into `tmp-data` (requires the
# commented-out download cell to have been run first).
# air_passengers_df = pd.read_csv(os.path.join('tmp-data', 'air_passengers.csv'))
# df = air_passengers_df

# Default: read the air-passengers dataset via a relative path into the repository's test data.
df = pd.read_csv("../../tests/test-data/air_passengers.csv")

In [5]:
%%capture
# n_forecasts=3: every forecast covers three steps (yhat1..yhat3 / step0..step2 below).
# n_lags=5: enables the autoregressive ("ar") component — presumably fed with the
# last 5 observations; see the NeuralProphet docs to confirm.
m = NeuralProphet(n_lags=5, n_forecasts=3)
# freq="MS": the data is monthly, stamped at the start of each month.
metrics_train = m.fit(df=df, freq="MS")

INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 292
INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 3.67E-02, min: 2.23E+00
INFO - (NP.utils_torch.lr_range_test) - learning rate range test selected lr: 5.67E-01


# Collect in-sample predictions

In [6]:
# Work on an independent (deep) copy so the training frame `df` is left untouched.
future = df.copy()

## Predictions sorted based on forecast target
Calling `predict`, we get a `df_forecast` where each `'yhat<i>'` refers to the `<i>`-step-ahead prediction for **this row's datetime being the target**.
Here, `<i>`  refers to the age of the prediction.

e.g. `yhat3` is the prediction for this datetime, predicted 3 steps ago, it is "3 steps old".

Note that the last row `1961-3-01` only has a `yhat3`, which was forecasted at the last location with data `1960-12-01`.
Because we lack inputs after that location, we do not have more recent predictions `yhat1` from `1961-2-01` nor  `yhat2` from `1961-1-01`.

We also get the individual forecast components, which also refer to their respective contribution to `yhat<i>`, forecasted `<i>` steps ago.

Components without an added number are only time-dependent or based on future regressors, neither are lagged, and thus a single value.

In [7]:
# Forecasts aligned by target date: each row holds every prediction made *for* that datestamp.
forecast = m.predict(future)
# The tail includes the rows beyond the observed data (y is NaN there).
forecast.tail()

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
142,1960-11-01,390.0,409.930786,19.930786,408.954346,18.954346,420.211639,30.211639,-260.531158,-261.507629,-250.250305,703.888672,-33.426682
143,1960-12-01,432.0,422.386444,-9.613556,440.352509,8.352509,440.08432,8.08432,-300.877869,-282.911774,-283.179962,710.580261,12.684018
144,1961-01-01,,451.373566,,444.720123,,458.65033,,-270.423492,-277.076935,-263.146729,717.494934,4.3021
145,1961-02-01,,,,463.23172,,456.139069,,,-235.507568,-242.600174,724.409668,-25.67042
146,1961-03-01,,,,,,524.548218,,,,-187.90477,730.655334,-18.20232


## Predictions based on forecast start
Calling `predict` with `raw=True`, we get a `df` where each `'step<i>'` refers to the `<i>`th step-ahead prediction **starting at this row's datetime**.
Here, `<i>`  refers to how many steps ahead the prediction is targeted at.

e.g. `step0` is the prediction for this datetime. `step1` is the prediction for the next datetime. 

All the predictions of a particular row were made at the same time: one step before the row's datestamp.

In [8]:
# raw=True: one row per forecast start with columns step0..step2;
# decompose=False leaves out the per-component columns.
forecast = m.predict(future, decompose=False, raw=True)
forecast.tail()



Unnamed: 0,ds,step0,step1,step2
135,1960-09-01,510.894653,470.128998,420.211639
136,1960-10-01,465.815521,408.954346,440.08432
137,1960-11-01,409.930786,440.352509,458.65033
138,1960-12-01,422.386444,444.720123,456.139069
139,1961-01-01,451.373566,463.23172,524.548218


Note that the last row contains the last possible forecast, forecasting `1961-1-01`, `1961-2-01` and `1961-3-01` with data available at `1960-12-01`.


Setting `decompose=True` will include the individual forecast components, which also refer to their respective contribution to `step<i>` into the future.

In [9]:
# raw=True with decompose=True: the step<i> forecasts plus one column per
# component and step (trend<i>, season_yearly<i>, ar<i>).
forecast = m.predict(future, decompose=True, raw=True)
forecast.tail()



Unnamed: 0,ds,step0,step1,step2,trend0,trend1,trend2,season_yearly0,season_yearly1,season_yearly2,ar0,ar1,ar2
135,1960-09-01,510.894653,470.128998,420.211639,690.282227,696.973877,703.888672,3.070502,-19.491608,-33.426682,-182.458145,-207.353271,-250.250305
136,1960-10-01,465.815521,408.954346,440.08432,696.973877,703.888672,710.580261,-19.491608,-33.426682,12.684018,-211.666763,-261.507629,-283.179962
137,1960-11-01,409.930786,440.352509,458.65033,703.888672,710.580261,717.494934,-33.426682,12.684018,4.3021,-260.531158,-282.911774,-263.146729
138,1960-12-01,422.386444,444.720123,456.139069,710.580261,717.494934,724.409668,12.684018,4.3021,-25.67042,-300.877869,-277.076935,-242.600174
139,1961-01-01,451.373566,463.23172,524.548218,717.494934,724.409668,730.655334,4.3021,-25.67042,-18.20232,-270.423492,-235.507568,-187.90477


### Advanced: Get predictions based on forecast start as arrays
This function was not meant to be used directly, but if you have a specific need, it may be useful to get the values directly as arrays.
The returned predictions are also based on forecast origin.

In [10]:
# NOTE: `_predict_raw` is a private method (leading underscore) and is not meant to be
# called directly, so its signature and return values may change between releases.
future = m.prepare_dataframe_to_predict(df)
# Returns the forecast dates, the predictions as an array with one row per forecast
# and one column per step, and a dict mapping component name -> array of the same layout.
dates, predicted, components = m._predict_raw(future, include_components=True)

In [11]:
dates[-5:]

137   1960-06-01
138   1960-07-01
139   1960-08-01
140   1960-09-01
141   1960-10-01
Name: ds, dtype: datetime64[ns]

In [12]:
predicted[-5:]

array([[526.6588 , 570.90094, 563.1465 ],
       [586.65533, 576.89435, 511.73834],
       [583.13544, 524.6257 , 478.01617],
       [510.89465, 470.129  , 420.21164],
       [465.81552, 408.95435, 440.08432]], dtype=float32)

In [13]:
# Last five forecasts of every component, as (component name, array) pairs.
[(component_name, component_values[-5:]) for component_name, component_values in components.items()]

[('trend',
  array([[669.7612 , 676.4528 , 683.3676 ],
         [676.4528 , 683.3676 , 690.2822 ],
         [683.3676 , 690.2822 , 696.9739 ],
         [690.2822 , 696.9739 , 703.8887 ],
         [696.9739 , 703.8887 , 710.58026]], dtype=float32)),
 ('season_yearly',
  array([[ 16.571056,  58.133564,  58.72041 ],
         [ 58.133564,  58.72041 ,   3.070502],
         [ 58.72041 ,   3.070502, -19.491608],
         [  3.070502, -19.491608, -33.42668 ],
         [-19.491608, -33.42668 ,  12.684018]], dtype=float32)),
 ('ar',
  array([[-159.67343, -163.68549, -178.94156],
         [-147.93109, -165.19366, -181.6144 ],
         [-158.95258, -168.72707, -199.4661 ],
         [-182.45815, -207.35327, -250.2503 ],
         [-211.66676, -261.50763, -283.17996]], dtype=float32))]

# Collect out-of-sample predictions
This is how you can extend predictions into the unknown future:

In [14]:
# Extend `df` into the unobserved future.
# Per the original note, this is equivalent to passing
# periods=m.n_forecasts, n_historic_predictions=False — TODO confirm against the installed version.
future = m.make_future_dataframe(df)

Now, the forecast dataframe only contains predictions about the yet unobserved future.

## Predictions based on forecast target

In [15]:
# Forecasts aligned by target date, now for the unobserved future only.
forecast = m.predict(future)
# The last 3 rows are the three future datestamps (n_forecasts=3).
forecast.tail(3)

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
5,1961-01-01,,451.373596,,,,,,-270.423462,,,717.494934,4.3021
6,1961-02-01,,,,463.23172,,,,,-235.507568,,724.409668,-25.67042
7,1961-03-01,,,,,,524.548218,,,,-187.90477,730.655334,-18.20232


## Predictions based on forecast start
We can also get the forecasts based on the forecast start

In [16]:
# Forecasts aligned by forecast start; with only one forecast start left, the result has a single row.
forecast = m.predict(future, raw=True, decompose=False)
forecast



Unnamed: 0,ds,step0,step1,step2
0,1961-01-01,451.373596,463.23172,524.548218


... and as an array

In [17]:
# NOTE: `_predict_raw` is a private method (leading underscore); prefer
# `m.predict(..., raw=True)` unless the bare arrays are really needed.
dates, predicted, components = m._predict_raw(future)
# One row (the single forecast start) with one column per forecast step.
predicted

array([[451.3736 , 463.23172, 524.5482 ]], dtype=float32)

In [18]:
# remove temporary data folder.
!rm -r tmp-data