In [1]:
from neuralprophet import NeuralProphet
import numpy as np
import pandas as pd
import os

In [2]:
%%capture
# create temporary data folder and download data
!rm -r tmp-data
!mkdir tmp-data
!wget -P tmp-data https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/air_passengers.csv
os.listdir('tmp-data')

In [3]:
# Read the monthly air-passengers series that was downloaded into tmp-data.
csv_path = os.path.join('tmp-data', 'air_passengers.csv')
air_passengers_df = pd.read_csv(csv_path)
# `df` is an alias (same object, not a copy) used by the rest of the notebook.
df = air_passengers_df

In [4]:
%%capture
# Auto-regressive model: n_lags=5 past observations are the inputs and
# n_forecasts=3 steps are predicted at once (hence yhat1..yhat3 / step0..step2 below).
m = NeuralProphet(n_lags=5, n_forecasts=3)
# freq="MS" is the pandas month-start alias; the series has one row per month.
# No random seed is set here, so exact numbers may differ between runs.
metrics_train = m.fit(df=df, freq="MS")

INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 292
INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 3.01E-02, min: 9.05E-01
INFO - (NP.utils_torch.lr_range_test) - learning rate range test selected lr: 2.91E-01


# Collect in-sample predictions

In [10]:
# Work on an independent (deep) copy so the original `df` is left untouched;
# `DataFrame.copy()` is deep by default.
future = df.copy()

## Predictions sorted based on forecast target
Calling `predict`, we get a `df_forecast` where each `'yhat<i>'` refers to the `<i>`-step-ahead prediction for **this row's datetime being the target**.
Here, `<i>`  refers to the age of the prediction.

e.g. `yhat3` is the prediction for this datetime, predicted 3 steps ago, it is "3 steps old".

Note that the last row `1961-03-01` only has a `yhat3`, which was forecasted at the last location with data, `1960-12-01`.
Because we lack inputs after that location, we do not have the more recent predictions `yhat1` from `1961-02-01` nor `yhat2` from `1961-01-01`.

We also get the individual forecast components, which also refer to their respective contribution to `yhat<i>`, forecasted `<i>` steps ago.

Components without an added number are only time-dependent or based on future regressors, neither are lagged, and thus a single value.

In [11]:
# In-sample forecast, organised by target datetime (yhat<i> columns).
forecast = m.predict(future)
# Display the last five rows.
forecast.tail(n=5)

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
142,1960-11-01,390.0,409.625458,19.625458,409.470306,19.470306,420.463806,30.463806,-264.339569,-264.49472,-253.501205,707.141907,-33.176899
143,1960-12-01,432.0,422.081604,-9.918396,440.227173,8.227173,440.874268,8.874268,-304.496002,-286.350433,-285.703339,713.896118,12.681476
144,1961-01-01,,452.463287,,444.093292,,459.705872,,-272.876862,-281.246826,-265.634277,720.875427,4.464685
145,1961-02-01,,,,465.577515,,457.13092,,,-236.144577,-244.591156,727.854797,-26.132689
146,1961-03-01,,,,,,525.992676,,,,-189.82428,734.158813,-18.341887


## Predictions based on forecast start
Calling `predict` with `raw=True`, we get a `df` where each `'step<i>'` refers to the `<i>`th step-ahead prediction **starting at this row's datetime**.
Here, `<i>`  refers to how many steps ahead the prediction is targeted at.

e.g. `step0` is the prediction for this datetime. `step1` is the prediction for the next datetime. 

All the predictions of a particular row were made at the same time: one step before the row's datestamp.

In [12]:
# Raw forecast, organised by forecast start (step<i> columns), without components.
forecast = m.predict(future, raw=True, decompose=False)
# Display the last five rows.
forecast.tail(n=5)



Unnamed: 0,ds,step0,step1,step2
135,1960-09-01,510.785583,469.965332,420.463806
136,1960-10-01,466.057037,409.470306,440.874268
137,1960-11-01,409.625458,440.227173,459.705872
138,1960-12-01,422.081604,444.093292,457.13092
139,1961-01-01,452.463287,465.577515,525.992676


Note that the last row contains the last possible forecast, forecasting `1961-01-01`, `1961-02-01` and `1961-03-01` with data available at `1960-12-01`.


Setting `decompose=True` will include the individual forecast components, which also refer to their respective contribution to `step<i>` into the future.

In [13]:
# Raw forecast by forecast start, this time including the per-step components
# (trend<i>, season_yearly<i>, ar<i>).
forecast = m.predict(future, raw=True, decompose=True)
# Display the last five rows.
forecast.tail(n=5)



Unnamed: 0,ds,step0,step1,step2,trend0,trend1,trend2,season_yearly0,season_yearly1,season_yearly2,ar0,ar1,ar2
135,1960-09-01,510.785583,469.965332,420.463806,693.408386,700.162598,707.141907,3.742019,-18.733738,-33.176899,-186.364853,-211.463501,-253.501205
136,1960-10-01,466.057037,409.470306,440.874268,700.162598,707.141907,713.896118,-18.733738,-33.176899,12.681476,-215.371811,-264.49472,-285.703339
137,1960-11-01,409.625458,440.227173,459.705872,707.141907,713.896118,720.875427,-33.176899,12.681476,4.464685,-264.339569,-286.350433,-265.634277
138,1960-12-01,422.081604,444.093292,457.13092,713.896118,720.875427,727.854797,12.681476,4.464685,-26.132689,-304.496002,-281.246826,-244.591156
139,1961-01-01,452.463287,465.577515,525.992676,720.875427,727.854797,734.158813,4.464685,-26.132689,-18.341887,-272.876862,-236.144577,-189.82428


### Advanced: Get predictions based on forecast start as arrays
The private method `_predict_raw` was not meant to be used directly, but if you have a specific need, it may be useful to get the values directly as arrays.
The returned predictions are also based on forecast origin.

In [21]:
# Prepare the frame for prediction, then call the private `_predict_raw` directly.
# NOTE(review): `_predict_raw` has a leading underscore, i.e. it is not public API
# and may change between library versions.
future = m.prepare_dataframe_to_predict(df)
# Unpacks into: a Series of datestamps, an array of predictions, and a dict of
# component arrays keyed by component name (see the cells below).
dates, predicted, components = m._predict_raw(future, include_components=True)

In [22]:
# Datestamps belonging to the last five forecasts (one per row of `predicted`).
dates[-5:]

140   1960-09-01
141   1960-10-01
142   1960-11-01
143   1960-12-01
144   1961-01-01
Name: ds, dtype: datetime64[ns]

In [23]:
# Last five forecasts: one row per forecast start, one column per step ahead.
predicted[-5:]

array([[510.78558, 469.96533, 420.4638 ],
       [466.05704, 409.4703 , 440.87427],
       [409.62546, 440.22717, 459.70587],
       [422.0816 , 444.0933 , 457.13092],
       [452.4633 , 465.5775 , 525.9927 ]], dtype=float32)

In [24]:
# Pair every component name with the last five rows of its array.
[(name, array[-5:]) for name, array in components.items()]

[('trend',
  array([[693.4084, 700.1626, 707.1419],
         [700.1626, 707.1419, 713.8961],
         [707.1419, 713.8961, 720.8754],
         [713.8961, 720.8754, 727.8548],
         [720.8754, 727.8548, 734.1588]], dtype=float32)),
 ('season_yearly',
  array([[  3.7420192, -18.733738 , -33.1769   ],
         [-18.733738 , -33.1769   ,  12.681476 ],
         [-33.1769   ,  12.681476 ,   4.4646854],
         [ 12.681476 ,   4.4646854, -26.132689 ],
         [  4.4646854, -26.132689 , -18.341887 ]], dtype=float32)),
 ('ar',
  array([[-186.36485, -211.4635 , -253.5012 ],
         [-215.37181, -264.49472, -285.70334],
         [-264.33957, -286.35043, -265.63428],
         [-304.496  , -281.24683, -244.59116],
         [-272.87686, -236.14458, -189.82428]], dtype=float32))]

# Collect out-of-sample predictions
This is how you can extend predictions into the unknown future:

In [35]:
# Build the frame used to forecast beyond the observed data.
# NOTE(review): the inline note presumably lists the default arguments in effect —
# verify against the installed NeuralProphet version.
future = m.make_future_dataframe(df) # periods=m.n_forecasts, n_historic_predictions=False

Now, the forecast dataframe only contains predictions about the yet unobserved future.

## Predictions based on forecast target

In [36]:
# Out-of-sample forecast, organised by target datetime.
forecast = m.predict(future)
# Only the last three rows are shown (n_forecasts is 3).
forecast.tail(n=3)

INFO - (NP.forecaster._handle_missing_data) - 3 NaN values in column y were auto-imputed.


Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
8,1961-04-01,,548.791016,,528.551514,,527.593994,,-162.941544,-183.181107,-184.138565,741.138062,-29.405451
9,1961-05-01,,,,547.359619,,542.048462,,,-175.251038,-180.562164,747.892273,-25.281633
10,1961-06-01,,,,,,587.958862,,,,-182.171585,754.871582,15.258847


## Predictions based on forecast start
We can also get the forecasts based on the forecast start:

In [40]:
# Out-of-sample forecast, organised by forecast start, without components.
forecast = m.predict(future, decompose=False, raw=True)
# The frame is short, so display it in full.
forecast

INFO - (NP.forecaster._handle_missing_data) - 3 NaN values in column y were auto-imputed.


Unnamed: 0,ds,step0,step1,step2
0,1961-01-01,452.463287,465.577515,525.992676
1,1961-02-01,482.322327,524.370728,527.593994
2,1961-03-01,530.041992,528.551514,542.048462
3,1961-04-01,548.791016,547.359619,587.958862


... and as an array

In [42]:
# Same forecast via the private `_predict_raw` (not public API — may change).
dates, predicted, components = m._predict_raw(future)
# Array of predictions; in the recorded run it holds a single row of three steps.
predicted

array([[452.4633 , 465.57755, 525.9927 ]], dtype=float32)

In [43]:
# remove temporary data folder.
!rm -r tmp-data

rm: cannot remove 'tmp-data': No such file or directory
