In [1]:
from neuralprophet import NeuralProphet
import numpy as np
import pandas as pd
import os

In [2]:
%%capture
# create temporary data folder and download data
!rm -r tmp-data
!mkdir tmp-data
!wget -P tmp-data https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/air_passengers.csv
os.listdir('tmp-data')

In [3]:
# Read the downloaded dataset once; `air_passengers_df` and the short alias `df`
# are bound to the same DataFrame object.
df = air_passengers_df = pd.read_csv(os.path.join('tmp-data', 'air_passengers.csv'))

In [4]:
%%capture
# Configure and fit the model on the air-passenger data loaded above.
# n_forecasts=3 matches the three forecast columns shown later in this notebook
# (yhat1..yhat3 / step0..step2). n_lags=5 presumably sets how many past
# observations feed the autoregression — TODO confirm against the NeuralProphet docs.
# freq="MS" is the pandas alias for month-start frequency.
m = NeuralProphet(n_lags=5, n_forecasts=3)
# The return value of fit() is kept but not used further in this notebook.
metrics_train = m.fit(df=df, freq="MS")

INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 292
INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 4.06E-02, min: 3.01E+00
INFO - (NP.utils_torch.lr_range_test) - learning rate range test selected lr: 7.16E-01


# Collect in-sample predictions

In [5]:
# Build the input frame for in-sample predictions.
# n_historic_predictions=True requests predictions over the historic rows of `df`;
# periods=0 presumably asks for no extra future periods — NOTE(review): confirm the
# exact semantics in the NeuralProphet docs.
future = m.make_future_dataframe(df, periods=0, n_historic_predictions=True)

## Predictions sorted based on forecast target
Calling `predict`, we get a `df_forecast` where each `'yhat<i>'` refers to the `<i>`-step-ahead prediction for **this row's datetime being the target**.
Here, `<i>` refers to the age of the prediction.

e.g. `yhat3` is the prediction for this datetime that was made 3 steps earlier; it is "3 steps old".

Note that the last row `1961-03-01` only has a `yhat3`, which was forecasted at the last location with data, `1960-12-01`.
Because we lack inputs after that location, we have neither the more recent prediction `yhat1` from `1961-02-01` nor `yhat2` from `1961-01-01`.

We also get the individual forecast components, which likewise refer to their respective contribution to `yhat<i>`, forecasted `<i>` steps ago.

Components without an added number are only time-dependent or based on future regressors; neither kind is lagged, so each has a single value per row.

In [6]:
# Target-aligned forecast: each `yhat<i>` column holds the prediction for that row's `ds`.
forecast = m.predict(future)
forecast.tail()  # show the last rows

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
142,1960-11-01,390.0,408.390564,18.390564,408.735382,18.735382,419.024658,29.024658,-260.954346,-260.609558,-250.320267,702.926758,-33.581821
143,1960-12-01,432.0,420.639557,-11.360443,439.180298,7.180298,439.773865,7.773865,-302.058746,-283.518036,-282.924438,709.604553,13.093771
144,1961-01-01,,452.548798,,444.073608,,458.34201,,-267.920441,-276.39563,-262.127197,716.504822,3.964399
145,1961-02-01,,,,464.460388,,456.117157,,,-233.840622,-242.183838,723.405273,-25.104242
146,1961-03-01,,,,,,525.546753,,,,-185.732697,729.637878,-18.358429


## Predictions based on forecast start
Calling `predict_raw`, we get a `df` where each `'step<i>'` refers to the `<i>`th step-ahead prediction **starting at this row's datetime**.
Here, `<i>` refers to how many steps ahead the prediction is targeted at.

e.g. `step0` is the prediction for this datetime. `step1` is the prediction for the next datetime. 

All the predictions of a particular row were made at the same time: one step before the row's datestamp.

In [7]:
# Origin-aligned forecast: each `step<i>` column is a prediction starting at that row's `ds`.
# Note: this rebinds `forecast`, replacing the target-aligned frame from the previous cell.
forecast = m.predict_raw(future)
forecast.tail()  # show the last rows

Unnamed: 0,ds,step0,step1,step2
135,1960-09-01,509.9552,470.335876,419.024658
136,1960-10-01,465.349365,408.735382,439.773865
137,1960-11-01,408.390564,439.180298,458.34201
138,1960-12-01,420.639557,444.073608,456.117157
139,1961-01-01,452.548798,464.460388,525.546753


Note that the last row contains the last possible forecast, forecasting `1961-01-01`, `1961-02-01` and `1961-03-01` with data available at `1960-12-01`.


Setting `decompose=True` will include the individual forecast components, which also refer to their respective contribution to `step<i>` into the future.

In [8]:
# Same origin-aligned forecast, with decompose=True adding per-step component columns
# (e.g. trend<i>, season_yearly<i>, ar<i> in the output below).
forecast = m.predict_raw(future, decompose=True)
forecast.tail()  # show the last rows

Unnamed: 0,ds,step0,step1,step2,trend0,trend1,trend2,season_yearly0,season_yearly1,season_yearly2,ar0,ar1,ar2
135,1960-09-01,509.9552,470.335876,419.024658,689.348755,696.026489,702.926758,3.116546,-19.381254,-33.581821,-182.510101,-206.309357,-250.320267
136,1960-10-01,465.349365,408.735382,439.773865,696.026489,702.926758,709.604553,-19.381254,-33.581821,13.093771,-211.295868,-260.609558,-282.924438
137,1960-11-01,408.390564,439.180298,458.34201,702.926758,709.604553,716.504822,-33.581821,13.093771,3.964399,-260.954346,-283.518036,-262.127197
138,1960-12-01,420.639557,444.073608,456.117157,709.604553,716.504822,723.405273,13.093771,3.964399,-25.104242,-302.058746,-276.39563,-242.183838
139,1961-01-01,452.548798,464.460388,525.546753,716.504822,723.405273,729.637878,3.964399,-25.104242,-18.358429,-267.920441,-233.840622,-185.732697


### Advanced: Get predictions based on forecast start as arrays
This function was not meant to be used directly, but if you have a specific need, it may be useful to get the values directly as arrays.
The returned predictions are also based on forecast origin.

In [9]:
# `_predict_raw` has a leading underscore, i.e. it is a private API and may change
# without notice. It returns a (dates, predicted, components) triple.
dates, predicted, components = m._predict_raw(future, include_components=True)

In [10]:
# Last five entries of the returned dates.
dates[-5:]

140   1960-09-01
141   1960-10-01
142   1960-11-01
143   1960-12-01
144   1961-01-01
Name: ds, dtype: datetime64[ns]

In [11]:
# Last five rows of the prediction array (one column per forecast step, see output).
predicted[-5:]

array([[509.9552 , 470.33588, 419.02466],
       [465.34937, 408.73538, 439.77386],
       [408.39056, 439.1803 , 458.342  ],
       [420.63956, 444.0736 , 456.11716],
       [452.5488 , 464.4604 , 525.54675]], dtype=float32)

In [12]:
# Pair every component name with the last five rows of its array.
[(item[0], item[1][-5:]) for item in components.items()]

[('trend',
  array([[689.34875, 696.0265 , 702.92676],
         [696.0265 , 702.92676, 709.60455],
         [702.92676, 709.60455, 716.5048 ],
         [709.60455, 716.5048 , 723.4053 ],
         [716.5048 , 723.4053 , 729.6379 ]], dtype=float32)),
 ('season_yearly',
  array([[  3.1165464, -19.381254 , -33.58182  ],
         [-19.381254 , -33.58182  ,  13.093771 ],
         [-33.58182  ,  13.093771 ,   3.9643986],
         [ 13.093771 ,   3.9643986, -25.104242 ],
         [  3.9643986, -25.104242 , -18.358429 ]], dtype=float32)),
 ('ar',
  array([[-182.5101 , -206.30936, -250.32027],
         [-211.29587, -260.60956, -282.92444],
         [-260.95435, -283.51804, -262.1272 ],
         [-302.05875, -276.39563, -242.18384],
         [-267.92044, -233.84062, -185.7327 ]], dtype=float32))]

# Collect out-of-sample predictions
This is how you can extend predictions into the unknown future:

In [13]:
# No extra arguments: per the original author's note, this is equivalent to passing
# periods=m.n_forecasts and n_historic_predictions=False.
future = m.make_future_dataframe(df)

Now, the forecast dataframe only contains predictions about the yet unobserved future.

## Predictions based on forecast target

In [14]:
# Target-aligned forecast for the not-yet-observed future rows.
forecast = m.predict(future)
forecast.tail(3)  # show the last three rows

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
5,1961-01-01,,452.548798,,,,,,-267.920441,,,716.504822,3.964399
6,1961-02-01,,,,464.460388,,,,,-233.840622,,723.405273,-25.104242
7,1961-03-01,,,,,,525.546753,,,,-185.732697,729.637878,-18.358429


## Predictions based on forecast start
We can also get the forecasts based on the forecast start:

In [15]:
# Origin-aligned forecast for the future frame; rebinds `forecast` again.
forecast = m.predict_raw(future)
forecast  # display the whole frame (a single row in the output below)

Unnamed: 0,ds,step0,step1,step2
0,1961-01-01,452.548798,464.460388,525.546753


... and as an array

In [16]:
# Private API again (leading underscore), this time called without include_components.
# Rebinds `dates`, `predicted` and `components` from the in-sample section.
dates, predicted, components = m._predict_raw(future)
predicted  # display the raw prediction array

array([[452.5488 , 464.4604 , 525.54675]], dtype=float32)

In [17]:
# remove temporary data folder.
!rm -r tmp-data