In [1]:
import os
import shutil

import numpy as np
import pandas as pd
from neuralprophet import NeuralProphet, set_random_seed

In [2]:
# %%capture
# Optional (disabled): uncomment this whole cell to download the dataset into
# tmp-data instead of reading the repository copy that the next cell loads.
# When enabling it, `%%capture` must remain the first line of the cell.
# # create temporary data folder and download data
# !rm -r tmp-data
# !mkdir tmp-data
# !wget -P tmp-data https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/air_passengers.csv
# os.listdir('tmp-data')

In [3]:
# Alternative source (disabled): the copy downloaded into tmp-data by the
# commented-out download cell.
# air_passengers_df = pd.read_csv(os.path.join('tmp-data', 'air_passengers.csv'))
# df = air_passengers_df

# Load the air passengers data from the repository's test data.
# The path is relative, so this assumes the notebook runs from its own directory.
df = pd.read_csv("../../tests/test-data/air_passengers.csv")
# Display the last three rows; the displayed columns are `ds` (date) and `y` (value).
df.tail(3)

Unnamed: 0,ds,y
141,1960-10-01,461
142,1960-11-01,390
143,1960-12-01,432


In [4]:
%%capture
m = NeuralProphet(n_lags=5, n_forecasts=3)
metrics_train = m.fit(df=df, freq="MS")

INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 292
INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 4.06E-02, min: 1.22E+00
INFO - (NP.utils_torch.lr_range_test) - learning rate range test selected lr: 3.93E-01


# Collect in-sample predictions

## Predictions sorted based on forecast target
Calling `predict`, we get a `df_forecast` where each `'yhat<i>'` refers to the `<i>`-step-ahead prediction for **this row's datetime being the target**.
Here, `<i>`  refers to the age of the prediction.

e.g. `yhat3` is the prediction for this datetime, predicted 3 steps ago, it is "3 steps old".

Note that when the forecast extends past the end of the data (see the out-of-sample section below), the last row `1961-03-01` only has a `yhat3`, which was forecasted at the last location with data, `1960-12-01`.
Because we lack inputs after that location, we do not have the more recent predictions `yhat1` from `1961-02-01` nor `yhat2` from `1961-01-01`.

We also get the individual forecast components, which also refer to their respective contribution to `yhat<i>`, forecasted `<i>` steps ago.

Components without an added number are only time-dependent or based on future regressors, neither are lagged, and thus a single value.

In [5]:
# Reload the raw data so this cell does not depend on the state of `df` left by earlier cells.
df = pd.read_csv("../../tests/test-data/air_passengers.csv")
# In-sample, target-aligned forecast from the fitted model `m`. Per the output
# below, the columns are ds, y, yhat1..3, residual1..3, ar1..3, trend and season_yearly.
forecast = m.predict(df)
# Show the last three rows.
forecast.tail(3)

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
141,1960-10-01,461,464.921753,3.921753,469.071228,8.071228,477.686005,16.686005,-213.783478,-209.634033,-201.019241,697.732727,-19.027452
142,1960-11-01,390,409.787842,19.787842,409.886993,19.886993,421.01889,31.01889,-260.855469,-260.756317,-249.62442,704.643738,-34.00045
143,1960-12-01,432,421.754639,-10.245361,440.603424,8.603424,441.667572,9.667572,-301.536835,-282.688049,-281.623901,711.331787,11.959694


## Predictions based on forecast start
Calling `predict` with `raw=True`, we get a `df` where each `'step<i>'` refers to the `<i>`th step-ahead prediction **starting at this row's datetime**.
Here, `<i>`  refers to how many steps ahead the prediction is targeted at.

e.g. `step0` is the prediction for this datetime. `step1` is the prediction for the next datetime. 

All the predictions of a particular row were made at the same time: one step before the row's datestamp.

In [6]:
# Reload the raw data so this cell does not depend on the state of `df` left by earlier cells.
df = pd.read_csv("../../tests/test-data/air_passengers.csv")
# raw=True returns the forecast aligned by forecast start; with decompose=False
# the displayed columns are only ds and step0..step2.
forecast = m.predict(df, decompose=False, raw=True)
# Show the last three rows.
forecast.tail(3)



Unnamed: 0,ds,step0,step1,step2
136,1960-10-01,464.921753,409.886993,441.667572
137,1960-11-01,409.787842,440.603424,458.95874
138,1960-12-01,421.754639,442.95578,457.078308


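Note that the last row shown starts at `1960-12-01`: it holds the predictions for `1960-12-01`, `1961-01-01` and `1961-02-01`, all made one step earlier with data available up to `1960-11-01`. The last possible forecast, covering `1961-01-01`, `1961-02-01` and `1961-03-01` with data available at `1960-12-01`, is obtained in the out-of-sample section below.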


Setting `decompose=True` will include the individual forecast components, which also refer to their respective contribution to `step<i>` into the future.

In [7]:
# Reload the raw data so this cell does not depend on the state of `df` left by earlier cells.
df = pd.read_csv("../../tests/test-data/air_passengers.csv")
# Same start-aligned forecast as before, now with decompose=True: per the output
# below, trend<i>, season_yearly<i> and ar<i> columns are added for each step.
forecast = m.predict(df, decompose=True, raw=True)
# Show the last three rows.
forecast.tail(3)



Unnamed: 0,ds,step0,step1,step2,trend0,trend1,trend2,season_yearly0,season_yearly1,season_yearly2,ar0,ar1,ar2
136,1960-10-01,464.921753,409.886993,441.667572,697.732727,704.643738,711.331787,-19.027452,-34.00045,11.959694,-213.783478,-260.756317,-281.623901
137,1960-11-01,409.787842,440.603424,458.95874,704.643738,711.331787,718.242798,-34.00045,11.959694,4.136388,-260.855469,-282.688049,-263.420441
138,1960-12-01,421.754639,442.95578,457.078308,711.331787,718.242798,725.153809,11.959694,4.136388,-25.672066,-301.536835,-279.423401,-242.403442


# Collect out-of-sample predictions
This is how you can extend predictions into the unknown future:

In [8]:
# Reload the raw data so this cell does not depend on the state of `df` left by earlier cells.
df = pd.read_csv("../../tests/test-data/air_passengers.csv")
# Build the input frame for forecasting past the last observation.
# periods=3 equals the n_forecasts=3 the model was created with.
# NOTE(review): the trailing note presumably lists the defaults - confirm against the API docs.
future = m.make_future_dataframe(df, periods=3) # periods=m.n_forecasts, n_historic_predictions=False

Now, the forecast dataframe only contains predictions about the yet unobserved future.

## Predictions based on forecast target

In [9]:
# Target-aligned forecast for the future frame. Per the output below, `y` is empty
# for the unobserved dates and each future row carries exactly one yhat<i>.
forecast = m.predict(future)
# Show the last three rows, i.e. the three forecasted months.
forecast.tail(3)

Unnamed: 0,ds,y,yhat1,residual1,yhat2,residual2,yhat3,residual3,ar1,ar2,ar3,trend,season_yearly
5,1961-01-01,,450.855988,,,,,,-271.523163,,,718.242798,4.136388
6,1961-02-01,,,,465.454742,,,,,-234.027008,,725.153809,-25.672066
7,1961-03-01,,,,,,524.397339,,,,-189.4702,731.396057,-17.528477


## Predictions based on forecast start
We can also get the forecasts based on the forecast start

In [10]:
# Start-aligned forecast for the future frame, without components.
# The displayed result is a single row holding step0..step2.
forecast = m.predict(future, raw=True, decompose=False)
forecast



Unnamed: 0,ds,step0,step1,step2
0,1961-01-01,450.855988,465.454742,524.397339


### Advanced: Get predictions based on forecast start as arrays
This function was not meant to be used directly, but if you have a specific need, it may be useful to get the values directly as arrays.
The returned predictions are also based on forecast origin.

... and as an array

In [11]:
# `_predict_raw` is a private method (leading underscore). It returns the forecast
# start dates, the predicted values and a dict of per-component values.
dates, predicted, components = m._predict_raw(future, include_components=True)

In [12]:
# Forecast start dates (a pandas Series). Only one start date exists for `future`,
# so the last-three slice shows a single entry.
dates[-3:]

5   1961-01-01
Name: ds, dtype: datetime64[ns]

In [13]:
# Predicted values as a float32 array: one row per forecast start, one column per step ahead.
predicted[-3:]

array([[450.856  , 465.45474, 524.39734]], dtype=float32)

In [14]:
# Pair every component name with (at most) the last three rows of its value array.
[(component_name, component_values[-3:]) for component_name, component_values in components.items()]

[('trend', array([[718.2428 , 725.1538 , 731.39606]], dtype=float32)),
 ('season_yearly',
  array([[  4.1363883, -25.672066 , -17.528477 ]], dtype=float32)),
 ('ar', array([[-271.52316, -234.02701, -189.4702 ]], dtype=float32))]

In [15]:
# Remove the temporary data folder if the optional download cell created it.
# ignore_errors=True makes this a silent no-op when the folder does not exist,
# which is the case whenever the download cell stays commented out.
shutil.rmtree("tmp-data", ignore_errors=True)
