In [1]:
import os
import torch

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly_resampler import unregister_plotly_resampler

from neuralprophet import NeuralProphet

In [2]:
def create_metrics_plot(metrics):
    """Draw one log-scaled subplot per training metric.

    Every column of ``metrics`` that is not a ``*_val`` column, ``RegLoss`` or
    ``epoch`` gets its own subplot containing the training curve. If a
    matching ``<metric>_val`` column exists it is overlaid in black, and the
    ``Loss`` subplot additionally shows ``RegLoss`` when that column exists.

    Parameters
    ----------
    metrics : pandas.DataFrame
        Metric values, one column per metric. Presumably one row per epoch,
        as returned by ``NeuralProphet.fit`` -- confirm against the caller.

    Returns
    -------
    plotly figure
        The figure created by ``make_subplots`` with all traces and layout
        settings applied.

    Notes
    -----
    Calls ``unregister_plotly_resampler()`` as a side effect, so the resampler
    stays deactivated after this function returns.
    """
    # Deactivate the resampler since it is not compatible with kaleido (image export)
    unregister_plotly_resampler()

    # Plotly params
    prediction_color = "#2d92ff"
    actual_color = "black"
    line_width = 2
    xaxis_args = {"showline": True, "mirror": True, "linewidth": 1.5, "showgrid": False}
    # NOTE(review): plotly documents "rangemode" as applying to linear axes only,
    # so it is probably inert together with type="log" -- kept unchanged.
    yaxis_args = {
        "showline": True,
        "mirror": True,
        "linewidth": 1.5,
        "showgrid": False,
        "rangemode": "tozero",
        "type": "log",
    }
    layout_args = {
        "autosize": True,
        "template": "plotly_white",
        "margin": go.layout.Margin(l=0, r=10, b=0, t=30, pad=0),
        "font": dict(size=10),
        "title": dict(font=dict(size=10)),
        "width": 1000,
        "height": 200,
    }

    metric_cols = [col for col in metrics.columns if not ("_val" in col or col == "RegLoss" or col == "epoch")]
    fig = make_subplots(rows=1, cols=len(metric_cols), subplot_titles=metric_cols)

    def add_line(column, color, group, subplot_col):
        """Add ``metrics[column]`` as a line trace to the given subplot column."""
        fig.add_trace(
            go.Scatter(
                y=metrics[column],
                name=column,
                mode="lines",
                line=dict(color=color, width=line_width),
                legendgroup=group,
            ),
            row=1,
            col=subplot_col,
        )

    for subplot_col, metric in enumerate(metric_cols, start=1):
        add_line(metric, prediction_color, metric, subplot_col)

        val_col = f"{metric}_val"
        if val_col in metrics.columns:
            add_line(val_col, actual_color, metric, subplot_col)

        # Guarded like the validation columns above: a metrics frame without a
        # "RegLoss" column must not raise a KeyError.
        if metric == "Loss" and "RegLoss" in metrics.columns:
            add_line("RegLoss", actual_color, metric, subplot_col)

    fig.update_xaxes(xaxis_args)
    fig.update_yaxes(yaxis_args)
    fig.update_layout(layout_args)
    return fig

In [3]:
# Root of the local NeuralProphet checkout. "~" is expanded explicitly so the
# derived paths also work with APIs that do not expand it themselves
# (e.g. open() or os.path.exists()).
DIR = os.path.expanduser("~/github/neural_prophet")
DATA_DIR = os.path.join(DIR, "tests", "test-data")

# Data files located in DATA_DIR.
PEYTON_FILE = os.path.join(DATA_DIR, "wp_log_peyton_manning.csv")
AIR_FILE = os.path.join(DATA_DIR, "air_passengers.csv")
YOS_FILE = os.path.join(DATA_DIR, "yosemite_temps.csv")
ENERGY_PRICE_DAILY_FILE = os.path.join(DATA_DIR, "tutorial04_kaggle_energy_daily_temperature.csv")

In [4]:
# Load the energy-price data and expose the regressor under the short name "temp"
df = pd.read_csv(ENERGY_PRICE_DAILY_FILE)
df = df.assign(temp=df["temperature"]).drop(columns="temperature")
df = df.assign(
    ds=pd.to_datetime(df["ds"]),
    y=pd.to_numeric(df["y"], errors="coerce"),
)

# Swap the file's timestamps for a synthetic hourly range starting 2015-01-01
df = df.drop(columns="ds")
df = df.assign(
    ds=pd.date_range(start="2015-01-01 00:00:00", periods=len(df), freq="H"),
    ID="test",
)

# Second series: a scaled copy of the first (y * 0.3, temp * 0.4) under ID "test2"
df_id = df[["ds", "y", "temp"]].copy()
df_id = df_id.assign(
    ID="test2",
    y=df_id["y"] * 0.3,
    temp=df_id["temp"] * 0.4,
)
df = pd.concat([df, df_id], ignore_index=True)

# Conditional seasonality flags: "winter" marks January, "summer" marks February-December
month_of_row = df["ds"].dt.month
df = df.assign(
    winter=np.where(month_of_row.isin([1]), 1, 0),
    summer=np.where(month_of_row.isin([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), 1, 0),
)
df = df.assign(
    winter=pd.to_numeric(df["winter"], errors="coerce"),
    summer=pd.to_numeric(df["summer"], errors="coerce"),
)

# Normalize temperature: shift by 65, then scale by 50
df = df.assign(temp=df["temp"].sub(65.0).div(50.0))

# Keep only the columns used for modelling, in a fixed order
df = df.loc[:, ["ID", "ds", "y", "temp", "winter", "summer"]]

In [5]:
### Temporary Test for on-the-fly sampling - very time consuming!


# Hyperparameters (tuned values first, untuned settings after the marker)
tuned_params = dict(
    n_lags=10,
    newer_samples_weight=2.0,
    n_changepoints=0,
    yearly_seasonality=10,
    weekly_seasonality=True,
    daily_seasonality=False,  # off because conditional daily seasonalities are added below
    batch_size=128,
    ar_layers=[8, 4],
    lagged_reg_layers=[8],
    # not tuned
    n_forecasts=5,
    learning_rate=0.001,
    epochs=10,
    trend_global_local="global",
    season_global_local="global",
    drop_missing=True,
    normalize="standardize",
)

# Uncertainty quantification: symmetric lower/upper quantiles around the confidence level
confidence_lv = 0.98
tail_mass = (1 - confidence_lv) / 2
quantile_list = [round(tail_mass, 2), round(confidence_lv + tail_mass, 2)]

# Pick the accelerator based on CUDA availability
use_gpu = torch.cuda.is_available()
accelerator = "gpu" if use_gpu else "cpu"
trainer_configs = dict(accelerator=accelerator)
print(f"Using {'GPU' if use_gpu else 'CPU'}")

# Model
m = NeuralProphet(quantiles=quantile_list, **tuned_params, **trainer_configs)

# Lagged regressor on the temperature column
m.add_lagged_regressor(names="temp", n_lags=33, normalize="standardize")

# Conditional seasonality: one period-1 seasonality per condition column
for condition in ("winter", "summer"):
    m.add_seasonality(name=condition, period=1, fourier_order=6, condition_name=condition)

# Holidays (US), including the day before and after each holiday
m.add_country_holidays(country_name="US", lower_window=-1, upper_window=1)

Using CPU


<neuralprophet.forecaster.NeuralProphet at 0x7b48ec932690>

In [6]:
# Chronological split: rows before the split date train, the rest validate
split_date = "2015-03-01"
df_train = df.loc[df["ds"] < split_date]
df_test = df.loc[df["ds"] >= split_date]

# Fit on the training window and record validation metrics alongside
fit_options = dict(freq="H", num_workers=4, early_stopping=False)
metrics = m.fit(df=df_train, validation_df=df_test, **fit_options)

INFO - (NP.forecaster.fit) - When Global modeling with local normalization, metrics are displayed in normalized scale.
INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.929% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.929% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.utils.configure_trainer) - Using accelerator cpu with 1 device(s).


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [7]:
# Plot the training curves (plus validation curves where available) from the fit metrics
create_metrics_plot(metrics)

In [8]:
# Show the last row of the metrics frame as a plain dict
metrics.to_dict("records")[-1]

{'MAE_val': 1.7389646768569946,
 'RMSE_val': 1.914405107498169,
 'Loss_val': 3.2401645183563232,
 'RegLoss_val': 0.0,
 'epoch': 9,
 'MAE': 0.9105463027954102,
 'RMSE': 1.1649360656738281,
 'Loss': 1.2888280153274536,
 'RegLoss': 0.0}

In [9]:
# Show the last row of the metrics frame as a DataFrame
metrics.tail(1)

Unnamed: 0,MAE_val,RMSE_val,Loss_val,RegLoss_val,epoch,MAE,RMSE,Loss,RegLoss
9,1.738965,1.914405,3.240165,0.0,9,0.910546,1.164936,1.288828,0.0


In [10]:
# Predict with the fitted model on the full frame `df` (not just the training split)
forecast = m.predict(df)

INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H


INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.data.processing._handle_missing_data) - Dropped 5 rows at the end with NaNs in 'y' column.
INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.data.processing._handle_missing_data) - Dropped 5 rows at the end with NaNs in 'y' column.


Predicting: 22it [00:00, ?it/s]

Predicting: 22it [00:00, ?it/s]

In [13]:
# Highlight the forecast step given by m.n_forecasts, then plot the forecast for ID "test"
m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts)
m.plot(forecast, df_name="test")

INFO - (NP.forecaster.plot) - Plotting data from ID test


FigureWidgetResampler({
    'data': [{'fillcolor': 'rgba(45, 146, 255, 0.2)',
              'line': {'color': 'rgba(45, 146, 255, 0.2)', 'width': 1},
              'mode': 'lines',
              'name': '<b style="color:sandybrown">[R]</b> yhat5 1.0% <i style="color:#fc9944">~1h</i>',
              'type': 'scatter',
              'uid': 'aebc484d-c130-47bd-8870-268071f0b3d5',
              'x': array([datetime.datetime(2015, 1, 2, 13, 0),
                          datetime.datetime(2015, 1, 2, 14, 0),
                          datetime.datetime(2015, 1, 2, 15, 0), ...,
                          datetime.datetime(2015, 3, 2, 17, 0),
                          datetime.datetime(2015, 3, 2, 18, 0),
                          datetime.datetime(2015, 3, 2, 20, 0)], dtype=object),
              'y': array([62.35801 , 58.90128 , 49.21923 , ..., 50.683945, 56.553596, 58.41175 ],
                         dtype=float32)},
             {'fill': 'tonexty',
              'fillcolor': 'rgba(45, 146, 

In [15]:
# Plot the forecast components for the series with ID "test"
m.plot_components(forecast, df_name="test")

INFO - (NP.forecaster.plot_components) - Plotting data from ID test


FigureWidgetResampler({
    'data': [{'line': {'color': '#2d92ff', 'width': 2},
              'mode': 'lines',
              'name': '<b style="color:sandybrown">[R]</b> Trend <i style="color:#fc9944">~1h</i>',
              'showlegend': False,
              'type': 'scatter',
              'uid': 'a971f8c1-1e2e-428f-bbae-b1e366f40f84',
              'x': array([datetime.datetime(2015, 1, 2, 9, 0),
                          datetime.datetime(2015, 1, 2, 10, 0),
                          datetime.datetime(2015, 1, 2, 11, 0), ...,
                          datetime.datetime(2015, 3, 2, 17, 0),
                          datetime.datetime(2015, 3, 2, 19, 0),
                          datetime.datetime(2015, 3, 2, 20, 0)], dtype=object),
              'xaxis': 'x',
              'y': array([41.138184, 41.136326, 41.134468, ..., 38.49218 , 38.488464, 38.486603],
                         dtype=float32),
              'yaxis': 'y'},
             {'line': {'color': '#2d92ff', 'width': 2},
    

In [16]:
# Plot the fitted model parameters
m.plot_parameters()

FigureWidgetResampler({
    'data': [{'fill': 'none',
              'line': {'color': '#2d92ff', 'width': 2},
              'mode': 'lines',
              'name': 'Trend',
              'type': 'scatter',
              'uid': 'f6f21f4d-8199-49f7-a49a-9951dd269bd9',
              'x': array([datetime.datetime(2015, 1, 1, 0, 0),
                          datetime.datetime(2015, 2, 28, 23, 0)], dtype=object),
              'xaxis': 'x',
              'y': array([41.1995 , 38.57022], dtype=float32),
              'yaxis': 'y'},
             {'fill': 'none',
              'line': {'color': '#2d92ff', 'width': 2},
              'mode': 'lines',
              'name': 'yearly',
              'type': 'scatter',
              'uid': 'f0adc090-2190-4a3b-9c9b-97c8cede02e2',
              'x': array([datetime.datetime(2017, 1, 1, 0, 0),
                          datetime.datetime(2017, 1, 2, 0, 0),
                          datetime.datetime(2017, 1, 3, 0, 0), ...,
                          datetim