In [None]:
!pip install neuralforecast utilsforecast

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utilsforecast.plotting import plot_series
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import *

from neuralforecast.core import NeuralForecast
from neuralforecast.models import MLP, MLPMultivariate
from neuralforecast.losses.pytorch import *

import warnings
warnings.filterwarnings("ignore")

In [None]:
def plot_metrics_bar(eval_df, ylabel='Mean absolute error (MAE)'):
    """Draw a bar chart comparing one metric value across forecasting methods.

    The first column of ``eval_df`` is treated as a label column and skipped;
    every remaining column is one method. Only the first row is plotted.
    Bars are ordered from the largest value to the smallest and each bar is
    annotated with its value rounded to three decimals.

    Parameters
    ----------
    eval_df : pandas.DataFrame
        Frame whose first column is a label and whose other columns hold one
        numeric value per method.
    ylabel : str, optional
        Label of the y-axis. Defaults to the MAE label so existing calls keep
        their output; pass another label when plotting a different metric.

    Raises
    ------
    ValueError
        If ``eval_df`` has no method columns after the first column.
    """
    methods = eval_df.columns[1:]
    if len(methods) == 0:
        raise ValueError("eval_df has no method columns to plot")
    values = eval_df.iloc[0].values[1:]

    # Largest value first.
    ranked = sorted(zip(methods, values), key=lambda pair: pair[1], reverse=True)
    methods_sorted, values_sorted = zip(*ranked)

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(methods_sorted, values_sorted)

    # Offset the labels in points instead of a fixed amount in data units, so
    # they sit just above the bars whatever the scale of the metric is.
    ax.bar_label(
        bars,
        labels=[f'{value:.3f}' for value in values_sorted],
        padding=3,
        fontweight='bold',
    )
    # Leave head-room so the label of the tallest bar stays inside the axes.
    ax.margins(y=0.1)

    ax.set_xlabel('Methods')
    ax.set_ylabel(ylabel)
    fig.tight_layout()

    plt.show()

In [None]:
# Source CSV; the "ds" column is parsed as datetimes on load.
DATA_URL = "https://raw.githubusercontent.com/marcopeix/youtube_tutorials/refs/heads/main/data/daily_sales_french_bakery.csv"
df = pd.read_csv(DATA_URL, parse_dates=["ds"])

# Keep only the series (unique_id values) that have at least 28 rows.
rows_per_series = df["unique_id"].map(df["unique_id"].value_counts())
df = df[rows_per_series >= 28]
df.head()

In [None]:
# Full history of the two series of interest.
plot_series(
    df=df,
    ids=["BAGUETTE", "CROISSANT"],
    palette="viridis",
)

In [None]:
# Same two series, with max_insample_length=56 to limit the history shown.
plot_series(
    df=df,
    ids=["BAGUETTE", "CROISSANT"],
    max_insample_length=56,
    palette="viridis",
)

In [None]:
# From here on the notebook only works with these two series.
unique_ids = ["BAGUETTE", "CROISSANT"]
is_selected = df["unique_id"].isin(unique_ids)
df = df.loc[is_selected]

## Forecasting and cross-validation

In [None]:
# Forecast horizon: passed as `h` to the models, used as the number of future
# dates to generate, and as the cross-validation step size (series use freq "D").
HORIZON = 7

In [None]:
# The future window starts the day after the latest timestamp in the data.
last_date = df['ds'].max()
future_dates = pd.date_range(
    start=last_date + pd.Timedelta(days=1),
    periods=HORIZON,
    freq='D',
)

# Last unit_price of each series (by row order), reused for every future date.
last_unit_prices = df.groupby('unique_id')['unit_price'].last()

# One record per (series, future date) pair.
future_rows = [
    {'unique_id': uid, 'ds': date, 'unit_price': last_unit_prices[uid]}
    for uid in unique_ids
    for date in future_dates
]

futr_df = pd.DataFrame(future_rows)
futr_df

In [None]:
# NOTE(review): `preds` is not assigned in any cell visible above this one —
# TODO confirm that a fit/predict cell is meant to run before this plot.
plot_series(
    df=df,
    forecasts_df=preds,
    max_insample_length=56,
    palette="viridis",
)

### Cross-validation

In [None]:
# NOTE(review): in the visible cells `evaluation` is first assigned further
# down (probabilistic forecasting section) — TODO confirm that a
# cross-validation evaluation cell is meant to run before this one.
plot_metrics_bar(eval_df=evaluation)

In [None]:
# NOTE(review): `cv_df` is not assigned in any visible cell — TODO confirm
# that the cross-validation cell producing it is meant to run first.
# The actuals ("y") and the "cutoff" column are removed before plotting.
cv_forecasts = cv_df.drop(columns=["y", "cutoff"])
plot_series(
    df=df,
    forecasts_df=cv_forecasts,
    max_insample_length=140,
    palette="viridis",
)

## Probabilistic forecasting
### Loss function

In [None]:
# MLP trained with a multi-quantile loss and evaluated by cross-validation.
# The evaluation and plotting cells that consume `cv_df_mqloss` ask for
# level=[80] and for a point-forecast column named "MLP", so the loss, the
# requested level and the column rename below must all be in place.
models = [
    MLP(
        h=HORIZON,
        input_size=14,
        futr_exog_list=["unit_price"],
        num_layers=2,
        hidden_size=1024,
        # Set the loss function: quantiles for an 80% prediction interval
        loss=MQLoss(level=[80]),
        max_steps=1000,
        early_stop_patience_steps=3,
        val_check_steps=100,
        scaler_type="identity"
        )
]

nf = NeuralForecast(models=models, freq="D")

cv_df_mqloss = nf.cross_validation(
    df=df,
    step_size=HORIZON,
    val_size=HORIZON,
    test_size=8*HORIZON,
    refit=False,
    n_windows=None,
    # Set the prediction interval
    level=[80],
)

# Rename column for easier evaluation and plotting.
# NOTE(review): assumes the point forecast of an MQLoss model is named
# "MLP-median" — TODO confirm against the installed neuralforecast version.
# rename() leaves the frame unchanged if that column is absent.
cv_df_mqloss = cv_df_mqloss.rename(columns={"MLP-median": "MLP"})

cv_df_mqloss.head()

In [None]:
metrics = [scaled_crps]

# Score the MQLoss cross-validation forecasts at the 80% level; the result
# still carries a unique_id column at this point.
mqloss_scores = evaluate(
    cv_df_mqloss.drop(columns=["ds", "cutoff"]),
    metrics=metrics,
    models=["MLP"],
    level=[80],
)

# Average the scores over the series: one row per metric.
evaluation = (
    mqloss_scores
    .drop(columns=["unique_id"])
    .groupby("metric")
    .mean()
    .reset_index()
)
evaluation

In [None]:
# Plot the MQLoss cross-validation forecasts with their 80% interval; the
# actuals ("y") and the "cutoff" column are removed from the forecast frame.
mqloss_forecasts = cv_df_mqloss.drop(columns=["y", "cutoff"])
plot_series(
    df=df,
    forecasts_df=mqloss_forecasts,
    max_insample_length=140,
    level=[80],
    palette="viridis",
)

### Conformal predictions

In [None]:
from neuralforecast.utils import PredictionIntervals

In [None]:
# Initialize PredictionIntervals() with its default settings
prediction_intervals = PredictionIntervals()

# Point-forecast MLP (MAE loss); the intervals come from the
# PredictionIntervals object passed to cross_validation below.
models = [
    MLP(
        h=HORIZON,
        input_size=14,
        futr_exog_list=["unit_price"],
        num_layers=2,
        hidden_size=1024,
        loss=MAE(),
        max_steps=400,
        scaler_type="identity"
        )
]

nf = NeuralForecast(models=models, freq="D")

cv_df_conformal = nf.cross_validation(
    df=df,
    step_size=HORIZON,
    test_size=8*HORIZON,
    refit=True,
    n_windows=None,
    # Specify prediction intervals: without these two arguments the frame has
    # no interval columns, while the evaluation and plotting cells that use
    # `cv_df_conformal` request level=[80].
    prediction_intervals=prediction_intervals,
    level=[80],
)

cv_df_conformal.head()

In [None]:
metrics = [scaled_crps]

# Score the conformal cross-validation forecasts at the 80% level; the result
# still carries a unique_id column at this point.
conformal_scores = evaluate(
    cv_df_conformal.drop(columns=["ds", "cutoff"]),
    metrics=metrics,
    models=["MLP"],
    level=[80],
)

# Average the scores over the series: one row per metric.
evaluation = (
    conformal_scores
    .drop(columns=["unique_id"])
    .groupby("metric")
    .mean()
    .reset_index()
)
evaluation

In [None]:
# Plot the conformal cross-validation forecasts with their 80% interval; the
# actuals ("y") and the "cutoff" column are removed from the forecast frame.
conformal_forecasts = cv_df_conformal.drop(columns=["y", "cutoff"])
plot_series(
    df=df,
    forecasts_df=conformal_forecasts,
    max_insample_length=140,
    level=[80],
    palette="viridis",
)

## Training on large datasets

In [None]:
import os

In [None]:
# Partition data into multiple parquet files


In [None]:
# NOTE(review): `files_list` is not assigned in any visible cell; the
# placeholder cell above is presumably meant to create it — TODO confirm.
files_list

In [None]:
# Same MLP configuration as the conformal section, this time fitted on
# `files_list` instead of the in-memory frame.
mlp = MLP(
    h=HORIZON,
    input_size=14,
    futr_exog_list=["unit_price"],
    num_layers=2,
    hidden_size=1024,
    loss=MAE(),
    max_steps=400,
    scaler_type="identity",
)
models = [mlp]

nf = NeuralForecast(models=models, freq="D")
nf.fit(df=files_list)

In [None]:
# NOTE(review): `preds` is not assigned in any visible cell, and the model
# fitted just above is never asked to predict — TODO confirm that a predict
# cell is meant to run before this plot.
plot_series(
    df=df,
    forecasts_df=preds,
    max_insample_length=56,
    palette="viridis",
)

## Hyperparameter optimization

In [None]:
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch

from neuralforecast.auto import AutoMLP

In [None]:
# Define the search space


In [None]:
# Initialize AutoMLP


In [None]:
# NOTE(review): `model` is not assigned in any visible cell; the
# "Initialize AutoMLP" placeholder cell is presumably meant to create it —
# TODO confirm.
nf = NeuralForecast(models=[model], freq='D')

# The unit_price column is removed before fitting; val_size=28 is passed to fit.
train_df = df.drop(columns=["unit_price"])
nf.fit(df=train_df, val_size=28)

In [None]:
# Results of the first model's search, as a DataFrame.
results = nf.models[0].results.get_dataframe()

# Keep the two loss columns (when present) plus every column whose name
# starts with "config".
config_cols = [name for name in results.columns if name.startswith('config')]
columns_to_keep = ['loss', 'train_loss', *config_cols]
existing_columns = [name for name in columns_to_keep if name in results.columns]
filtered_results = results.loc[:, existing_columns]

# Five runs with the lowest 'loss'.
best_runs = filtered_results.sort_values(by='loss', ascending=True).head(5)
best_runs