In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from statsforecast import StatsForecast
from statsforecast.models import AutoETS
from tqdm import tqdm
import os

# Set the Nixtla configuration flag NIXTLA_ID_AS_COL to the string "False".
# NOTE(review): presumably this controls whether StatsForecast returns `unique_id`
# as the index rather than as a column — confirm against the installed version.
# NOTE(review): the flag is set after `statsforecast` has been imported, and the
# visible cells only call the AutoETS model object directly (the StatsForecast
# class is never used), so it may have no effect here — confirm.
os.environ["NIXTLA_ID_AS_COL"] = "False"

In [2]:
# Load the store-level data from CSV into `data`.
# The cells below read the columns 'store_id', 'd' and 'revenue' from this frame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere without editing it — consider a configurable data directory.
data = pd.read_csv("C:/Data/M5store.csv")

In [3]:
# Define the MASE function
def mase(y, y_pred, y_train, seasonality=1):
    """
    Calculate the Mean Absolute Scaled Error (MASE).

    The mean absolute forecast error is divided by the in-sample mean absolute
    error of a naive forecast that repeats the value observed ``seasonality``
    steps earlier in ``y_train``.

    Parameters
    ----------
    y : array-like
        Actual values of the evaluation period.
    y_pred : array-like
        Predicted values, same length as ``y``.
    y_train : array-like
        Series used to compute the scaling factor.
    seasonality : int, default 1
        Lag of the naive forecast used for scaling (1 = plain naive).

    Returns
    -------
    float
        ``mae / scaling_factor``. ``nan`` when ``y`` is empty or ``y_train`` has
        no more than ``seasonality`` observations; ``inf`` (or ``nan`` when the
        forecast error is also zero) when the scaling factor is zero.

    Raises
    ------
    ValueError
        If ``seasonality`` is smaller than 1.
    """
    if seasonality < 1:
        raise ValueError("seasonality must be a positive integer")

    # Coerce to plain arrays first. With a pandas Series the two shifted slices
    # below would be aligned on index labels instead of position, which turns
    # every difference into 0/NaN; plain lists do not support subtraction at all.
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    # Degenerate inputs: nothing to average, so the metric is undefined.
    if y.size == 0 or y_train.size <= seasonality:
        return np.nan

    mae = np.mean(np.abs(y - y_pred))
    naive_forecast_errors = np.abs(y_train[seasonality:] - y_train[:-seasonality])
    scaling_factor = np.mean(naive_forecast_errors)

    if scaling_factor == 0:
        # The naive forecast is perfect in-sample, so the ratio is 0/0 or x/0.
        # Return the IEEE result explicitly instead of via a RuntimeWarning.
        return np.nan if mae == 0 else np.inf

    return mae / scaling_factor

# Rolling-origin cross-validation over the last m points
def leave_one_out_cv_last_m_with_naive(df, m, h, model, seasonality=7):
    """
    Evaluate a model and a seasonal-naive baseline on the last m data points.

    Despite the name, this is a rolling-origin (expanding-window) evaluation:
    for every origin ``i`` in the last ``m`` rows the model is refit on rows
    ``[:i]`` and asked for an ``h``-step forecast, which is compared with rows
    ``[i:i + h]``. Rows are used in the order they appear in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with columns 'ds', 'y', and 'unique_id'; only 'y' is read here.
    m : int
        Number of data points from the end of the dataset used for evaluation.
    h : int
        Number of steps ahead for forecasting.
    model : object
        Model whose ``fit(y)`` returns an object with ``predict(h=...)`` that
        yields a mapping containing a "mean" entry.
    seasonality : int, default 7
        Seasonal period, used for the seasonal-naive forecast and for the
        MASE scaling factor.

    Returns
    -------
    tuple
        ``(mase_model, mase_naive)``. Both are ``nan`` when ``m < h`` leaves
        no complete forecast window.

    Raises
    ------
    ValueError
        If ``h`` or ``seasonality`` is smaller than 1, or if fewer than
        ``seasonality + 1`` observations precede the evaluation window.
    """
    if h < 1:
        raise ValueError("h must be at least 1")
    if seasonality < 1:
        raise ValueError("seasonality must be at least 1")

    y_all = df['y'].values
    n_obs = len(y_all)
    start_index = n_obs - m  # First forecast origin

    # The first training window must contain more than one full season,
    # otherwise the seasonal-naive forecast and the scaling factor are undefined.
    if start_index <= seasonality:
        raise ValueError(
            "m leaves too few observations before the evaluation window: "
            f"need more than {seasonality}, got {start_index}"
        )

    actual_chunks = []
    model_chunks = []
    naive_chunks = []

    for i in range(start_index, n_obs - h + 1):
        # Training and test split: everything before the origin vs. the next h points
        train_y = y_all[:i]
        actual_chunks.append(y_all[i:i + h])

        # Refit on the expanding window and forecast h steps ahead
        fitted = model.fit(train_y)
        y_hat_model = fitted.predict(h=h).get("mean")
        model_chunks.append(np.asarray(y_hat_model)[:h])

        # Seasonal naive: repeat the last observed season cyclically to length h
        naive_chunks.append(np.resize(train_y[-seasonality:], h))

    if not actual_chunks:
        # m < h: not a single complete forecast window fits into the last m points
        return np.nan, np.nan

    actual = np.concatenate(actual_chunks)
    predicted_model = np.concatenate(model_chunks)
    predicted_naive = np.concatenate(naive_chunks)

    # Scale with the data preceding the evaluation window only, so the held-out
    # points do not leak into the denominator, and use the same seasonal lag as
    # the naive baseline.
    train_series = y_all[:start_index]
    mase_autoets = mase(actual, predicted_model, train_series, seasonality)
    mase_naive = mase(actual, predicted_naive, train_series, seasonality)

    return mase_autoets, mase_naive

In [None]:
# Evaluation settings
unique_store_ids = data['store_id'].unique()
m = 28           # evaluation window: the last m points of each series
h = 1            # forecast horizon in steps
seasonality = 7  # weekly period for daily data

results = []

# Evaluate every store's series, showing a progress bar
for store_id in tqdm(unique_store_ids, desc="Processing all store_id series"):
    # Select this store's rows and rename them to the ds / y / unique_id layout
    df = (
        data.loc[data['store_id'] == store_id, ['d', 'revenue', 'store_id']]
        .rename(columns={'d': 'ds', 'revenue': 'y', 'store_id': 'unique_id'})
    )

    # A fresh model object is built for each store
    model = AutoETS(
        model=["Z", "Z", "Z"],
        alias="AutoETS",
        damped=True,
        season_length=seasonality,
    )
    mase_autoets, mase_naive = leave_one_out_cv_last_m_with_naive(
        df, m, h, model, seasonality
    )
    results.append(
        {
            "store_id": store_id,
            "AutoETS_MASE": mase_autoets,
            "Naive_MASE": mase_naive,
        }
    )

# One row per store
summary_table = pd.DataFrame(results)

# Mean MASE across stores for the model and for the naive baseline
average_autoets_mase = summary_table["AutoETS_MASE"].mean()
average_naive_mase = summary_table["Naive_MASE"].mean()

print("Summary Table of MASE for Each Series:", summary_table, sep="\n")
print(
    f"\nAverage AutoETS MASE across all series: {average_autoets_mase}",
    f"Average Naive MASE across all series: {average_naive_mase}",
    sep="\n",
)

Processing all store_id series:  80%|████████████████████████████████████████▊          | 8/10 [01:42<00:26, 13.27s/it]