In [0]:
# Core
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Forecasting libs
from statsforecast import StatsForecast
from statsforecast.models import Naive, HistoricAverage, WindowAverage, SeasonalNaive
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsforecast.models import AutoARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from utilsforecast.plotting import plot_series
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import *

from functools import partial
from utilsforecast.feature_engineering import fourier, time_features, pipeline

# Evaluation
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from math import sqrt

# Utility
import warnings
# Blanket filter: every Python warning is ignored for the rest of the session.
# This keeps cell output clean but also hides library warnings that may point at
# real problems, so narrow or remove it while debugging.
warnings.filterwarnings('ignore')

### AutoARIMA


## Forecasting with time + exogenous features


In [0]:
# Load the raw sales file and reshape it into the long layout used by the rest of
# the notebook: one row per (unique_id, ds) with the target in `y`, followed by
# the remaining columns.
# NOTE(review): absolute, user-specific path — the notebook cannot run on another
# machine until this points at a shared or project-relative location.
data_path = "/Users/pju307/retail-forecasting/data/sales_data.csv"
df = pd.read_csv(data_path, parse_dates=['Date'])

# Keep only the rows whose (Product ID, Date) pair occurs more than once.
# NOTE(review): this KEEPS the duplicated pairs instead of dropping them — confirm
# that is intended (a de-duplication would use `~df.duplicated(...)`).
# `.copy()` turns the filtered result into an independent frame, so the column
# assignments below do not write to a slice of the original frame (which can raise
# pandas' SettingWithCopyWarning — invisible here because warnings are silenced).
df = df[df.duplicated(subset=["Product ID", "Date"], keep=False)].copy()
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values(['Store ID', 'Product ID', 'Date'])

# One series per store/product combination, e.g. "<Store ID>_<Product ID>".
df["Store-Product ID"] = df["Store ID"] + "_" + df["Product ID"]
df = df.rename(columns={"Store-Product ID": "unique_id", "Date": "ds", "Demand": "y"})

# Reorder columns: unique_id, ds, y come first
cols = ['unique_id', 'ds', 'y'] + [c for c in df.columns if c not in ['unique_id', 'ds', 'y']]
df = df[cols]

# Columns removed from the frame; `errors='ignore'` tolerates any of them being
# absent from the file.
drop_cols = [
    "Store ID",
    "Product ID",
    "Category",
    "Region",
    "Inventory Level",
    "Units Sold",
    "Units Ordered",
]
df = df.drop(columns=drop_cols, errors='ignore')

# Encode the two categorical columns as integer codes. With a dict mapper,
# `Series.map` yields NaN for any label that is not listed below.
df['Weather Condition'] = df['Weather Condition'].map({
    'Sunny': 0,
    'Cloudy': 1,
    'Rainy': 2,
    'Snowy': 3
})
df['Seasonality'] = df['Seasonality'].map({
    'Winter': 0,
    'Spring': 1,
    'Summer': 2,
    'Autumn': 3
})
df.head()

In [0]:
# Show the prepared frame as the cell output for a visual check.
df

In [0]:
# Hold out the last 7 rows of every series as the test set; everything else is
# the training set (re-indexed from 0).
holdout_index = df.groupby("unique_id").tail(7).index
test = df.loc[holdout_index]
train = df.drop(index=holdout_index).reset_index(drop=True)

In [0]:
# Test-period rows without the target column.
# NOTE(review): `futr_exog_df` is reassigned by the `pipeline(...)` call in a
# later cell, so this value is replaced before any use visible in this notebook.
futr_exog_df = test.drop(columns="y")


In [0]:
# Forecast horizon in days; reused by the cross-validation settings.
horizon = 7

# Feature generators handed to the utilsforecast pipeline: Fourier terms for a
# 7-step season (k=2) and calendar attributes taken from the timestamp.
weekly_fourier = partial(fourier, season_length=7, k=2)
calendar_features = partial(time_features, features=["day", "week", "month"])
features = [weekly_fourier, calendar_features]

# The pipeline returns two frames: the input frame with the generated features,
# and a frame for the next `horizon` periods.
exog_df, futr_exog_df = pipeline(df=df, features=features, freq="D", h=horizon)

In [0]:
# Single candidate model: AutoARIMA with a weekly season, reported under the
# alias "SARIMA_time_exog".
sarima_time_exog = AutoARIMA(season_length=7, alias="SARIMA_time_exog")
models = [sarima_time_exog]

# Loss functions passed to `evaluate`; MASE is bound to a seasonality of 7.
weekly_mase = partial(mase, seasonality=7)
metrics = [mae, mse, rmse, mape, smape, weekly_mase, scaled_crps]

# Cross-validate on the feature-augmented frame: 8 windows of `horizon` days,
# advancing by `horizon` between windows, refitting each time and requesting
# 80% prediction intervals.
sf = StatsForecast(models=models, freq="D", n_jobs=-1)
cv_settings = dict(
    h=horizon,  # 7 days
    n_windows=8,
    step_size=horizon,
    refit=True,
    level=[80],
)
cv_time_exog_df = sf.cross_validation(df=exog_df, **cv_settings)

# Score the cross-validation output per series (the `cutoff` column is removed
# first), then average each metric over all series.
cv_scores_by_series = evaluate(
    cv_time_exog_df.drop(columns="cutoff"),
    metrics=metrics,
    train_df=train,
    level=[80],
)
cv_time_exog_eval = (
    cv_scores_by_series
    .drop(columns="unique_id")
    .groupby("metric")
    .mean()
    .reset_index()
)
cv_time_exog_eval

In [0]:
# NOTE(review): this cell repeats the evaluation from the previous cell with the
# same inputs; it recomputes the same summary table.
cv_input = cv_time_exog_df.drop(columns="cutoff")
per_series_scores = evaluate(cv_input, metrics=metrics, train_df=train, level=[80])

# Average every metric across series.
metric_means = per_series_scores.drop(columns="unique_id").groupby("metric").mean()
cv_time_exog_eval = metric_means.reset_index()
cv_time_exog_eval

In [0]:
# Plot the cross-validation forecasts (with the 80% band) for one series on top
# of its history, limited to the last 140 in-sample points.
cv_forecasts = cv_time_exog_df.drop(columns=["y", "cutoff"])
plot_options = dict(
    ids=["S001_P0001"],
    max_insample_length=140,
    level=[80],
    palette="viridis",
)
plot_series(df=df, forecasts_df=cv_forecasts, **plot_options)