In [None]:
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsforecast import StatsForecast
from statsforecast.arima import ARIMASummary
from statsforecast.models import AutoARIMA, SimpleExponentialSmoothingOptimized, Holt, HoltWinters, SeasonalNaive

from utilsforecast.losses import *
from utilsforecast.evaluation import evaluate

# Notebook-wide settings.
# Show floats in pandas output with three digits of precision.
pd.set_option("display.precision", 3)
# NOTE(review): presumably tells the Nixtla libraries to return the series id
# as a regular column rather than as the index - confirm for the installed version.
os.environ.update({"NIXTLA_ID_AS_COL": "true"})
# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action="ignore")

In [None]:
# Default size (width, height in inches) for every figure created below.
plt.rc('figure', figsize=(9, 6))

## SARIMA

In [None]:
# Download the monthly milk production series and reshape it into the
# long format used by the forecasting cells below: unique_id / Date / y.
url = "https://raw.githubusercontent.com/marcopeix/AppliedTimeSeriesForecastingInPython/refs/heads/master/data/monthly-milk-production-pounds.csv"
milk_raw = pd.read_csv(url)

# Append a day to the month label so it parses as a date, then move each
# stamp to the last day of its month.
month_end = pd.to_datetime(milk_raw['Month'] + '-01') + pd.offsets.MonthEnd(1)

df = (
    milk_raw
    .assign(Month=month_end.dt.date)  # keep plain date objects, as before
    .rename(columns={"Month": "Date", "Monthly milk production (pounds per cow)": "y"})
)
# Single series, so every row gets the same id; placed as the first column.
df.insert(0, 'unique_id', 1)

df.head()

In [None]:
# Line chart of the monthly series.
fig, ax = plt.subplots()
ax.plot(df['Date'], df['y'])
ax.set(
    xlabel='Date',
    ylabel='Milk production (lbs/cow)',
    title='Monthly milk production in Australia',
)

# Slant the date tick labels so they do not overlap.
fig.autofmt_xdate()
fig.tight_layout()

In [None]:
# ARIMA: automatic order selection restricted to non-seasonal models.
# The alias becomes the forecast column name read by the plotting cell.
arima = AutoARIMA(seasonal=False, alias='ARIMA')

# SARIMA: same automatic search, but allowing seasonal terms with a
# 12-month cycle (the data is monthly).
sarima = AutoARIMA(season_length=12, alias='SARIMA')

# Cross-validate both models on 5 windows of 12 months each. Because
# step_size equals h, the windows do not overlap. level=[80] also requests
# 80% prediction intervals.
sf = StatsForecast(models=[arima, sarima], freq='M')
cv_df = sf.cross_validation(h=12, 
                            df=df, 
                            n_windows=5, 
                            step_size=12, 
                            level=[80], 
                            time_col='Date')

cv_df.head()

In [None]:
# Overlay the cross-validation forecasts of each model on the actual series.
fig, ax = plt.subplots()
ax.plot(df['Date'], df['y'])
for model_name in ('ARIMA', 'SARIMA'):
    # The forecast column and the legend label share the same name.
    ax.plot(cv_df['Date'], cv_df[model_name], label=model_name)
ax.set(
    xlabel='Date',
    ylabel='Milk production (lbs/cow)',
    title='Monthly milk production in Australia',
)
ax.legend(loc='best')

fig.autofmt_xdate()
fig.tight_layout()

In [None]:
# Score the cross-validation forecasts with MAE and sMAPE, then average
# each metric over the series.
eval_df = cv_df.drop(columns=['Date', 'cutoff'])
evaluation = evaluate(df=eval_df, metrics=[mae, smape])
per_metric = evaluation.drop(columns=['unique_id']).groupby('metric')
avg_evaluation = per_metric.mean().reset_index()
avg_evaluation

In [None]:
# Refit both models on the full series and print the specification each one selected.
sf = StatsForecast(models=[arima, sarima], freq='M')
sf.fit(df=df, time_col='Date')

# Row 0 of `fitted_` holds the fitted models for the first (only) series,
# in the same order as `models`.
first_series_fits = sf.fitted_[0]
for fitted in first_series_fits[:2]:
    print(ARIMASummary(fitted.model_))

## SARIMAX

In [None]:
# Weekly Walmart sales sample; the Date column is parsed as timestamps on read.
url = "https://raw.githubusercontent.com/marcopeix/TimeSeriesForecastingUsingFoundationModels/refs/heads/main/data/walmart_sales_small.csv"
df = pd.read_csv(filepath_or_buffer=url, parse_dates=["Date"])

df.head()

In [None]:
# Keep only the series id, timestamp, target and the holiday indicator.
keep_cols = ['Store', 'Date', 'Weekly_Sales', 'Holiday_Flag']
df = df[keep_cols]
df.head()

In [None]:
# Bounds and starting points for the automatic order search.
order_search = dict(
    start_p=1, max_p=5,
    start_q=1, max_q=5,
    max_d=2,
    start_P=0, max_P=2,
    start_Q=0, max_Q=2,
    max_D=1,
)
# NOTE(review): season_length=1 likely means no seasonal terms are actually
# fitted, despite the seasonal bounds above - confirm this is intended.
sarimax = AutoARIMA(season_length=1, **order_search)

# Cross-validate on 10 non-overlapping windows of 8 weeks per store.
# NOTE(review): `Holiday_Flag` is passed along inside `df`; presumably it is
# picked up as an exogenous regressor - verify against the statsforecast docs.
sf = StatsForecast(models=[sarimax], freq='W')
cv_df = sf.cross_validation(
    df=df,
    h=8,
    n_windows=10,
    step_size=8,
    level=[80],
    id_col='Store',
    time_col='Date',
    target_col='Weekly_Sales',
)

cv_df.head()

In [None]:
# Score the forecasts per store with MAE and sMAPE, then average each
# metric across stores.
eval_df = cv_df.drop(columns=['Date', 'cutoff'])
evaluation = evaluate(
    df=eval_df,
    metrics=[mae, smape],
    id_col='Store',
    target_col='Weekly_Sales',
)
per_metric = evaluation.drop(columns=['Store']).groupby('metric')
avg_evaluation = per_metric.mean().reset_index()
avg_evaluation

## Exponential smoothing

In [None]:
# Load the M4 hourly dataset and cut it down so the examples run quickly.
Y_df = pd.read_parquet('https://datasets-nixtla.s3.amazonaws.com/m4-hourly.parquet')

# Keep the first 8 series ids (one per panel of the 4x2 plot grids below).
uids = Y_df['unique_id'].unique()[:8]
Y_df = Y_df[Y_df['unique_id'].isin(uids)]
# Keep only the last 7 * 24 observations (7 days of hourly data) of each series.
Y_df = Y_df.groupby('unique_id').tail(7 * 24)

Y_df.head()

In [None]:
# One panel per selected series, laid out on a 4x2 grid.
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(12,9))

for idx, ax in enumerate(axes.ravel()):
    uid = uids[idx]
    plot_df = Y_df.loc[Y_df['unique_id'] == uid]

    ax.plot(plot_df['ds'], plot_df['y'])
    ax.set(title=f'{uid}', xlabel='Time steps', ylabel='Value')

fig.autofmt_xdate()
fig.tight_layout()

### Simple exponential smoothing

In [None]:
# Set horizon: forecast one day (24 hourly steps) ahead in each window.
h = 24

# Seasonal naive baseline: repeats the value observed 24 steps earlier
# (the same hour of the previous day).
seasonal_naive = SeasonalNaive(season_length=24)

# SES with the smoothing parameter optimized by the library. The alias sets
# the forecast column name to 'SES', which the plotting cell reads.
ses = SimpleExponentialSmoothingOptimized(alias='SES')

# freq=1 because the time column `ds` holds integer time steps.
sf = StatsForecast(models=[seasonal_naive, ses], freq=1)

# Three non-overlapping evaluation windows of h steps each.
cv_df = sf.cross_validation(h=h,
                            df=Y_df,
                            n_windows=3,
                            step_size=h)

In [None]:
# Actual values and cross-validation forecasts, one panel per series.
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(12,9))

# (forecast column, legend label) pairs drawn on every panel.
forecast_lines = [('SeasonalNaive', 'Seasonal naive'), ('SES', 'SES')]

for idx, ax in enumerate(axes.ravel()):
    uid = uids[idx]
    plot_df = Y_df.loc[Y_df['unique_id'] == uid]
    preds_df = cv_df.loc[cv_df['unique_id'] == uid]

    ax.plot(plot_df['ds'], plot_df['y'])
    for column, label in forecast_lines:
        ax.plot(preds_df['ds'], preds_df[column], label=label)
    ax.set(title=f'{uid}', xlabel='Time steps', ylabel='Value')
    ax.legend(loc=2)  # loc=2 is the upper-left corner

fig.autofmt_xdate()
fig.tight_layout()

### Double exponential smoothing

In [None]:
# DES (Holt): double exponential smoothing, which adds a trend component.
# The default alias 'Holt' is the forecast column name read by the plotting cell.
des = Holt()

# Expects `seasonal_naive`, `ses` and `h` to be defined by an earlier cell.
sf = StatsForecast(models=[seasonal_naive, ses, des], freq=1)

# Three non-overlapping evaluation windows of h steps each.
cv_df = sf.cross_validation(h=h,
                            df=Y_df,
                            n_windows=3,
                            step_size=h)

In [None]:
# Actual values against the seasonal naive and DES forecasts, one panel per series.
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(12,9))

# (forecast column, legend label) pairs drawn on every panel.
forecast_lines = [('SeasonalNaive', 'Seasonal naive'), ('Holt', 'DES')]

for idx, ax in enumerate(axes.ravel()):
    uid = uids[idx]
    plot_df = Y_df.loc[Y_df['unique_id'] == uid]
    preds_df = cv_df.loc[cv_df['unique_id'] == uid]

    ax.plot(plot_df['ds'], plot_df['y'])
    for column, label in forecast_lines:
        ax.plot(preds_df['ds'], preds_df[column], label=label)
    ax.set(title=f'{uid}', xlabel='Time steps', ylabel='Value')
    ax.legend(loc=2)  # loc=2 is the upper-left corner

fig.autofmt_xdate()
fig.tight_layout()

### Triple exponential smoothing

In [None]:
# TES (Holt-Winters): triple exponential smoothing, which adds a seasonal
# component. season_length=24 matches the daily cycle of hourly data.
# The default alias 'HoltWinters' is the forecast column name read by the
# plotting cell.
tes = HoltWinters(season_length=24)

# Expects `seasonal_naive`, `ses`, `des` and `h` to be defined by earlier cells.
sf = StatsForecast(models=[seasonal_naive, ses, des, tes], freq=1)

# Three non-overlapping evaluation windows of h steps each.
cv_df = sf.cross_validation(h=h,
                            df=Y_df,
                            n_windows=3,
                            step_size=h)

In [None]:
# Actual values against the seasonal naive and TES forecasts, one panel per series.
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(12,9))

# (forecast column, legend label) pairs drawn on every panel.
forecast_lines = [('SeasonalNaive', 'Seasonal naive'), ('HoltWinters', 'TES')]

for idx, ax in enumerate(axes.ravel()):
    uid = uids[idx]
    plot_df = Y_df.loc[Y_df['unique_id'] == uid]
    preds_df = cv_df.loc[cv_df['unique_id'] == uid]

    ax.plot(plot_df['ds'], plot_df['y'])
    for column, label in forecast_lines:
        ax.plot(preds_df['ds'], preds_df[column], label=label)
    ax.set(title=f'{uid}', xlabel='Time steps', ylabel='Value')
    ax.legend(loc=2)  # loc=2 is the upper-left corner

fig.autofmt_xdate()
fig.tight_layout()

In [None]:
# Score every model in the latest cross-validation run with MAE and sMAPE,
# then average each metric across the series.
eval_df = cv_df.drop(columns=['ds', 'cutoff'])
evaluation = evaluate(df=eval_df, metrics=[mae, smape])
per_metric = evaluation.drop(columns=['unique_id']).groupby('metric')
avg_evaluation = per_metric.mean().reset_index()
avg_evaluation