In [None]:
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsforecast import StatsForecast
from statsforecast.arima import ARIMASummary
from statsforecast.models import SeasonalNaive, AutoTBATS, MSTL, AutoMFLES, AutoTheta

from utilsforecast.losses import *
from utilsforecast.evaluation import evaluate

# Notebook-wide settings.
# NIXTLA_ID_AS_COL is an environment flag read by the Nixtla libraries
# (presumably it keeps the series id as a regular column -- confirm in their docs).
os.environ["NIXTLA_ID_AS_COL"] = "true"
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Show floats in DataFrame output with 3 decimal places.
pd.options.display.precision = 3

In [None]:
# Default size (width, height in inches) for every figure created below.
plt.rcParams.update({'figure.figsize': (9, 6)})

## Theta

In [None]:
# Download the monthly milk production series and reshape it into the
# (`unique_id`, `Date`, `y`) layout used by the cells below.
url = "https://raw.githubusercontent.com/marcopeix/AppliedTimeSeriesForecastingInPython/refs/heads/master/data/monthly-milk-production-pounds.csv"
df = (
    pd.read_csv(url)
    .assign(
        # Append a day so the month string parses as a date, roll it forward to
        # the month's last day, then keep plain `date` objects.
        Month=lambda frame: (
            pd.to_datetime(frame['Month'] + '-01') + pd.offsets.MonthEnd(1)
        ).dt.date
    )
    .rename(columns={"Month": "Date", "Monthly milk production (pounds per cow)": "y"})
)
# Single series, so every row gets the same id, placed as the first column.
df.insert(loc=0, column='unique_id', value=1)

df.head()

In [None]:
# Theta model
# `theta` must exist before it is handed to StatsForecast; without this line the
# cell fails with a NameError on a fresh kernel.
# season_length=12 matches the monthly data, and alias='Theta' names the forecast
# column 'Theta', which is the column the plotting cell reads from `cv_df`.
theta = AutoTheta(season_length=12, alias='Theta')

sf = StatsForecast(models=[theta], freq='M')

# Five non-overlapping windows of 12 months each (step_size equals the horizon).
cv_df = sf.cross_validation(h=12,
                            df=df,
                            n_windows=5,
                            step_size=12,
                            time_col='Date')

cv_df.head()

In [None]:
# Overlay the cross-validated Theta forecasts (dashed black) on the observed series.
fig, ax = plt.subplots()

ax.plot(df['Date'], df['y'])
ax.plot(cv_df['Date'], cv_df['Theta'], color='black', ls='--', label='Theta')
ax.set(
    xlabel='Date',
    ylabel='Milk production (lbs/cow)',
    title='Monthly milk production in Australia',
)
ax.legend(loc='best')

# Slant the date tick labels so they do not overlap.
fig.autofmt_xdate()
plt.tight_layout()

In [None]:
# Score the cross-validation output; 'Date' and 'cutoff' are removed before the
# frame is handed to evaluate().
eval_df = cv_df.drop(columns=['Date', 'cutoff'])
evaluation = evaluate(df=eval_df, metrics=[mae, smape])

# Average each metric after discarding the id column.
avg_evaluation = (
    evaluation
    .drop(columns=['unique_id'])
    .groupby('metric')
    .mean()
    .reset_index()
)
avg_evaluation

## TBATS

In [None]:
# Download the traffic volume series; `date_time` is parsed into timestamps on read.
# NOTE: this rebinds `df`, replacing the milk production frame loaded earlier.
url = "https://raw.githubusercontent.com/marcopeix/time-series-analysis/refs/heads/master/data/daily_traffic.csv"

df = pd.read_csv(url, parse_dates=["date_time"])
# Single series, so every row gets the same id, placed as the first column.
df.insert(loc=0, column='unique_id', value=1)
df.head()

In [None]:
# Plot the raw traffic volume series.
fig, ax = plt.subplots()

ax.plot(df['date_time'], df['traffic_volume'])
# x-axis label corrected from the typo 'TIme'.
ax.set_xlabel('Time')
ax.set_ylabel('Traffic volume')
ax.set_title('Daily traffic volume')

# Slant the date tick labels so they do not overlap.
fig.autofmt_xdate()
plt.tight_layout()

In [None]:
# Forecast horizon: 24 steps, i.e. one day of the hourly (freq='H') series.
h = 24

# Baseline: repeat the values observed one season (24 steps) earlier.
seasonal_naive = SeasonalNaive(season_length=24, alias='naive')

# TBATS model
# `tbats` must exist before it is handed to StatsForecast; without this line the
# cell fails with a NameError on a fresh kernel.
# alias='TBATS' names the forecast column 'TBATS', which the plotting cell reads.
# season_length lists a daily (24) and a weekly (168) cycle -- this assumes the
# traffic series has a weekly pattern; use season_length=[24] if it does not.
tbats = AutoTBATS(season_length=[24, 168], alias='TBATS')

sf = StatsForecast(models=[seasonal_naive, tbats], freq='H')

# Ten non-overlapping windows of `h` steps each (step_size equals the horizon).
cv_df = sf.cross_validation(h=h,
                            df=df,
                            n_windows=10,
                            step_size=h,
                            time_col='date_time',
                            target_col='traffic_volume')

cv_df.head()

In [None]:
# Overlay the cross-validated TBATS forecasts (dashed black) on the observed series.
fig, ax = plt.subplots()

ax.plot(df['date_time'], df['traffic_volume'])
ax.plot(cv_df['date_time'], cv_df['TBATS'], ls='--', color='black', label='TBATS')
# x-axis label corrected from the typo 'TIme'.
ax.set_xlabel('Time')
ax.set_ylabel('Traffic volume')
ax.set_title('Daily traffic volume')
ax.legend()

# Slant the date tick labels so they do not overlap.
fig.autofmt_xdate()
plt.tight_layout()

In [None]:
# Score the cross-validation output; 'date_time' and 'cutoff' are removed before
# the frame is handed to evaluate(). `eval_df` is extended with more model
# columns by later cells.
eval_df = cv_df.drop(columns=['date_time', 'cutoff'])
evaluation = evaluate(
    df=eval_df,
    metrics=[mae, smape],
    target_col='traffic_volume',
)
evaluation

## MSTL

In [None]:
# MSTL model
# `mstl` must exist before it is handed to StatsForecast; without this line the
# cell fails with a NameError on a fresh kernel.
# alias='MSTL' names the forecast column 'MSTL', which the plotting cell reads.
# season_length lists a daily (24) and a weekly (168) cycle -- this assumes the
# hourly traffic series has a weekly pattern; use season_length=[24] if it does not.
mstl = MSTL(season_length=[24, 168], alias='MSTL')

sf = StatsForecast(models=[mstl], freq='H')

# Same window layout as the TBATS run (reuses `h` from that cell), so the rows
# line up with `eval_df` when the forecasts are appended later.
mstl_cv_df = sf.cross_validation(h=h,
                            df=df,
                            n_windows=10,
                            step_size=h,
                            time_col='date_time',
                            target_col='traffic_volume')

mstl_cv_df.head()

In [None]:
# Overlay the cross-validated MSTL forecasts (dashed black) on the observed series.
fig, ax = plt.subplots()

ax.plot(df['date_time'], df['traffic_volume'])
ax.plot(mstl_cv_df['date_time'], mstl_cv_df['MSTL'], ls='--', color='black', label='MSTL')
# x-axis label corrected from the typo 'TIme'.
ax.set_xlabel('Time')
ax.set_ylabel('Traffic volume')
ax.set_title('Daily traffic volume')
ax.legend()

# Slant the date tick labels so they do not overlap.
fig.autofmt_xdate()
plt.tight_layout()

In [None]:
# Append the MSTL forecasts to the shared evaluation frame by position (raw
# array, no index alignment), then re-score every model column.
eval_df['MSTL'] = mstl_cv_df['MSTL'].to_numpy()
evaluation = evaluate(
    df=eval_df,
    metrics=[mae, smape],
    target_col='traffic_volume',
)
evaluation

## MFLES

In [None]:
# MFLES model
# `mfles` must exist before it is handed to StatsForecast; without this line the
# cell fails with a NameError on a fresh kernel.
# alias='MFLES' names the forecast column 'MFLES', which the plotting cell reads.
# test_size is the length of AutoMFLES's internal validation windows; it is set to
# the forecast horizon `h` defined in the TBATS cell.
# season_length lists a daily (24) and a weekly (168) cycle -- this assumes the
# hourly traffic series has a weekly pattern; use season_length=[24] if it does not.
mfles = AutoMFLES(test_size=h, season_length=[24, 168], alias='MFLES')

sf = StatsForecast(models=[mfles], freq='H')

# Same window layout as the TBATS run, so the rows line up with `eval_df` when
# the forecasts are appended later.
mfles_cv_df = sf.cross_validation(h=h,
                            df=df,
                            n_windows=10,
                            step_size=h,
                            time_col='date_time',
                            target_col='traffic_volume')

mfles_cv_df.head()

In [None]:
# Overlay the cross-validated MFLES forecasts (dashed black) on the observed series.
fig, ax = plt.subplots()

ax.plot(df['date_time'], df['traffic_volume'])
ax.plot(mfles_cv_df['date_time'], mfles_cv_df['MFLES'], ls='--', color='black', label='MFLES')
# x-axis label corrected from the typo 'TIme'.
ax.set_xlabel('Time')
ax.set_ylabel('Traffic volume')
ax.set_title('Daily traffic volume')
ax.legend()

# Slant the date tick labels so they do not overlap.
fig.autofmt_xdate()
plt.tight_layout()

In [None]:
# Append the MFLES forecasts to the shared evaluation frame by position (raw
# array, no index alignment), then re-score every model column.
eval_df['MFLES'] = mfles_cv_df['MFLES'].to_numpy()
evaluation = evaluate(
    df=eval_df,
    metrics=[mae, smape],
    target_col='traffic_volume',
)
evaluation