In [1]:
# !pip install statsforecast --user

In [2]:
from functools import partial

import pandas as pd
import statsforecast
from statsforecast import StatsForecast
from statsforecast.feature_engineering import mstl_decomposition
from statsforecast.models import ARIMA, MSTL
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import smape, mase

In [3]:
# Load the M4 hourly dataset and keep only the first ten series to keep the demo fast.
df = pd.read_parquet('https://datasets-nixtla.s3.amazonaws.com/m4-hourly.parquet')
uids = df['unique_id'].unique()[:10]
df = df.loc[df['unique_id'].isin(uids)]
df.head()

Unnamed: 0,unique_id,ds,y
0,H1,1,605.0
1,H1,2,586.0
2,H1,3,586.0
3,H1,4,559.0
4,H1,5,511.0


In [4]:
freq = 1  # `ds` is an integer step counter, so consecutive timestamps differ by 1
season_length = 24
horizon = 2 * season_length

# Hold out the last `horizon` rows of every series for validation; the rest is training data.
valid = df.groupby('unique_id').tail(horizon)
train = df.drop(valid.index)

# Reuse `season_length` (instead of a second literal 24) so the decomposition and the
# ARIMA model configured later always agree on the seasonal period.
model = MSTL(season_length=season_length)
transformed_df, X_df = mstl_decomposition(train, model=model, freq=freq, h=horizon)

In [5]:
# Inspect the training frame returned by `mstl_decomposition`, which carries the
# extra `trend` and `seasonal` columns next to `y`.
transformed_df

Unnamed: 0,unique_id,ds,y,trend,seasonal
0,H1,1,605.0,502.872910,131.419934
1,H1,2,586.0,507.873456,93.100015
2,H1,3,586.0,512.822533,82.155386
3,H1,4,559.0,517.717481,42.412749
4,H1,5,511.0,522.555849,-11.401890
...,...,...,...,...,...
6995,H107,696,4708.0,3947.720625,676.891540
6996,H107,697,4451.0,3955.741399,530.573828
6997,H107,698,4303.0,3963.834683,382.338985
6998,H107,699,4207.0,3971.979313,274.809658


In [6]:
# ARIMA(1, 0, 1) fitted on the decomposed frame; `X_df` is passed alongside it so the
# forecast call has the future `trend`/`seasonal` values produced by the decomposition.
sf = StatsForecast(freq=freq, models=[ARIMA(season_length=season_length, order=(1, 0, 1))])
preds = sf.forecast(df=transformed_df, X_df=X_df, h=horizon)
preds.head()

Unnamed: 0,unique_id,ds,ARIMA
0,H1,701,612.737671
1,H1,702,542.851792
2,H1,703,501.93183
3,H1,704,470.248275
4,H1,705,448.115822


In [24]:
# Hold-out rows (the last `horizon` observations of each series) used as ground truth.
valid

Unnamed: 0,unique_id,ds,y
700,H1,701,619.0
701,H1,702,565.0
702,H1,703,532.0
703,H1,704,495.0
704,H1,705,481.0
...,...,...,...
7475,H107,744,4316.0
7476,H107,745,4159.0
7477,H107,746,4058.0
7478,H107,747,3971.0


In [None]:
# Show the function (and its signature) rather than calling it: `mase()` with no
# arguments raises TypeError and would break Restart & Run All.
mase

In [31]:
# Show the function (and its signature) rather than calling it: `smape()` with no
# arguments raises TypeError, whereas the bare name renders the function repr.
smape

<function utilsforecast.losses.smape(df: pandas.core.frame.DataFrame, models: List[str], id_col: str = 'unique_id', target_col: str = 'y') -> pandas.core.frame.DataFrame>

In [58]:
def compute_evaluation(preds, valid, train, model='ARIMA', seasonality=24):
    """Score one model's forecasts against actual values with sMAPE and MASE.

    Parameters
    ----------
    preds : pandas.DataFrame
        Forecasts containing `unique_id`, `ds` and a column named after `model`.
    valid : pandas.DataFrame
        Actual values; inner-joined to `preds` on `unique_id` and `ds`.
    train : pandas.DataFrame
        Forwarded to `evaluate` as `train_df`.
    model : str, default 'ARIMA'
        Name of the forecast column to score.
    seasonality : int, default 24
        Seasonal period forwarded to `mase`.

    Returns
    -------
    pandas.Series
        Two formatted strings indexed by `mase` (one decimal) and `smape`
        (percentage with one decimal), each the mean of that metric over the
        rows returned by `evaluate`.

    Raises
    ------
    ValueError
        If `preds` and `valid` share no (`unique_id`, `ds`) pairs.
    """
    full = preds.merge(valid, on=['unique_id', 'ds'])
    if full.empty:
        # Fail with a clear message instead of an obscure KeyError further down.
        raise ValueError("`preds` and `valid` have no overlapping ('unique_id', 'ds') rows")
    seasonal_mase = partial(mase, seasonality=seasonality)
    # Restrict scoring to the requested column so extra columns in `valid`
    # are never mistaken for model forecasts.
    scores = evaluate(full, metrics=[smape, seasonal_mase], train_df=train, models=[model])
    res = scores.groupby('metric')[model].mean()
    return pd.Series({
        'mase': '{:.1f}'.format(res['mase']),
        'smape': '{:.1%}'.format(res['smape']),
    })

In [8]:
# `compute_evaluation` requires the hold-out and training frames explicitly.
compute_evaluation(preds, valid, train)

mase      1.0
smape    3.9%
dtype: object

In [9]:
# Baseline: the same ARIMA fitted on the raw training frame, i.e. without the
# `trend`/`seasonal` columns added by the decomposition.
preds_noexog = sf.forecast(h=horizon, df=train)
compute_evaluation(preds_noexog, valid, train)

mase      2.3
smape    7.7%
dtype: object

In [16]:
# NOTE(review): the name says 300 but the filter keeps rows with ds < 200 — confirm which is intended.
first_300 = train.loc[train['ds'] < 200]

In [35]:
# Forecast from the truncated history. `train` doubles as the frame of actual values here,
# presumably because the forecast window still falls inside the training rows — verify.
preds_300 = sf.forecast(df=first_300, h=horizon)
compute_evaluation(preds_300, valid=train, train=train)

mase      2.3
smape    8.5%
dtype: object

In [36]:
from mlforecast import MLForecast

In [37]:
from sklearn.ensemble import RandomForestRegressor

In [76]:
from sklearn.linear_model import LinearRegression

In [38]:
# Check the frequency value that is passed to MLForecast in the next cell.
freq

1

In [77]:
# Linear regression on lag features of the target (lags 1 and 7).
ml = MLForecast(models=[LinearRegression()], lags=[1, 7], freq=freq)

In [78]:
# An empty `static_features` list means no column is treated as static, so the
# `trend`/`seasonal` columns are used as time-varying features (their future values
# are supplied through `X_df` at predict time).
ml.fit(transformed_df, static_features=[])

MLForecast(models=[LinearRegression], freq=1, lag_features=['lag1', 'lag7'], date_features=[], num_threads=1)

In [85]:
# Presumably the order of the feature columns the fitted model was trained on — verify
# against the MLForecast docs.
ml.ts.features_order_

['trend', 'seasonal', 'lag1', 'lag7']

In [79]:
# NOTE(review): this forecasts only 7 steps while the ARIMA runs use `horizon` steps,
# so the resulting scores are not directly comparable — confirm the intended horizon.
ml_preds = ml.predict(h=7, X_df=X_df)

In [80]:
# Display the LinearRegression forecasts (7 steps per series).
ml_preds

Unnamed: 0,unique_id,ds,LinearRegression
0,H1,701,625.185517
1,H1,702,569.976792
2,H1,703,525.475387
3,H1,704,486.516066
4,H1,705,458.909381
...,...,...,...
65,H107,703,3260.859735
66,H107,704,3139.280562
67,H107,705,3059.366226
68,H107,706,3011.644873


In [81]:
# NOTE(review): this cell re-defines `compute_evaluation` from an earlier cell;
# keep a single definition when cleaning up the notebook.
def compute_evaluation(preds, valid, train, model='ARIMA', seasonality=24):
    """Score one model's forecasts against actual values with sMAPE and MASE.

    Parameters
    ----------
    preds : pandas.DataFrame
        Forecasts containing `unique_id`, `ds` and a column named after `model`.
    valid : pandas.DataFrame
        Actual values; inner-joined to `preds` on `unique_id` and `ds`.
    train : pandas.DataFrame
        Forwarded to `evaluate` as `train_df`.
    model : str, default 'ARIMA'
        Name of the forecast column to score.
    seasonality : int, default 24
        Seasonal period forwarded to `mase`.

    Returns
    -------
    pandas.Series
        Two formatted strings indexed by `mase` (one decimal) and `smape`
        (percentage with one decimal), each the mean of that metric over the
        rows returned by `evaluate`.

    Raises
    ------
    ValueError
        If `preds` and `valid` share no (`unique_id`, `ds`) pairs.
    """
    full = preds.merge(valid, on=['unique_id', 'ds'])
    if full.empty:
        # Fail with a clear message instead of an obscure KeyError further down.
        raise ValueError("`preds` and `valid` have no overlapping ('unique_id', 'ds') rows")
    seasonal_mase = partial(mase, seasonality=seasonality)
    # Restrict scoring to the requested column so extra columns in `valid`
    # are never mistaken for model forecasts.
    scores = evaluate(full, metrics=[smape, seasonal_mase], train_df=train, models=[model])
    res = scores.groupby('metric')[model].mean()
    return pd.Series({
        'mase': '{:.1f}'.format(res['mase']),
        'smape': '{:.1%}'.format(res['smape']),
    })

In [84]:
# Score the LinearRegression forecasts against the hold-out rows.
compute_evaluation(ml_preds, valid=valid, train=train, model='LinearRegression')

mase      0.8
smape    3.8%
dtype: object