[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nepslor/B5203E-TSAF/blob/main/W7/arima_models_solutions.ipynb)
# ARIMA models
In this exercise we will
* define an AR model using recursive strategy to predict
* define a direct linear forecaster using the same inputs
* rely on external libraries to tune ARIMA models

In [None]:
%%capture
!pip install statsforecast>=1.0.0
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the tunnel series; the first CSV column is parsed as dates and used as index.
tunnel_csv_url = 'https://raw.githubusercontent.com/nepslor/teaching/main/TimeSeriesForecasting/data/tunnel.csv'
df_tunnel = pd.read_csv(tunnel_csv_url, index_col=0, parse_dates=True)
# Rebuild the index with an explicit daily frequency.
df_tunnel.index = pd.DatetimeIndex(df_tunnel.index, freq='D')
print(df_tunnel.head())

In [None]:
# plot the raw series
df_tunnel.plot(figsize=(10, 3))

# plot acf function for df_tunnel (lags up to 40) on a dedicated axis
from statsmodels.graphics.tsaplots import plot_acf
fig, ax = plt.subplots(figsize=(10, 3))
# the trailing semicolon keeps the notebook from echoing the call's return value
plot_acf(df_tunnel, lags=40, ax=ax);

# AR models
Auto regressive models can be defined as:
$$y_t = \sum_{i \in \mathcal{I}}  y_{t-i}\phi_i + \varepsilon_t $$
where $\varepsilon_t \sim \mathcal{N}(0, \sigma)$. This is a slightly different definition w.r.t. the one we introduced during the lecture ($y_t = \sum_{i=1}^{p}  y_{t-i}\phi_i + \varepsilon_t$), which assumed that the features used as regressors are **consecutive** previous steps of the process. As we recall from the exercise on Takens' theorem, it could be worth investigating non-uniformly sampled lags as model features. That's where the notation $i \in \mathcal{I}$ in the summation comes from: it refers to a generic set of lags $\mathcal{I}$ that are not necessarily consecutive.
Examples for this set could be:
$$
\begin{aligned}
\mathcal{I} &= [1, 2, 3] \\
\mathcal{I} &= [1, 3, 6, 9] \\
\mathcal{I} &= [1, 2, 24, 48]
\end{aligned}
$$


In [None]:
def format_data(y:pd.Series, covariate_lags:list, target_lags:list=None):
  """
  Format data for autoregressive model.

  Builds a feature matrix whose columns are shifted copies of `y` (one per lag,
  ordered from the largest lag to the smallest) and, optionally, a target
  matrix built the same way from `target_lags` (in the order given).

  Parameters
  ----------
  y : pd.Series or pd.DataFrame
      The series to lag. A Series is converted to a single-column DataFrame;
      a DataFrame is used as is (only its first column is renamed per lag).
  covariate_lags : list of int
      Shifts applied to build the features; lag `i` yields column `y_{i:02d}`.
  target_lags : list of int, optional
      Shifts applied to build the targets (negative values look ahead).

  Returns
  -------
  x : pd.DataFrame
      Lagged features with incomplete rows removed.
  target : pd.DataFrame or list
      Lagged targets restricted to the rows shared with `x`; an empty list
      when `target_lags` is None.
  """
  # the annotation advertises a Series, but the column renaming needs a DataFrame
  frame = y.to_frame() if isinstance(y, pd.Series) else y

  def lagged(lag):
    # shifted copy of the data whose first column is named after the lag
    shifted = frame.shift(lag)
    return shifted.rename(columns={shifted.columns[0]: 'y_{:02d}'.format(lag)})

  # features are ordered from the oldest lag to the most recent one
  x = pd.concat([lagged(i) for i in np.sort(covariate_lags)[::-1]], axis=1).dropna()
  if target_lags is None:
    return x, []

  target = pd.concat([lagged(i) for i in target_lags], axis=1)
  # join on the index and drop the rows where any feature or target is missing
  df = pd.concat({'x':x, 'target':target}, axis=1).dropna()
  return df['x'], df['target']

# example: features are the series shifted by 1, 2 and 3 steps, target is the unshifted series
x, y = format_data(df_tunnel, covariate_lags=[1, 2, 3], target_lags=[0])

# overlay the first 20 rows of the features and of the target (dashed black line)
x.iloc[:20].plot(figsize=(10, 3))
y.iloc[:20].plot(ax=plt.gca(), color='black', linestyle='--', title='features and target');

### AR models
In the following you can find an implementation of a simple AR model.
The model is completely defined by the parameter `covariate_lags`, which is a list defining the set $\mathcal{I}$.


The parameters are fitted minimizing the usual sum of square loss on the one step ahead prediction:
$$ \begin{aligned}
\phi^* = &\text{arg}\min_{\phi} \Vert y-  \sum_{i \in \mathcal{I}}  y_{t-i}\phi_i\Vert_2^2\\
=&\text{arg}\min_{\phi} \Vert y-  x\phi\Vert_2^2 \\
=&(x^Tx)^{-1}(x^Ty)
\end{aligned}$$

The prediction is then obtained using the recursive strategy $\hat y_{t+2} = f(\hat y_{t+1})$


<img src="https://github.com/nepslor/B5203E-TSAF/raw/95ab6491476169ca761b47d1bee8735b12346694/pics/recursive_forecast.png" width="400">


In [None]:
class AR_forecaster:
  """
  Linear autoregressive model forecasting with the recursive strategy.

  The model is fitted on the one-step-ahead target: with `t` the last available
  time step, lag `i` in `covariate_lags` refers to the observation `y[t-i]`
  (lag 0 is the most recent observation) and the target is `y[t+1]`.
  Features are ordered from the largest lag to the smallest.
  """
  covariate_lags:list = [1, 2, 3]
  theta:np.array=None
  def __init__(self, covariate_lags=(1,)):
    # copy into a list so that ranges / arrays / the shared default are never aliased
    self.covariate_lags = list(covariate_lags)
    self.theta = np.zeros(len(self.covariate_lags))

  def fit(self, y:pd.Series):
    """Fit the coefficients by least squares on the one-step-ahead target and return self."""
    # obtain the lags defining the model's features
    x, target = format_data(y, covariate_lags=self.covariate_lags, target_lags=[-1])
    # fit the parameters: lstsq minimizes the same ||y - x theta||^2 as the normal
    # equations, without explicitly inverting x^T x (ill-conditioned with many lags)
    self.theta = np.linalg.lstsq(x.values, target.values, rcond=None)[0]
    return self

  def predict(self, y:pd.Series, steps_ahead=1):
    """
    Forecast `steps_ahead` values following the last observation of `y`.

    Each prediction is appended to the history and the features of the next
    step are read from that history at the model's lags, so the recursion is
    valid for any lag set and not only for consecutive lags starting at 0.
    Assumes `y` is a single-column series without missing values, indexed by
    daily dates. Raises IndexError if `y` is shorter than the largest lag + 1.
    """
    lags = np.sort(self.covariate_lags)[::-1]
    # only the last max(lag)+1 observations can ever be used as features
    history = list(np.ravel(np.asarray(y, dtype=float))[-(int(lags.max()) + 1):])

    y_pred = []
    for _ in range(steps_ahead):
      # lag i is the value i steps before the last element of the history
      features = np.array([history[-1 - lag] for lag in lags])
      # prediction step
      y_next = float(np.ravel(features @ self.theta)[0])
      # store the prediction
      y_pred.append(y_next)
      # recursive strategy - the prediction becomes the last observation
      history.append(y_next)
    return pd.Series(np.asarray(y_pred, dtype=float), index=pd.date_range(start=y.index[-1]+pd.Timedelta(days=1), periods=steps_ahead, freq='D'))


# Comparing different models

The following function fits the AR model on a training set and then performs sliding window predictions. You can use this to evaluate different models and find the best combinations of lags.

In [None]:
def evaluate_model(model_class, y, steps_ahead, tr_ratio=0.8, **model_kwargs):
  """
  Fit a model on the first part of `y` and score it with sliding-window forecasts.

  The model is built as `model_class(**model_kwargs)` and fitted once on the
  first `int(len(y)*tr_ratio)` samples. It is then asked, for every forecast
  origin in the remaining data, to predict `steps_ahead` values given all the
  observations before that origin.

  Returns
  -------
  score : float
      Mean absolute error, averaged over the horizon and then over the windows.
  y_hats : np.ndarray, one row per window, `steps_ahead` columns
      The forecasts.
  errs : np.ndarray, same shape as `y_hats`
      Forecast minus observation.

  Raises
  ------
  ValueError
      If the validation part is shorter than `steps_ahead`.
  """
  n_tr = int(len(y)*tr_ratio)
  # +1: the window ending exactly on the last observation is a valid one too
  n_windows = len(y) - n_tr - steps_ahead + 1
  if n_windows < 1:
    raise ValueError('validation set ({} samples) is shorter than steps_ahead={}'.format(len(y) - n_tr, steps_ahead))

  model = model_class(**model_kwargs).fit(y.iloc[:n_tr])
  y_hats = []
  errs = []
  for i in range(n_windows):
    origin = n_tr + i
    # the model only sees the data strictly before the forecast origin
    y_hat_i = model.predict(y.iloc[:origin], steps_ahead=steps_ahead).values
    y_true_i = y.iloc[origin:origin+steps_ahead].values.ravel()
    y_hats.append(y_hat_i)
    errs.append(y_hat_i - y_true_i)
  y_hats = np.vstack(y_hats)
  errs = np.vstack(errs)
  score = np.mean(np.mean(np.abs(errs), axis=1))
  return score, y_hats, errs

# evaluation settings, reused by the model comparisons in the following cells
tr_ratio = 0.8
steps_ahead = 14
# AR model with lags 0..6 as features, evaluated on 14-steps-ahead forecasts
score, y_hat, errs = evaluate_model(AR_forecaster, df_tunnel,
                                    covariate_lags=np.arange(7),
                                    steps_ahead=steps_ahead,
                                    tr_ratio=tr_ratio)
print('AR model score:{:0.2e}'.format(score))

In [None]:
#@title Animation
# create an animation plotting each row of y_hat against the matching observations
from matplotlib import animation

def animate(y_hats, y_all, n_train=None, max_frames=100):
  """
  Animate forecasts against the observed series, one evaluation window per frame.

  Parameters
  ----------
  y_hats : array with one row per window and one column per forecast step
      Forecasts as returned by `evaluate_model`.
  y_all : pd.DataFrame or pd.Series
      The full observed series that was passed to `evaluate_model`.
  n_train : int, optional
      Number of leading samples used for training. Defaults to
      `int(len(y_all)*tr_ratio)`, computed at call time from the notebook-level
      `tr_ratio`, so that rebinding `n_tr` in another cell cannot misalign
      the observations.
  max_frames : int
      Upper bound on the number of animated windows.

  Returns the animation wrapped in an `IPython.display.HTML` object.
  """
  from IPython.display import HTML

  y_hats = np.asarray(y_hats)
  # the horizon is a property of the forecasts, no need to read a global for it
  horizon = y_hats.shape[1]
  if n_train is None:
    n_train = int(len(y_all)*tr_ratio)

  # a fresh figure per call: a figure closed by a previous call is never reused
  fig, ax = plt.subplots(figsize=(10, 3))

  def draw_frame(i):
    ax.clear()
    ax.plot(y_hats[i], label='y_hat')
    ax.plot(y_all.iloc[n_train+i:n_train+i+horizon].values, label='y_te')
    ax.legend(loc='upper left')
    return ax,

  # never request more frames than there are forecast windows
  n_frames = min(max_frames, len(y_hats))
  ani = animation.FuncAnimation(fig, draw_frame, frames=n_frames, interval=80)
  # close the figure so that the notebook does not also show a static copy
  plt.close(fig)
  return HTML(ani.to_jshtml())

animate(y_hat, df_tunnel)

Let's check the autocorrelation of the 1-step-ahead prediction errors:

In [None]:
# check autocorrelation of the errors 1 step ahead
# (column 0 of errs holds the first forecast step of every evaluation window)
fig, ax = plt.subplots(figsize=(10, 3))
plot_acf(errs[:, 0], lags=40, ax=ax);

### ❓ Find the best number of covariates
We can explore the effect of increasing the number of past timesteps used by the AR forecaster:

In [None]:
# split the series in two halves; a dedicated name is used for the split point
# so that the global `n_tr`, which other cells may read, is not rebound here
n_half = int(len(df_tunnel)*0.5)
y_tr = df_tunnel.iloc[:n_half]
y_te = df_tunnel.iloc[n_half:]

# last 100 training observations, followed by the forecasts of each model
y_tr.iloc[-100:].plot()
for i in [5, 6, 7, 8, 120]:
  # fit on the training half only: fitting on the whole series would leak the
  # observations that are being forecast into the model coefficients
  ar_model = AR_forecaster(covariate_lags=range(i)).fit(y_tr)
  ar_model.predict(y_tr, steps_ahead=21).plot(figsize=(10, 3),ax=plt.gca(), label='order {}'.format(i))
plt.legend(loc='lower left')


# ❓Direct AR forecaster
The following block defines a direct autoregressive forecaster, which predicts all the steps ahead in one shot. Since we are using a linear regression there's no need to fit one model per step ahead: we just need to pass a multi-column target.


<img src="https://github.com/nepslor/B5203E-TSAF/raw/95ab6491476169ca761b47d1bee8735b12346694/pics/direct_forecast.png" width="400">

Fill in the missing line

In [None]:
class direct_forecaster:
  """
  Linear autoregressive model forecasting with the direct strategy.

  A single multi-output linear regression maps the lagged features to the next
  `steps_ahead` values of the series, one target column per forecast step.
  The horizon is fixed at construction time and used by `fit`.
  """
  covariate_lags:list = [1, 2, 3]
  theta:np.array=None
  steps_ahead:int = 1
  def __init__(self, covariate_lags=(1,),steps_ahead=1):
    # copy into a list so that ranges / arrays / the shared default are never aliased
    self.covariate_lags = list(covariate_lags)
    self.theta = np.zeros((len(self.covariate_lags), steps_ahead))
    self.steps_ahead = steps_ahead

  def fit(self, y:pd.Series):
    """Fit one coefficient column per forecast step by least squares and return self."""
    # obtain the lags defining the model's features; the targets are the next
    # self.steps_ahead values (the model's own horizon, not a notebook global)
    x, target = format_data(y, covariate_lags=self.covariate_lags, target_lags=-np.arange(1, self.steps_ahead+1))
    # fit the parameters: lstsq minimizes the same ||y - x theta||^2 as the normal
    # equations, without explicitly inverting x^T x
    self.theta = np.linalg.lstsq(x.values, target.values, rcond=None)[0]
    return self

  def predict(self, y:pd.Series, steps_ahead=1):
    """
    Forecast the `steps_ahead` values following the last observation of `y`.

    All the steps come from a single matrix product with the fitted
    coefficients. Raises ValueError if `steps_ahead` exceeds the horizon the
    model was built for; a shorter horizon returns the first steps only.
    """
    x_i, _ = format_data(y, covariate_lags=self.covariate_lags)
    x_i = x_i.iloc[[-1], :].values
    # direct forecast
    y_pred = np.ravel(x_i @ self.theta)
    if steps_ahead > y_pred.size:
      raise ValueError('requested steps_ahead={} but the model forecasts {} step(s)'.format(steps_ahead, y_pred.size))
    y_pred = y_pred[:steps_ahead]
    return pd.Series(y_pred, index=pd.date_range(start=y.index[-1]+pd.Timedelta(days=1), periods=steps_ahead, freq='D'))

# the forecast horizon is a constructor argument of the direct model, so it is
# bound here: evaluate_model only forwards its extra keyword arguments
from functools import partial
score, y_hat, errs = evaluate_model(partial(direct_forecaster, steps_ahead=steps_ahead), df_tunnel,
                                    covariate_lags=np.arange(14),
                                    steps_ahead=steps_ahead,
                                    tr_ratio=tr_ratio)
print('Direct AR model score:{:0.2e}'.format(score))

animate(y_hat, df_tunnel)


In [None]:
# autocorrelation of the first-step errors of the most recent evaluate_model run
fig, ax = plt.subplots(figsize=(10, 3))
plot_acf(errs[:, 0], lags=40, ax=ax);

In [None]:
from statsforecast.models import AutoARIMA
from statsforecast.core import StatsForecast
from functools import partial

class Model_wrap(AutoARIMA):
  """AutoARIMA subclass adding the `predict(y, steps_ahead)` call that `evaluate_model` uses."""
  def __init__(self, **kwargs):
    # super init
    super().__init__(**kwargs)

  def predict(self, y, steps_ahead):
    # `forward` comes from the AutoARIMA base class; only the 'mean' entry of
    # its result is kept and wrapped in a Series
    # NOTE(review): presumably this is the point forecast for the h steps after y - confirm
    return pd.Series(self.forward(y, h=steps_ahead)['mean'])


# change the format to a Nixtla-compatible one: the index becomes column 'ds', the values column 'y'
df_tunnel_nixtla = df_tunnel.reset_index()
df_tunnel_nixtla.columns = ['ds', 'y']
# partial pre-binds the AutoARIMA settings, so evaluate_model can build the model without extra kwargs
score, y_hat, errs = evaluate_model(partial(Model_wrap, season_length=14, stepwise=False),
                                    df_tunnel_nixtla['y'],
                                    steps_ahead=steps_ahead,
                                    tr_ratio=tr_ratio)
print('AR model score:{:0.2e}'.format(score))

animate(y_hat, df_tunnel)

In [None]:
# autocorrelation of the first-step errors of the most recent evaluate_model run
fig, ax = plt.subplots(figsize=(10, 3))
plot_acf(errs[:, 0], lags=40, ax=ax);

# ❓ Try to beat the benchmarks
Using either the given implementations (the AR model and the direct forecaster) or the statsforecast pipeline, try to beat the proposed models in terms of MAE by tuning the models' parameters.

