# VAR, VARMA and VARMAX 

In [None]:
from statsmodels.tsa.stattools import grangercausalitytests, adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.statespace.varmax import VARMAX
from tqdm import tqdm_notebook
from itertools import product

import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [None]:
# Default size (width, height) applied to every figure created afterwards
plt.rcParams.update({"figure.figsize": (9, 6)})

Source: Statistics Canada. Table 32-10-0077-01  [Farm product prices, crops and livestock](https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=3210007701)

In [None]:
# One stacked panel per series; column 0 of df is skipped (columns 1..3 are drawn)
fig, axes = plt.subplots(nrows=3, ncols=1, dpi=300, figsize=(12,9))

for position, ax in enumerate(axes.flatten(), start=1):
    column = df.columns[position]
    data = df[column]

    ax.plot(data, color='black', linewidth=1)
    ax.set_title(column)

    # Hide tick marks on both axes and make the top border fully transparent
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_ticks_position('none')
    ax.spines['top'].set_alpha(0)
    ax.tick_params(labelsize=6)

# Ticks at positions 0, 24, ..., 360 labelled with every second year 1992..2022
tick_positions = np.arange(0, 369, 24)
tick_years = np.arange(1992, 2023, 2)
plt.setp(axes, xticks=tick_positions, xticklabels=tick_years)

fig.autofmt_xdate()

plt.tight_layout()

## VAR(p) 

### Check for stationarity 

In [None]:
# Run the ADF test for the "cows" series
# adfuller returns a tuple whose first two entries are the test statistic and the p-value
ad_fuller_result_1 = adfuller(df['cows'])

print('cows')
print(f'ADF Statistic: {ad_fuller_result_1[0]}')
print(f'p-value: {ad_fuller_result_1[1]}')

print('\n---------------------\n')

# Run the ADF test for the "calves" series
ad_fuller_result_2 = adfuller(df['calves'])

print('calves')
print(f'ADF Statistic: {ad_fuller_result_2[0]}')
print(f'p-value: {ad_fuller_result_2[1]}')

In [None]:
# Run the ADF test for the differenced "cows" series
# First-order differencing leaves a NaN in the first row, so that row is dropped
ad_fuller_result_1 = adfuller(df['cows'].diff().iloc[1:])

print('cows')
print(f'ADF Statistic: {ad_fuller_result_1[0]}')
print(f'p-value: {ad_fuller_result_1[1]}')

print('\n---------------------\n')

# Run the ADF test for the differenced "calves" series
ad_fuller_result_2 = adfuller(df['calves'].diff().iloc[1:])

print('calves')
print(f'ADF Statistic: {ad_fuller_result_2[0]}')
print(f'p-value: {ad_fuller_result_2[1]}')

### Model selection 

In [None]:
def VAR_gridsearch(endog, min_p, max_p):
    """Fit a VAR(p) model for every p in [min_p, max_p] and rank the orders by AIC.

    Parameters
    ----------
    endog : the endogenous series, passed unchanged to ``VARMAX``.
    min_p, max_p : int
        Inclusive bounds of the autoregressive orders to try.

    Returns
    -------
    pandas.DataFrame
        Columns ``'p'`` and ``'AIC'``, sorted by ascending AIC. Orders whose
        model could not be built or fitted are left out; the frame is empty
        when no order succeeded.
    """
    # Determine the range of possible values for p
    all_p = range(min_p, max_p + 1, 1)

    # Initialize an empty list to store the order and AIC
    results = []

    # Report the number of candidates (not the range object itself)
    print(f'Fitting {len(all_p)} unique models')

    for p in tqdm_notebook(all_p):
        try:
            # Fit a VAR(p) model: VARMAX with no moving-average terms
            model = VARMAX(endog, order=(p, 0)).fit(disp=False)
        except Exception:
            # Skip orders that fail, but let KeyboardInterrupt stop the search
            continue

        # Append the order and AIC to the list
        results.append([p, model.aic])

    # Store the results in a DataFrame with named columns (works for an empty list too)
    result_df = pd.DataFrame(results, columns=['p', 'AIC'])

    # Sort the DataFrame in ascending order of AIC
    result_df = result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)

    return result_df

In [None]:
# Get the endogenous variables

# Get the differenced endogenous variables

# Train/test split (the last 48 time steps are for the test set)

# Run the grid search


In [None]:
# Header for the first direction, followed by a separator line
print('calves Granger-causes cows?\n', '------------------', sep='\n')
# Run the Granger causality test calves -> cows

# Header for the opposite direction, followed by a separator line
print('\ncows Granger-causes calves?\n', '------------------', sep='\n')
# Run the Granger causality test cows -> calves


### Residuals analysis
#### cows

In [None]:
# cows


In [None]:
def ljung_box_test(residuals, is_seasonal, period):
    """Run the Ljung-Box test on ``residuals``, plot the p-values and print a verdict.

    Parameters
    ----------
    residuals : sequence of model residuals (must support ``len``).
    is_seasonal : bool
        When true, the lags are chosen by ``acorr_ljungbox`` from ``period``.
    period : int
        Seasonal period forwarded to ``acorr_ljungbox``; only used when
        ``is_seasonal`` is true.

    Returns
    -------
    None. A figure is drawn and a one-line conclusion is printed.
    """
    if is_seasonal:
        lb_df = acorr_ljungbox(residuals, period=period)
    else:
        # Integer division keeps the lag count whole and never above len/5;
        # at least one lag is always tested.
        max_lag = max(1, min(10, len(residuals) // 5))

        lb_df = acorr_ljungbox(residuals, np.arange(1, max_lag + 1, 1))

    fig, ax = plt.subplots()
    ax.plot(lb_df['lb_pvalue'], 'b-', label='p-values')
    # Horizontal reference line at the 0.05 threshold used in the verdict below
    ax.hlines(y=0.05, xmin=1, xmax=len(lb_df), color='black')
    plt.tight_layout()

    if all(pvalue > 0.05 for pvalue in lb_df['lb_pvalue']):
        print('All values are above 0.05. We fail to reject the null hypothesis. The residuals are uncorrelated')
    else:
        print('At least one p-value is smaller than 0.05')

#### calves 

In [None]:
# calves


### Forecasting

In [None]:
def rolling_predictions(df, last_train_value, train_len, horizon, window, method):
    """Return rolling predictions for 'cows' and 'calves' as two arrays.

    The per-step predictions are treated as increments: ``last_train_value`` is
    placed in front of them and the sequence is cumulatively summed. Each
    returned array is cut to ``horizon`` entries, so its first entry is
    ``last_train_value`` itself.

    ``method == 'last'`` repeats, for each block of ``window`` steps, the row of
    ``df`` just before the block. ``method == 'VAR'`` is an unfinished
    placeholder: ``cows_pred_VAR`` / ``calves_pred_VAR`` are not assigned in
    this function. Any other ``method`` returns ``None``.
    """
    total_len = train_len + horizon

    if method == 'VAR':

        # Get the predictions from the VAR model

        return cows_pred_VAR[:horizon], calves_pred_VAR[:horizon]

    if method == 'last':
        cows_steps, calves_steps = [], []

        for cutoff in range(train_len, total_len, window):
            # Last row available before this block starts
            latest = df[:cutoff].iloc[-1]

            cows_steps += [latest['cows']] * window
            calves_steps += [latest['calves']] * window

        # Prepend the starting level, then accumulate the increments
        cows_path = np.cumsum(np.insert(cows_steps, 0, last_train_value['cows']))
        calves_path = np.cumsum(np.insert(calves_steps, 0, last_train_value['calves']))

        return cows_path[:horizon], calves_path[:horizon]

In [None]:
# Last 48 rows of the observed series; copied so the forecast columns added below
# are written to an independent frame rather than to a slice of df
pred_df = df[['cows', 'calves']][-48:].copy()

TRAIN_LEN = len(train)
HORIZON = len(test)
# Starting level that rolling_predictions accumulates the predicted increments onto
LAST_TRAIN_VALUE = endog.iloc[321]

windows = [1, 4, 6, 12]

# For every window, store the baseline and VAR predictions in window-suffixed columns
for window in windows:
    cows_pred_last, calves_pred_last = rolling_predictions(endog_diff, LAST_TRAIN_VALUE, TRAIN_LEN, HORIZON, window, 'last')
    cows_pred_VAR, calves_pred_VAR = rolling_predictions(endog_diff, LAST_TRAIN_VALUE, TRAIN_LEN, HORIZON, window, 'VAR')
    
    pred_df[f'cows_pred_last_{window}'] = cows_pred_last
    pred_df[f'calves_pred_last_{window}'] = calves_pred_last
    pred_df[f'cows_pred_VAR_{window}'] = cows_pred_VAR
    pred_df[f'calves_pred_VAR_{window}'] = calves_pred_VAR
    
pred_df.head()

In [None]:
# One panel per forecast window: observed series, baseline and VAR predictions
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12,9))

series_colours = (('cows', 'blue'), ('calves', 'orange'))

for panel, ax in enumerate(axes.flatten()):
    window = windows[panel]

    # Full series first, then the pred_df rows again so the legend gets one entry each
    for target, colour in series_colours:
        ax.plot(df[target], color=colour)
    for target, colour in series_colours:
        ax.plot(pred_df[target], color=colour, label=target)

    # Only the first line of each method carries a legend label
    ax.plot(pred_df[f'cows_pred_last_{window}'], 'r-.', label='last')
    ax.plot(pred_df[f'calves_pred_last_{window}'], 'r-.')
    ax.plot(pred_df[f'cows_pred_VAR_{window}'], 'k--', label='VAR')
    ax.plot(pred_df[f'calves_pred_VAR_{window}'], 'k--')
    ax.legend(loc=2)

    # Grey band over x positions 322..369
    ax.axvspan(322, 369, color='#808080', alpha=0.2)
    ax.set(
        xlabel='Date',
        ylabel='$/hundredweight',
        xlim=(300, 369),
        title=f'Horizon = {window}',
    )

# Ticks every 12 positions from 300, labelled 2017..2022
year_ticks = np.arange(300, 369, 12)
year_labels = np.arange(2017, 2023, 1)
plt.setp(axes, xticks=year_ticks, xticklabels=year_labels)

fig.autofmt_xdate()
plt.tight_layout()

### Evaluation 

In [None]:
from sklearn.metrics import mean_absolute_error


def _var_window_maes(window):
    """MAEs for one window, ordered: cows/last, calves/last, cows/VAR, calves/VAR."""
    return [
        mean_absolute_error(pred_df[target], pred_df[f'{target}_pred_{method}_{window}'])
        for method in ('last', 'VAR')
        for target in ('cows', 'calves')
    ]


# Mean absolute error of each method against the observed values, per window
mae_cow_last_1, mae_calves_last_1, mae_cow_VAR_1, mae_calves_VAR_1 = _var_window_maes(1)

mae_cow_last_4, mae_calves_last_4, mae_cow_VAR_4, mae_calves_VAR_4 = _var_window_maes(4)

mae_cow_last_6, mae_calves_last_6, mae_cow_VAR_6, mae_calves_VAR_6 = _var_window_maes(6)

mae_cow_last_12, mae_calves_last_12, mae_cow_VAR_12, mae_calves_VAR_12 = _var_window_maes(12)

In [None]:
# MAE values grouped per method and target, ordered like `windows`
mae_cows_last = [mae_cow_last_1, mae_cow_last_4, mae_cow_last_6, mae_cow_last_12]
mae_calves_last = [mae_calves_last_1, mae_calves_last_4, mae_calves_last_6, mae_calves_last_12]
mae_cows_VAR = [mae_cow_VAR_1, mae_cow_VAR_4, mae_cow_VAR_6, mae_cow_VAR_12]
mae_calves_VAR = [mae_calves_VAR_1, mae_calves_VAR_4, mae_calves_VAR_6, mae_calves_VAR_12]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12,9))

width = 0.3
labels = ['Cows', 'Calves']
x = np.arange(len(labels))

for i, ax in enumerate(axes.flatten()):
    y_last = [mae_cows_last[i], mae_calves_last[i]]
    y_VAR = [mae_cows_VAR[i], mae_calves_VAR[i]]
    
    # Paired bars: baseline to the left of each tick, VAR to the right
    ax.bar(x - width/2, y_last, width, label='Last')
    ax.bar(x + width/2, y_VAR, width, label='VAR')
    
    # Scale to the tallest bar of either method so no bar or value label is clipped
    ax.set_ylim(0, max(y_last + y_VAR) + 10)
    ax.set_ylabel('MAE')
    ax.set_xlabel('Target')
    ax.set_title(f'Horizon = {windows[i]}')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend(loc='best')
    
    # Value labels above each bar; separate loop names avoid shadowing the panel index i
    for xpos, v in zip(x, y_last):
        ax.text(x=xpos - width/2, y=v+1, s=str(round(v, 2)), ha='center')
    for xpos, v in zip(x, y_VAR):
        ax.text(x=xpos + width/2, y=v+1, s=str(round(v, 2)), ha='center')
    
plt.tight_layout()

## VARMA(p,q) 

In [None]:
def VARMA_gridsearch(endog, min_p, max_p, min_q, max_q):
    """Fit a VARMA(p,q) model for every (p, q) pair in the given ranges and rank by AIC.

    Parameters
    ----------
    endog : the endogenous series, passed unchanged to ``VARMAX``.
    min_p, max_p : int
        Inclusive bounds of the autoregressive orders to try.
    min_q, max_q : int
        Inclusive bounds of the moving-average orders to try.

    Returns
    -------
    pandas.DataFrame
        Columns ``'(p,q)'`` and ``'AIC'``, sorted by ascending AIC. Pairs whose
        model could not be built or fitted are left out; the frame is empty
        when no pair succeeded.
    """
    all_p = range(min_p, max_p+1, 1)
    # Define the range of values for q
    all_q = range(min_q, max_q+1, 1)

    # Make a list of all possible (p,q) combinations
    all_orders = list(product(all_p, all_q))
    
    results = []
    
    print(f'Fitting {len(all_orders)} unique models')
    
    for order in tqdm_notebook(all_orders):
        try:
            # Fit a VARMA model
            model = VARMAX(endog, order=order).fit(disp=False)
        except Exception:
            # Skip pairs that fail, but let KeyboardInterrupt stop the search
            continue
    
        results.append([order, model.aic])
        
    # Naming the columns at construction also works when no model could be fitted
    result_df = pd.DataFrame(results, columns=['(p,q)', 'AIC'])
    
    result_df = result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)
    
    return result_df

We don't need to redo the Granger causality test, since the autoregressive order is unchanged ($p=6$).

### Residuals analysis 

In [None]:
# cows


In [None]:
# calves


### Forecasting 

In [None]:
def rolling_predictions(df, last_train_value, train_len, horizon, window, method):
    """Return rolling predictions for 'cows' and 'calves' as two arrays.

    The per-step predictions are treated as increments: ``last_train_value`` is
    placed in front of them and the sequence is cumulatively summed. Each
    returned array is cut to ``horizon`` entries, so its first entry is
    ``last_train_value`` itself.

    ``method == 'VARMA'`` fits ``model`` once per block of ``window`` steps and
    keeps the last ``window`` predicted means; the model initialisation is
    still a placeholder, so ``model`` is not assigned in this function.
    ``method == 'last'`` repeats the row of ``df`` just before each block.
    Any other ``method`` returns ``None``.
    """
    total_len = train_len + horizon

    if method == 'VARMA':
        cows_steps, calves_steps = [], []

        for cutoff in range(train_len, total_len, window):
            # Initialize the VARMA(p,q) model

            fitted = model.fit(disp=False)
            # Predict from position 0 through the end of the current block,
            # then keep only the block itself
            block_mean = fitted.get_prediction(0, cutoff + window - 1).predicted_mean.iloc[-window:]

            cows_steps.extend(block_mean['cows'])
            calves_steps.extend(block_mean['calves'])

        # Prepend the starting level, then accumulate the increments
        cows_path = np.cumsum(np.insert(cows_steps, 0, last_train_value['cows']))
        calves_path = np.cumsum(np.insert(calves_steps, 0, last_train_value['calves']))

        return cows_path[:horizon], calves_path[:horizon]

    if method == 'last':
        cows_steps, calves_steps = [], []

        for cutoff in range(train_len, total_len, window):
            # Last row available before this block starts
            latest = df[:cutoff].iloc[-1]

            cows_steps += [latest['cows']] * window
            calves_steps += [latest['calves']] * window

        cows_path = np.cumsum(np.insert(cows_steps, 0, last_train_value['cows']))
        calves_path = np.cumsum(np.insert(calves_steps, 0, last_train_value['calves']))

        return cows_path[:horizon], calves_path[:horizon]

In [None]:
# Last 48 rows of the observed series; copied so the forecast columns added below
# are written to an independent frame rather than to a slice of df
pred_df = df[['cows', 'calves']][-48:].copy()

TRAIN_LEN = len(train)
HORIZON = len(test)
# Starting level that rolling_predictions accumulates the predicted increments onto
LAST_TRAIN_VALUE = endog.iloc[321]

windows = [1, 4, 6, 12]

# For every window, store the baseline and VARMA predictions in window-suffixed columns
for window in windows:
    cows_pred_last, calves_pred_last = rolling_predictions(endog_diff, LAST_TRAIN_VALUE, TRAIN_LEN, HORIZON, window, 'last')
    cows_pred_VARMA, calves_pred_VARMA = rolling_predictions(endog_diff, LAST_TRAIN_VALUE, TRAIN_LEN, HORIZON, window, 'VARMA')
    
    pred_df[f'cows_pred_last_{window}'] = cows_pred_last
    pred_df[f'calves_pred_last_{window}'] = calves_pred_last
    pred_df[f'cows_pred_VARMA_{window}'] = cows_pred_VARMA
    pred_df[f'calves_pred_VARMA_{window}'] = calves_pred_VARMA
    
pred_df.head()

In [None]:
# One panel per forecast window: observed series, baseline and VARMA predictions
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12,9))

series_colours = (('cows', 'blue'), ('calves', 'orange'))

for panel, ax in enumerate(axes.flatten()):
    window = windows[panel]

    # Full series first, then the pred_df rows again so the legend gets one entry each
    for target, colour in series_colours:
        ax.plot(df[target], color=colour)
    for target, colour in series_colours:
        ax.plot(pred_df[target], color=colour, label=target)

    # Only the first line of each method carries a legend label
    ax.plot(pred_df[f'cows_pred_last_{window}'], 'r-.', label='last')
    ax.plot(pred_df[f'calves_pred_last_{window}'], 'r-.')
    ax.plot(pred_df[f'cows_pred_VARMA_{window}'], 'k--', label='VARMA')
    ax.plot(pred_df[f'calves_pred_VARMA_{window}'], 'k--')
    ax.legend(loc=2)

    # Grey band over x positions 322..369
    ax.axvspan(322, 369, color='#808080', alpha=0.2)
    ax.set(
        xlabel='Date',
        ylabel='$/hundredweight',
        xlim=(300, 369),
        title=f'Horizon = {window}',
    )

# Ticks every 12 positions from 300, labelled 2017..2022
year_ticks = np.arange(300, 369, 12)
year_labels = np.arange(2017, 2023, 1)
plt.setp(axes, xticks=year_ticks, xticklabels=year_labels)

fig.autofmt_xdate()
plt.tight_layout()

### Evaluation 

In [None]:
def _varma_window_maes(window):
    """MAEs for one window, ordered: cows/last, calves/last, cows/VARMA, calves/VARMA."""
    return [
        mean_absolute_error(pred_df[target], pred_df[f'{target}_pred_{method}_{window}'])
        for method in ('last', 'VARMA')
        for target in ('cows', 'calves')
    ]


# Mean absolute error of each method against the observed values, per window
mae_cow_last_1, mae_calves_last_1, mae_cow_VARMA_1, mae_calves_VARMA_1 = _varma_window_maes(1)

mae_cow_last_4, mae_calves_last_4, mae_cow_VARMA_4, mae_calves_VARMA_4 = _varma_window_maes(4)

mae_cow_last_6, mae_calves_last_6, mae_cow_VARMA_6, mae_calves_VARMA_6 = _varma_window_maes(6)

mae_cow_last_12, mae_calves_last_12, mae_cow_VARMA_12, mae_calves_VARMA_12 = _varma_window_maes(12)

In [None]:
# MAE values grouped per method and target, ordered like `windows`
mae_cows_last = [mae_cow_last_1, mae_cow_last_4, mae_cow_last_6, mae_cow_last_12]
mae_calves_last = [mae_calves_last_1, mae_calves_last_4, mae_calves_last_6, mae_calves_last_12]
mae_cows_VARMA = [mae_cow_VARMA_1, mae_cow_VARMA_4, mae_cow_VARMA_6, mae_cow_VARMA_12]
mae_calves_VARMA = [mae_calves_VARMA_1, mae_calves_VARMA_4, mae_calves_VARMA_6, mae_calves_VARMA_12]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12,9))

width = 0.3
labels = ['Cows', 'Calves']
x = np.arange(len(labels))

for i, ax in enumerate(axes.flatten()):
    y_last = [mae_cows_last[i], mae_calves_last[i]]
    y_VARMA = [mae_cows_VARMA[i], mae_calves_VARMA[i]]
    
    # Paired bars: baseline to the left of each tick, VARMA to the right
    ax.bar(x - width/2, y_last, width, label='Last')
    ax.bar(x + width/2, y_VARMA, width, label='VARMA')
    
    # Scale to the tallest bar of either method so no bar or value label is clipped
    ax.set_ylim(0, max(y_last + y_VARMA) + 10)
    ax.set_ylabel('MAE')
    ax.set_xlabel('Target')
    ax.set_title(f'Horizon = {windows[i]}')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend(loc='best')
    
    # Value labels above each bar; separate loop names avoid shadowing the panel index i
    for xpos, v in zip(x, y_last):
        ax.text(x=xpos - width/2, y=v+1, s=str(round(v, 2)), ha='center')
    for xpos, v in zip(x, y_VARMA):
        ax.text(x=xpos + width/2, y=v+1, s=str(round(v, 2)), ha='center')
    
plt.tight_layout()

## VARMAX 

In [None]:
def VARMAX_gridsearch(endog, exog, min_p, max_p, min_q, max_q):
    """Fit a VARMAX(p,q) model for every (p, q) pair in the given ranges and rank by AIC.

    Parameters
    ----------
    endog : the endogenous series, passed unchanged to ``VARMAX``.
    exog : the exogenous regressor(s), passed unchanged to ``VARMAX``.
    min_p, max_p : int
        Inclusive bounds of the autoregressive orders to try.
    min_q, max_q : int
        Inclusive bounds of the moving-average orders to try.

    Returns
    -------
    pandas.DataFrame
        Columns ``'(p,q)'`` and ``'AIC'``, sorted by ascending AIC. Pairs whose
        model could not be built or fitted are left out; the frame is empty
        when no pair succeeded.
    """
    all_p = range(min_p, max_p+1, 1)
    all_q = range(min_q, max_q+1, 1)
    all_orders = list(product(all_p, all_q))
    
    print(f'Fitting {len(all_orders)} unique models')
    
    results = []
    
    for order in tqdm_notebook(all_orders):
        try:
            # Fit a VARMAX model
            model = VARMAX(endog, exog, order=order).fit(disp=False)
        except Exception:
            # Skip pairs that fail, but let KeyboardInterrupt stop the search
            continue
    
        results.append([order, model.aic])
        
    # Naming the columns at construction also works when no model could be fitted
    result_df = pd.DataFrame(results, columns=['(p,q)', 'AIC'])
    
    result_df = result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)
    
    return result_df

In [None]:
# Get your exogenous variable
# Index of exog must align with the training set

# Run the grid search


We don't need to redo the Granger causality test, since the autoregressive order is unchanged ($p=6$).

### Residuals analysis 

In [None]:
# cows
# Index 0 selects the first endogenous variable. The original passed 1 here, which
# repeated the "calves" diagnostics.
# NOTE(review): assumes the endog columns are ordered ['cows', 'calves'] — confirm.
VARMAX_model_fit.plot_diagnostics(variable=0);

In [None]:
# calves
# Residual diagnostics for the endogenous variable at index 1
# NOTE(review): presumably 'calves', if the endog columns are ordered ['cows', 'calves'] — confirm.
VARMAX_model_fit.plot_diagnostics(variable=1);

### Forecasting 

In [None]:
def rolling_predictions(df, exog, last_train_value, train_len, horizon, window, method):
    """Return rolling predictions for 'cows' and 'calves' as two arrays.

    The per-step predictions are treated as increments: ``last_train_value`` is
    placed in front of them and the sequence is cumulatively summed. Each
    returned array is cut to ``horizon`` entries, so its first entry is
    ``last_train_value`` itself.

    ``method == 'VARMAX'`` keeps the last ``window`` predicted means per block;
    producing ``predictions`` is still a placeholder, so that name (and any use
    of ``exog``) is not set up in this function. ``method == 'last'`` repeats
    the row of ``df`` just before each block. Any other ``method`` returns
    ``None``.
    """
    total_len = train_len + horizon

    if method == 'VARMAX':
        cows_steps, calves_steps = [], []

        for cutoff in range(train_len, total_len, window):
            # Get the predictions from VARMAX

            block_mean = predictions.predicted_mean.iloc[-window:]

            cows_steps.extend(block_mean['cows'])
            calves_steps.extend(block_mean['calves'])

        # Prepend the starting level, then accumulate the increments
        cows_path = np.cumsum(np.insert(cows_steps, 0, last_train_value['cows']))
        calves_path = np.cumsum(np.insert(calves_steps, 0, last_train_value['calves']))

        return cows_path[:horizon], calves_path[:horizon]

    if method == 'last':
        cows_steps, calves_steps = [], []

        for cutoff in range(train_len, total_len, window):
            # Last row available before this block starts
            latest = df[:cutoff].iloc[-1]

            cows_steps += [latest['cows']] * window
            calves_steps += [latest['calves']] * window

        cows_path = np.cumsum(np.insert(cows_steps, 0, last_train_value['cows']))
        calves_path = np.cumsum(np.insert(calves_steps, 0, last_train_value['calves']))

        return cows_path[:horizon], calves_path[:horizon]

In [None]:
# Last 48 rows of the observed series; copied so the forecast columns added below
# are written to an independent frame rather than to a slice of df
pred_df = df[['cows', 'calves']][-48:].copy()
# Exogenous series with its first row dropped
exog = df['barley'][1:]

TRAIN_LEN = len(train)
HORIZON = len(test)
# Starting level that rolling_predictions accumulates the predicted increments onto
LAST_TRAIN_VALUE = endog.iloc[321]
WINDOW = 1

cows_pred_last, calves_pred_last = rolling_predictions(endog_diff, exog, LAST_TRAIN_VALUE, TRAIN_LEN, HORIZON, WINDOW, 'last')
cows_pred_VARMAX, calves_pred_VARMAX = rolling_predictions(endog_diff, exog, LAST_TRAIN_VALUE, TRAIN_LEN, HORIZON, WINDOW, 'VARMAX')
    
# Plain string keys: these column names have nothing to interpolate
pred_df['cows_pred_last'] = cows_pred_last
pred_df['calves_pred_last'] = calves_pred_last
pred_df['cows_pred_VARMAX'] = cows_pred_VARMAX
pred_df['calves_pred_VARMAX'] = calves_pred_VARMAX
    
pred_df.head()

### Evaluation 

In [None]:
# Mean absolute error against the observed values, computed in the order
# cows/last, calves/last, cows/VARMAX, calves/VARMAX
mae_cow_last, mae_calves_last, mae_cow_VARMAX, mae_calves_VARMAX = (
    mean_absolute_error(pred_df[target], pred_df[f'{target}_pred_{method}'])
    for method in ('last', 'VARMAX')
    for target in ('cows', 'calves')
)

In [None]:
fig, ax = plt.subplots()

width = 0.3
labels = ['Cows', 'Calves']
x = np.arange(len(labels))

y_last = [mae_cow_last, mae_calves_last]
y_VARMAX = [mae_cow_VARMAX, mae_calves_VARMAX]

# Paired bars: baseline to the left of each tick, VARMAX to the right
ax.bar(x - width/2, y_last, width, label='Last')
ax.bar(x + width/2, y_VARMAX, width, label='VARMAX')

# Keep the 0-15 range when everything fits; grow it when a bar or its value label
# (drawn at v+1) would otherwise be cut off
ax.set_ylim(0, max(15, max(y_last + y_VARMAX) + 2))
ax.set_ylabel('MAE')
ax.set_xlabel('Target')
ax.set_title('Horizon = 1')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend(loc='best')

# Value labels above each bar
for xpos, v in zip(x, y_last):
    ax.text(x=xpos - width/2, y=v+1, s=str(round(v, 2)), ha='center')
for xpos, v in zip(x, y_VARMAX):
    ax.text(x=xpos + width/2, y=v+1, s=str(round(v, 2)), ha='center')
    
plt.tight_layout()