In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it
kaggle_inputs = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in kaggle_inputs:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# VARMA - Vector Autoregression with Moving Average
The only difference from the VAR model is that the error terms are given a moving-average representation of order q.

Data sources: https://fred.stlouisfed.org/series/M2SL (M2 money stock) and https://fred.stlouisfed.org/series/PCE (personal consumption expenditures)

In [None]:
pip install pmdarima

## Import Libraries and Load the Data

In [None]:
import numpy as np 
import pandas as pd

# visualizations
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
rcParams['figure.figsize'] = (12,5)
%matplotlib inline

# time series related 
from pmdarima import auto_arima
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import mse,rmse
from statsmodels.tsa.statespace.varmax import VARMAX,VARMAXResults

# handle warnings
import warnings
warnings.filterwarnings(action='ignore',category=DeprecationWarning)
warnings.filterwarnings(action='ignore',category=FutureWarning)

In [None]:
# Load datasets
def _load_monthly(csv_path):
    """Read a CSV whose first column is a date index and tag the index as month-start ('MS')."""
    frame = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    frame.index.freq = 'MS'
    return frame


df = _load_monthly('/kaggle/input/time-series-data-1/M2SLMoneyStock.csv')
sp = _load_monthly('/kaggle/input/time-series-data-1/PCEPersonalSpending.csv')

In [None]:
# Preview the first rows of the money-stock frame
df.head()

In [None]:
# Preview the first rows of the personal-spending frame
sp.head()

In [None]:
# Combine both series on their shared monthly index. Any column that an earlier
# run of this cell already copied over from `sp` is dropped first, so re-running
# the cell does not fail with an overlapping-column error.
df = df.drop(columns=sp.columns, errors='ignore').join(sp)
df.head()

In [None]:
# Discard rows with a missing value in any column, then show the remaining (rows, columns)
df = df.dropna()
df.shape

## Plot the data

In [None]:
# Draw Spending first so the figure size, title and axis labels are configured on its axes,
# then add Money to the same (current) axes.
ax = df['Spending'].plot(
    figsize=(12,5),
    legend=True,
    title='M2 Money Stock vs. Personal Consumption Expenditures',
)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel='', ylabel='Billions of dollars')
df['Money'].plot(legend=True);

## Test for Stationarity

Refer to the other notebook, https://www.kaggle.com/prakharprasad/time-series-vector-autoregression, where the test for stationarity was performed on both the Money and the Spending features. Second-order differencing makes the data stationary. For the sake of brevity, I am skipping this step here.

## Decide the order of the VARMA(p,q) 

In [None]:
# Search for a suitable ARIMA order for the Money series on its own;
# the returned model is shown as the cell output.
auto_arima(df['Money'],maxiter=100)

In [None]:
# Search for a suitable ARIMA order for the Spending series on its own;
# the returned model is shown as the cell output.
auto_arima(df['Spending'],maxiter=100)

Order (1,2), i.e. p=1 and q=2, is preferred for VARMA. The differencing term d of the ARIMA(p,d,q) order reported by auto_arima is not passed to the VARMA model; it is handled by differencing the data explicitly in the next step.

In [None]:
# Second-order differencing in one chain: difference twice, then discard the
# rows that contain the NaNs introduced by the differencing.
df_transformed = df.diff().diff().dropna()
df_transformed.head()

In [None]:
# Number of observations left after differencing
len(df_transformed)

## Train Test Split

In [None]:
# Hold out the final `nobs` rows as the test window (12 months, i.e. one year);
# everything before them is the training set.
nobs = 12
train, test = df_transformed.iloc[:-nobs], df_transformed.iloc[-nobs:]

## Fit the VARMA(1,2) Model

In [None]:
# VARMA(p=1, q=2) with a constant ('c') trend term, estimated on the training window
varma_order = (1,2)
model = VARMAX(endog=train, order=varma_order, trend='c')
results = model.fit(maxiter=1000, disp=False)
results.summary()

## Predict the next 12 values

In [None]:
# Forecast exactly as many steps as were held out for testing, so the forecast
# horizon always matches the test window even if `nobs` is changed.
df_forecast = results.forecast(steps=nobs)
df_forecast

## Invert the Transformations 
The model was fitted on second-order differences, so its forecasts are second-order differences as well. They must be inverted to recover values on the original scale, which we can then compare with the actual Money and Spending values for the last 12 months.

To roll back a first-order difference we take the most recent value on the training side of the original series, and add it to a cumulative sum of forecasted values. When working with second-order differences we first must perform this operation on the most recent first-order difference.

Here we'll use the <tt>nobs</tt> variable we defined during the train/test/split step.

**This was the toughest part to figure out. The best way is to take a small dataset, try this out manually, come up with the steps or formula, and then generalize to the entire dataframe.**

In [None]:
# Undo the second-order differencing of the Money forecast, one level at a time.
# Step 1: recover the forecast first differences by adding the most recent first
# difference from the training side of the original dataset to the cumulative sum
# of the (second-difference) forecast.
df_forecast['Money1d'] = (df['Money'].iloc[-nobs-1]-df['Money'].iloc[-nobs-2]) + df_forecast['Money'].cumsum()
# Step 2: recover the levels by adding the last training-side value to the cumulative
# sum of the first differences from step 1. This must use Money1d; accumulating the
# raw second-difference forecast here would undo only one level of differencing.
df_forecast['MoneyForecast'] = df['Money'].iloc[-nobs-1] + df_forecast['Money1d'].cumsum()

In [None]:
# Undo the second-order differencing of the Spending forecast, one level at a time.
# Step 1: recover the forecast first differences by adding the most recent first
# difference from the training side of the original dataset to the cumulative sum
# of the (second-difference) forecast.
df_forecast['Spending1d'] = (df['Spending'].iloc[-nobs-1]-df['Spending'].iloc[-nobs-2]) + df_forecast['Spending'].cumsum()

# Step 2: recover the levels by adding the last training-side value to the cumulative
# sum of the first differences from step 1. This must use Spending1d; accumulating the
# raw second-difference forecast here would undo only one level of differencing.
df_forecast['SpendingForecast'] = df['Spending'].iloc[-nobs-1] + df_forecast['Spending1d'].cumsum()

In [None]:
# Show the forecast frame, including the columns added by the inversion cells
df_forecast

In [None]:
# Side-by-side view of the held-out actuals and the level forecasts. The window is
# sliced with `nobs` (not a literal 12) so it stays aligned with the train/test split.
pd.concat([df.iloc[-nobs:],df_forecast[['MoneyForecast','SpendingForecast']]],axis=1)

## Plot the results

In [None]:
# Actual Money for the held-out months, with the level forecast drawn on the same axes
actual_ax = df['Money'].iloc[-nobs:].plot(figsize=(12,5),legend=True)
actual_ax.autoscale(axis='x',tight=True)
df_forecast['MoneyForecast'].plot(legend=True);

In [None]:
# Actual Spending for the held-out months, with the level forecast drawn on the same axes
actual_ax = df['Spending'].iloc[-nobs:].plot(figsize=(12,5),legend=True)
actual_ax.autoscale(axis='x',tight=True)
df_forecast['SpendingForecast'].plot(legend=True);

## Model Evaluation

In [None]:
# Root-mean-squared error of the Money level forecast over the held-out months.
# The label names the model actually fitted in this notebook, VARMA(1,2).
RMSE1 = rmse(df['Money'][-nobs:], df_forecast['MoneyForecast'])
print(f'Money VARMA(1,2) RMSE: {RMSE1:.3f}')

In [None]:
# Root-mean-squared error of the Spending level forecast over the held-out months.
# The label names the model actually fitted in this notebook, VARMA(1,2).
RMSE2 = rmse(df['Spending'][-nobs:], df_forecast['SpendingForecast'])
print(f'Spending VARMA(1,2) RMSE: {RMSE2:.3f}')

# Summary

**The VARMA model fits this dataset very poorly. Perhaps there is no strong relationship between the M2 money stock and personal spending, at least for the period that I investigated here. A next step could be to compare these results with ARMA or other models.**

In [None]:
nan