In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.api import VAR
from statsmodels.tsa.statespace.varmax import VARMAX

from statsmodels.tsa.stattools import adfuller, grangercausalitytests
from statsmodels.tsa.arima.model import ARIMA

# Render matplotlib figures inline in the notebook output
%matplotlib inline                 
# Emit inline figures in SVG format
%config InlineBackend.figure_format = 'svg'

# Apply the "seaborn-v0_8-dark" matplotlib style sheet to subsequent figures
plt.style.use("seaborn-v0_8-dark")  

In [None]:
# get the data 

#!wget -nc https://lazyprogrammer.me/course_files/timeseries/us_macro_quarterly.xlsx

In [2]:
# Load the quarterly US macro workbook; the relative path is resolved against the working directory
df = pd.read_excel('us_macro_quarterly.xlsx')

# Preview the first rows to check the column layout
df.head()

  for idx, row in parser.parse():


Unnamed: 0.1,Unnamed: 0,GDPC96,JAPAN_IP,PCECTPI,GS10,GS1,TB3MS,UNRATE,EXUSUK,CPIAUCSL
0,1957:01,2851.778,8.414363,16.449,3.403333,3.39,3.096667,3.933333,,27.776667
1,1957:02,2845.453,9.097347,16.553,3.626667,3.54,3.14,4.1,,28.013333
2,1957:03,2873.169,9.042708,16.687,3.926667,3.963333,3.353333,4.233333,,28.263333
3,1957:04,2843.718,8.796834,16.773,3.633333,3.586667,3.31,4.933333,,28.4
4,1958:01,2770.0,8.632918,16.978,3.04,2.16,1.756667,6.3,,28.736667


In [4]:
# The 'Unnamed: 0' column holds the observation date as quarterly labels of the form 'YYYY:QQ'.

def custom_parser(date):  # note: the input is a str (the column has object dtype in the df)
    """Convert a 'YYYY:QQ' quarter label into the datetime of that quarter's first month.

    Quarter 1 maps to January, quarter 2 to April, quarter 3 to July and
    quarter 4 to October.

    Parameters
    ----------
    date : str
        Quarter label such as '1957:01' (year, colon, quarter number).

    Returns
    -------
    datetime.datetime
        The first day of the quarter's first month.

    Raises
    ------
    ValueError
        If the label does not split into exactly 'year:quarter', the quarter
        is not an integer in 1-4, or the year cannot be parsed with '%Y'.
    """
    year, quarter = date.split(':')

    quarter_number = int(quarter)
    # Reject impossible quarters here with a clear message; otherwise strptime
    # fails later with a confusing complaint about month 13 (or a negative month).
    if not 1 <= quarter_number <= 4:
        raise ValueError(
            f"quarter must be between 1 and 4, got {quarter!r} in {date!r}"
        )

    # First month of the quarter: 1 -> 1 (Jan), 2 -> 4 (Apr), 3 -> 7 (Jul), 4 -> 10 (Oct)
    month = (quarter_number - 1) * 3 + 1

    new_date = f"{year}-{month}"

    return datetime.strptime(new_date, '%Y-%m')

In [5]:
# Replace the 'Unnamed: 0' quarter labels with a quarterly DatetimeIndex named 'Date'.
# The guard makes the cell safe to re-run: once the label column has been consumed,
# a second execution is a no-op instead of a KeyError.
if 'Unnamed: 0' in df.columns:
    df = (
        df.assign(Date=df['Unnamed: 0'].apply(custom_parser))
          .set_index('Date')
          .drop(columns='Unnamed: 0')
    )
    # set frequency (quarter start); pandas checks that the index conforms to it
    df.index.freq = 'QS'
elif df.index.name != 'Date':
    # Neither the raw label column nor the already-built index is present
    raise KeyError("expected an 'Unnamed: 0' column holding 'YYYY:QQ' quarter labels")

In [6]:
# Derived series: GDP growth and term spread

# Log-difference of GDPC96; the first row is NaN because it has no previous value
df['GDPGrowth'] = df['GDPC96'].pipe(np.log).diff()
# GS10 minus TB3MS (presumably long-term vs. 3-month Treasury rates -- confirm against the data source)
df['TSpread'] = df['GS10'].sub(df['TB3MS'])

In [7]:
# Series used for the Granger tests. The term spread is kept in levels (not differenced);
# differencing it as well would be an option, but the original note judged it acceptable from its plot.
cols = ['GDPGrowth', 'TSpread']

# Skip the first row: GDPGrowth comes from a first difference, so it has no value there.
df1 = df.loc[:, cols].iloc[1:].copy()

In [8]:
# Preview the two-column frame (GDPGrowth, TSpread) that is passed to the Granger tests
df1.head()

Unnamed: 0_level_0,GDPGrowth,TSpread
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1957-04-01,-0.00222,0.486667
1957-07-01,0.009693,0.573333
1957-10-01,-0.010303,0.323333
1958-01-01,-0.026265,1.283333
1958-04-01,0.006577,1.966667


Check the [link](https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.grangercausalitytests.html) for the documentation of the Granger causality test. 

"The Null hypothesis for grangercausalitytests is that the time series in the second column, x2, does NOT Granger cause the time series in the first column, x1. Grange causality means that past values of x2 have a statistically significant effect on the current value of x1, taking past values of x1 into account as regressors. We reject the null hypothesis that x2 does not Granger cause x1 if the pvalues are below a desired size of the test.

The null hypothesis for all four tests is that the coefficients corresponding to past values of the second time series are zero."


In [11]:
# Column order matters: this tests whether TSpread (second column of df1)
# Granger-causes GDPGrowth (first column), with maxlag set to 18.
granger_res = grangercausalitytests(df1, maxlag = 18)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=6.1781  , p=0.0137  , df_denom=223, df_num=1
ssr based chi2 test:   chi2=6.2612  , p=0.0123  , df=1
likelihood ratio test: chi2=6.1761  , p=0.0129  , df=1
parameter F test:         F=6.1781  , p=0.0137  , df_denom=223, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=4.9160  , p=0.0082  , df_denom=220, df_num=2
ssr based chi2 test:   chi2=10.0555 , p=0.0066  , df=2
likelihood ratio test: chi2=9.8373  , p=0.0073  , df=2
parameter F test:         F=4.9160  , p=0.0082  , df_denom=220, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=3.4458  , p=0.0176  , df_denom=217, df_num=3
ssr based chi2 test:   chi2=10.6710 , p=0.0136  , df=3
likelihood ratio test: chi2=10.4246 , p=0.0153  , df=3
parameter F test:         F=3.4458  , p=0.0176  , df_denom=217, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=2.8964  , p=0.0230  

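The p-values are statistically significant up to a certain lag (below 0.05 for each of the lags shown above), so for those lags we reject the null hypothesis that TSpread does not Granger-cause GDPGrowth.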

In [12]:
# Swap the column order so the test now asks whether GDPGrowth (second column)
# Granger-causes TSpread (first column). A reversed list slice is used as the column key.
granger_res_rev = grangercausalitytests(df1[cols[::-1]], maxlag=18)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=11.9872 , p=0.0006  , df_denom=223, df_num=1
ssr based chi2 test:   chi2=12.1485 , p=0.0005  , df=1
likelihood ratio test: chi2=11.8332 , p=0.0006  , df=1
parameter F test:         F=11.9872 , p=0.0006  , df_denom=223, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=5.4010  , p=0.0051  , df_denom=220, df_num=2
ssr based chi2 test:   chi2=11.0476 , p=0.0040  , df=2
likelihood ratio test: chi2=10.7849 , p=0.0046  , df=2
parameter F test:         F=5.4010  , p=0.0051  , df_denom=220, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=4.8793  , p=0.0026  , df_denom=217, df_num=3
ssr based chi2 test:   chi2=15.1100 , p=0.0017  , df=3
likelihood ratio test: chi2=14.6221 , p=0.0022  , df=3
parameter F test:         F=4.8793  , p=0.0026  , df_denom=217, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=3.1513  , p=0.0152  

The reversed Granger causality test is even more statistically significant at all the lags we looked at! We should not think of Granger causality as a test of actual causality; rather, it is a test of forecasting ability. From the results above, we can say that certain lagged values of GDPGrowth can be used as input features to predict the term spread!