In [25]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import sklearn
import math
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller # Dickey-Fuller test
from statsmodels.tsa.statespace.tools import diff # difference operator 
%matplotlib inline

In [18]:
# Read the GDP table and the money-supply table from the local ./data folder.
gdp_df, money_df = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/USGDP.csv", "./data/USMoney.csv")
)
# Print the (rows, columns) shape of each frame as a quick check on the load.
shape_report = "".join(["gdp:", str(gdp_df.shape), " money:", str(money_df.shape)])
print(shape_report)

gdp:(283, 4) money:(708, 5)


In [19]:
# Parse GDP from text with thousands separators (e.g. "1,234.5") into floats.
# Casting to str first keeps this cell safe to re-run: once the column already
# holds floats there are no commas left to strip and the values pass through
# unchanged, instead of failing because a float has no .replace method.
gdp_df["GDP"] = (
    gdp_df["GDP"]
    .astype(str)
    .str.replace(",", "", regex=False)  # literal comma, not a regex
    .astype(float)
)

In [None]:
# Adjust M1 and M2 for inflation using the GDP deflator:
# real value = nominal value / GDPDEF * 100, written back over the same columns.
# NOTE(review): this cell is marked `In [None]` (never executed) and, in the
# visible notebook, amoney_df is first assigned in the FOLLOWING cell -- on a
# fresh top-to-bottom run this would presumably raise NameError. Confirm the
# intended cell order before relying on deflated M1/M2 downstream.
# NOTE(review): Series division aligns on the row index, not on Year/Quarter.
# Verify that row i of amoney_df and row i of gdp_df refer to the same quarter;
# the two CSVs have different lengths, so they may not start at the same date.
amoney_df.M1 = (amoney_df.M1 / gdp_df.GDPDEF) * 100
amoney_df.M2 = (amoney_df.M2 / gdp_df.GDPDEF) * 100

In [20]:
# Collapse the monthly rows into quarters: every three consecutive rows share a
# bucket (row position // 3) and each column is averaged within its bucket.
amoney_df = (
    money_df
    .groupby(money_df.index // 3)
    .mean()
    # Replace the averaged Month with a 1-4 quarter number that cycles with the
    # bucket position, then relabel the column accordingly.
    # Presumably the series starts in January with no gaps -- verify.
    .assign(Month=lambda quarterly: quarterly.index % 4 + 1)
    .rename(columns={'Month': 'Quarter'})
)

In [22]:
# Turn GDP into real GDP by dividing by the GDP deflator and scaling by 100.
# This overwrites gdp_df.GDP, so running the cell a second time would deflate
# the already-deflated values again.
gdp_df["GDP"] = gdp_df["GDP"] / gdp_df["GDPDEF"] * 100
gdp_cols = ['Year', 'Quarter', 'GDP']
# Attach real GDP to the quarterly money data, keeping only the quarters
# present in both tables, and expose it under the name RGDP.
amoney_df = (
    amoney_df
    .merge(gdp_df[gdp_cols], on=['Year', 'Quarter'], how='inner')
    .rename(columns={'GDP': 'RGDP'})
)

In [23]:
# Keep the quarterly frame under its own name (same object, not a copy).
qmoney_df = amoney_df
# Build the annual frame: take the first 232 quarterly rows, average each run
# of four consecutive rows, and store the averaged Year as an integer.
ymoney_df = (
    amoney_df
    .head(232)
    .pipe(lambda quarters: quarters.groupby(quarters.index // 4).mean())
    .assign(Year=lambda yearly: yearly["Year"].astype(int))
)
ymoney_df.shape

(58, 6)

In [24]:
def _log_money_design(frame):
    """Build the log-transformed targets and regressors from one money/GDP frame.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns M1, M2, Year and Quarter; every remaining
        column is treated as a regressor.

    Returns
    -------
    tuple
        (ln(M1), ln(M2), X), where X holds the natural log of the remaining
        columns with a constant column added by ``sm.add_constant``.
    """
    # np.log is already the natural logarithm, so no change of base is needed.
    ln_m1 = np.log(frame["M1"])
    ln_m2 = np.log(frame["M2"])
    regressors = np.log(frame.drop(["M1", "M2", "Year", "Quarter"], axis=1))
    return ln_m1, ln_m2, sm.add_constant(regressors)  # add a column of ones


# Quarterly log targets and design matrix (no train/test split happens here).
q_lnM1, q_lnM2, q_X = _log_money_design(qmoney_df)

# Yearly log targets and design matrix.
y_lnM1, y_lnM2, y_X = _log_money_design(ymoney_df)

In [46]:
# Perform the augmented Dickey-Fuller test on ln(RGDP), differencing the series
# one more time on each pass until the p-value falls below the significance
# level. When the loop exits through `break`, `i` is the differencing order
# that was used, `q_X_d` the differenced series and `result` the adfuller
# output for it.
ADF_ALPHA = 0.05   # significance level for rejecting the unit-root null
ADF_MAX_DIFF = 5   # safety bound so a never-significant (or NaN) p-value cannot loop forever
result = None
for i in range(ADF_MAX_DIFF + 1):
    q_X_d = diff(q_X.RGDP, k_diff=i)
    result = adfuller(q_X_d, regression='c', autolag='BIC')
    print("-------------------------------")
    print(str(i) + "th degree difference")
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])
    print('used lags: %f' % result[2])
    print('Critical Values:')
    for key, value in result[4].items():
        print('\t%s: %.3f' % (key, value))
    print("-------------------------------")
    if result[1] < ADF_ALPHA:
        break
else:
    # Only reached when no differencing order up to the bound was significant.
    raise RuntimeError(
        "ln(RGDP) did not test as stationary after %d differences" % ADF_MAX_DIFF
    )

-------------------------------
0th degree difference
ADF Statistic: -2.289823
p-value: 0.175271
used lags: 1.000000
Critical Values:
	1%: -3.459
	5%: -2.874
	10%: -2.573
-------------------------------
-------------------------------
1th degree difference
ADF Statistic: -7.464437
p-value: 0.000000
used lags: 1.000000
Critical Values:
	1%: -3.459
	5%: -2.874
	10%: -2.573
-------------------------------
