In [1]:
from pandas import read_csv
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import grangercausalitytests
from numpy import log
import numpy as np
import pandas as pd
import datetime as datetime

def zscore(df, col, window=10):
    """Return the rolling z-score of ``df[col]``.

    Each value is standardised against the mean and sample standard
    deviation of the trailing ``window`` rows ending at (and including)
    that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to standardise.
    col : str
        Name of the column in ``df``.
    window : int, default 10
        Number of trailing rows in each rolling window. The default keeps
        the behaviour of the original hard-coded 10-row window.

    Returns
    -------
    pandas.Series
        ``(df[col] - rolling_mean) / rolling_std``, indexed like ``df``.
        The first ``window - 1`` entries are NaN because the window is not
        yet full; a window with zero standard deviation yields NaN or inf.
    """
    series = df[col]
    # Build the rolling object once so mean and std share the same window.
    rolling = series.rolling(window=window)
    return (series - rolling.mean()) / rolling.std()

# Load the emotion data set (the file name suggests daily Plutchik scores);
# the 'month/day' column is parsed as datetimes.
df = pd.read_csv('./data/11-8/to_trump_2M_dates_res/daily_plutchik_onehot.csv', parse_dates=['month/day'])
# Load the approval poll list; 'enddate' is parsed as datetimes.
approval_rating_df = pd.read_csv('./data/approval_polllist.csv', parse_dates=["enddate"])

# Add a "<Emotion> Z-Score" column for every emotion column using zscore().
# The list order fixes the order in which the new columns are appended.
EMOTIONS = ["Anticipation", "Trust", "Surprise", "Sadness", "Joy", "Fear", "Disgust", "Anger"]
for emotion in EMOTIONS:
    df[emotion + " Z-Score"] = zscore(df, emotion)

# Analysis window used for both the emotion series and the approval series.
start_date = datetime.datetime(2017, 5, 15, 0, 0)
end_date = datetime.datetime(2017, 11, 17, 0, 0)

# Keep rows strictly inside (start_date, end_date): both bounds are exclusive.
# NOTE(review): the approvals filter later in the notebook uses `<= end_date`;
# confirm that the two windows are meant to differ at the upper bound.
truncated_df = df[(df["month/day"] > start_date) & (df["month/day"] < end_date)]

In [2]:
# Augmented Dickey-Fuller test on the Anticipation z-score series.
# The next cell reads `result` to print the statistic, p-value and critical values.
anticipation_z = truncated_df["Anticipation Z-Score"]
result = adfuller(anticipation_z)

In [3]:
# Report the ADF statistic, p-value and critical values held in `result`.
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -11.027296
p-value: 0.000000
	1%: -3.467
	5%: -2.877
	10%: -2.575


In [4]:
# Augmented Dickey-Fuller test on the Trust z-score series, followed by a
# printout of the statistic, p-value and critical values.
result = adfuller(truncated_df["Trust Z-Score"])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -11.493683
p-value: 0.000000
	1%: -3.467
	5%: -2.877
	10%: -2.575


In [5]:
# Augmented Dickey-Fuller test on the Surprise z-score series, followed by a
# printout of the statistic, p-value and critical values.
result = adfuller(truncated_df["Surprise Z-Score"])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -9.866922
p-value: 0.000000
	1%: -3.467
	5%: -2.877
	10%: -2.575


In [6]:
# Augmented Dickey-Fuller test on the Sadness z-score series, followed by a
# printout of the statistic, p-value and critical values.
result = adfuller(truncated_df["Sadness Z-Score"])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -11.540513
p-value: 0.000000
	1%: -3.467
	5%: -2.877
	10%: -2.575


In [7]:
# Augmented Dickey-Fuller test on the Joy z-score series, followed by a
# printout of the statistic, p-value and critical values.
result = adfuller(truncated_df["Joy Z-Score"])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -9.548665
p-value: 0.000000
	1%: -3.467
	5%: -2.877
	10%: -2.575


In [8]:
# Augmented Dickey-Fuller test on the Fear z-score series, followed by a
# printout of the statistic, p-value and critical values.
result = adfuller(truncated_df["Fear Z-Score"])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -8.870502
p-value: 0.000000
	1%: -3.467
	5%: -2.878
	10%: -2.575


In [9]:
# Augmented Dickey-Fuller test on the Disgust z-score series, followed by a
# printout of the statistic, p-value and critical values.
result = adfuller(truncated_df["Disgust Z-Score"])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -9.839633
p-value: 0.000000
	1%: -3.467
	5%: -2.877
	10%: -2.575


In [10]:
# Augmented Dickey-Fuller test on the Anger z-score series, followed by a
# printout of the statistic, p-value and critical values.
result = adfuller(truncated_df["Anger Z-Score"])
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -6.274008
p-value: 0.000000
	1%: -3.467
	5%: -2.878
	10%: -2.575


In [11]:
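# In every test above the p-value is <= 0.05 and the ADF statistic is below the 1%, 5%, and 10% critical values,
# so the unit-root null hypothesis is rejected and each emotion z-score series is treated as stationary.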

In [12]:
# Select the Gallup rows of the 'All polls' subgroup whose end date falls in
# (start_date, end_date], then keep only the adjusted approval column.
is_gallup = approval_rating_df['pollster'] == 'Gallup'
is_all_polls = approval_rating_df['subgroup'] == 'All polls'
in_window = (approval_rating_df['enddate'] > start_date) & (approval_rating_df['enddate'] <= end_date)
truncated_approvals = approval_rating_df.loc[is_gallup & is_all_polls & in_window, 'adjusted_approve']

# Plain NumPy array of the approval values, in the frame's existing row order.
approvals_array = truncated_approvals.values

In [15]:
# Two-column array for the Granger test: column 0 holds the approval values,
# column 1 the Anticipation z-scores. Rows are paired purely by position.
anticipation_z = truncated_df["Anticipation Z-Score"]
stacked = np.asarray([approvals_array, anticipation_z])
x = stacked.T

In [16]:
# Granger causality tests for lags 1 through max_lag. Per the statsmodels
# documentation, the null hypothesis is that the second column of `x` does
# not Granger-cause the first column.
max_lag = 7
grangercausalitytests(x, maxlag=max_lag)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=2.4484  , p=0.1194  , df_denom=180, df_num=1
ssr based chi2 test:   chi2=2.4892  , p=0.1146  , df=1
likelihood ratio test: chi2=2.4724  , p=0.1159  , df=1
parameter F test:         F=2.4484  , p=0.1194  , df_denom=180, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=1.2479  , p=0.2896  , df_denom=177, df_num=2
ssr based chi2 test:   chi2=2.5663  , p=0.2772  , df=2
likelihood ratio test: chi2=2.5484  , p=0.2797  , df=2
parameter F test:         F=1.2479  , p=0.2896  , df_denom=177, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.1339  , p=0.3369  , df_denom=174, df_num=3
ssr based chi2 test:   chi2=3.5385  , p=0.3158  , df=3
likelihood ratio test: chi2=3.5044  , p=0.3202  , df=3
parameter F test:         F=1.1339  , p=0.3369  , df_denom=174, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.8377  , p=0.5030  

{1: ({'lrtest': (2.4724119436507408, 0.11586001175149435, 1),
   'params_ftest': (2.4483827315542994, 0.11940239177003237, 180.0, 1.0),
   'ssr_chi2test': (2.4891891104137573, 0.11463077171933894, 1),
   'ssr_ftest': (2.4483827315545152, 0.11940239177001544, 180.0, 1)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1135f6cf8>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1135f6828>,
   array([[ 0.,  1.,  0.]])]),
 2: ({'lrtest': (2.5484214543347434, 0.27965160255210747, 2),
   'params_ftest': (1.2479214435659167, 0.28961438783346055, 177.0, 2.0),
   'ssr_chi2test': (2.5663469234917899, 0.27715635827374768, 2),
   'ssr_ftest': (1.2479214435660626, 0.28961438783341797, 177.0, 2)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1135f6f98>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1135f6e80>,
   array([[ 0.,  0.,  1.,  0.,  0.],
          [ 0.,  0.,  0.,  1.,  0.]])]),
 3: ({'lrtest': (3.50

In [17]:
# Repeat the Granger causality test with the Trust z-scores as the second
# column; column 0 is still the approval values, paired by position.
trust_z = truncated_df["Trust Z-Score"]
stacked = np.asarray([approvals_array, trust_z])
x = stacked.T
grangercausalitytests(x, maxlag=7)