In [71]:
from pandas import read_csv
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import grangercausalitytests
from numpy import log
import numpy as np
import pandas as pd
import datetime as datetime

def zscore(df, col):
    col_mean = df[col].rolling(window=14, center=True).mean()
    col_std = df[col].rolling(window=14, center=True).std()
    return (df[col] - col_mean)/col_std

df = pd.read_csv('./data/11-26/daily_plutchik_threshold_5.csv', parse_dates=['month/day'])
approval_rating_df = pd.read_csv('./data/approval_polllist.csv', parse_dates=["enddate"])
topic_df = pd.read_excel('./data/TopicScoresPerDay_Final.xlsx')

In [72]:
df["Anticipation Z-Score"] = zscore(df, "Anticipation")
df["Trust Z-Score"] = zscore(df, "Trust")
df["Surprise Z-Score"] = zscore(df, "Surprise")
df["Sadness Z-Score"] = zscore(df, "Sadness")
df["Joy Z-Score"] = zscore(df, "Joy")
df["Fear Z-Score"] = zscore(df, "Fear")
df["Disgust Z-Score"] = zscore(df, "Disgust")
df["Anger Z-Score"] = zscore(df, "Anger")

start_date = datetime.datetime(2017, 7, 12, 0, 0)
end_date = datetime.datetime(2017, 12, 31, 0, 0)

dated_emotion_df = df[(df["month/day"] > start_date) & (df["month/day"] < end_date)]

In [73]:
topic_df = topic_df.drop(['Filename', 'Segment', 'WC', 'WPS', 'Sixltr', 'Dic', 
                          'AllPunc', 'Period', 'Comma', 'Colon', 'SemiC', 'QMark', 
                          'Exclam', 'Dash', 'Quote', 'Apostro', 'Parenth', 'OtherP', 
                          'Em1', 'Em2', 'Em3', 'Em4', 'Em5', 'Em6', 'Em7', 'Em8', 'Gallup Value'], axis=1)

for (columnName, columnData) in topic_df.iteritems():
    topic_df[columnName + " Z-Score"] = zscore(topic_df, columnName)
    
topic_df = topic_df[66:237]
topic_df.fillna(0, inplace=True)
topic_df.head()

Unnamed: 0,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,topic10,...,topic492 Z-Score,topic493 Z-Score,topic494 Z-Score,topic495 Z-Score,topic496 Z-Score,topic497 Z-Score,topic498 Z-Score,topic499 Z-Score,topic500 Z-Score,topic501 Z-Score
66,6.46,0.0,0.0,0.05,0.0,0.01,0.64,0.57,0.19,0.65,...,0.194023,2.163111,1.089956,-0.491032,1.429682,0.413594,-0.702665,1.065521,1.43284,-0.507075
67,6.12,0.0,0.0,0.04,0.0,0.01,0.46,0.65,0.08,0.49,...,0.769534,-0.151553,-0.433117,-0.479883,0.629919,1.120638,-0.401764,-1.296174,-1.321625,-0.372797
68,6.2,0.0,0.01,0.01,0.0,0.02,0.61,0.51,0.1,0.69,...,0.461597,-1.572928,-0.890207,1.424433,1.097163,1.309574,-0.475466,-1.355222,-2.163202,-0.066748
69,6.08,0.0,0.0,0.03,0.0,0.02,0.47,0.5,0.07,0.67,...,-0.503236,0.630641,2.372378,2.064031,-0.495156,-0.901418,-0.321563,0.105594,0.360875,-0.514077
70,5.97,0.0,0.0,0.06,0.0,0.02,0.45,0.63,0.06,0.21,...,-0.493518,-0.165357,-1.091842,-0.820724,-0.3383,-1.210769,-1.583182,1.308417,0.067327,-0.723097


In [79]:
def check_stationarity(column, title):
    result = adfuller(column)
    for key, value in result[4].items():
        if result[0] > value:
            print('Found a non-stationary time series:', title)
            print('ADF Statistic: %f' % result[0])
            print('p-value: %f' % result[1])
            for key, value in result[4].items():
                print('\t%s: %.3f' % (key, value))
        if result[1] > 0.05:
            print('Found a p-value greater than threshold')

In [80]:
for i in range(1, 502):
    title = 'topic'+str(i)+' Z-Score'
    check_stationarity(topic_df[title], title)

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2
  return self.params / self.bse


In [81]:
for (columnName, columnData) in dated_emotion_df.iteritems():
    if columnName.endswith('Z-Score'):
        check_stationarity(dated_emotion_df[columnName], columnName)

# p-values are all <= 0.05 and ADF Statistics are < values at 1%, 5%, and 10%

In [26]:
truncated_approvals = approval_rating_df.loc[approval_rating_df['pollster'] == 'Ipsos']
truncated_approvals = truncated_approvals.loc[truncated_approvals['subgroup'] == 'All polls']
truncated_approvals = truncated_approvals[(truncated_approvals['enddate'] > start_date) & (truncated_approvals['enddate'] < end_date)]
truncated_approvals = truncated_approvals['adjusted_approve']

approvals_array = truncated_approvals.values

In [27]:
x = np.asarray([approvals_array, dated_emotion_df["Anticipation Z-Score"]]).T
print(x, len(x))

[[  4.13517300e+01   2.25208662e+00]
 [  4.22517300e+01   6.67345455e-01]
 [  4.09517300e+01   8.59557232e-01]
 [  4.09517300e+01   6.15612029e-02]
 [  3.84517300e+01  -6.88658723e-01]
 [  3.83517300e+01  -9.82259940e-01]
 [  3.90517300e+01  -1.30968970e+00]
 [  3.90517300e+01  -8.18877878e-01]
 [  3.87517300e+01  -2.05869068e-01]
 [  3.75517300e+01  -2.63694855e-01]
 [  3.70517300e+01   2.36405924e-01]
 [  3.58517300e+01  -6.69419605e-02]
 [  3.59517300e+01   5.09174995e-01]
 [  3.64517300e+01  -1.15977628e+00]
 [  3.84517300e+01   2.23067716e+00]
 [  3.73517300e+01   4.17294169e-01]
 [  3.75517300e+01  -3.87182946e-01]
 [  3.77517300e+01   1.82748733e-01]
 [  3.78517300e+01  -6.23055687e-01]
 [  3.78517300e+01  -1.82058715e-01]
 [  3.82517300e+01  -8.97068455e-01]
 [  3.76517300e+01   1.27827094e+00]
 [  3.65517300e+01   2.71160707e+00]
 [  3.57517300e+01   1.79314882e-01]
 [  3.52517300e+01  -2.03393153e-01]
 [  3.71517300e+01  -4.87493038e-01]
 [  3.80517300e+01   8.97848409e-02]
 

In [113]:
def find_significant_time_series(time_series, columnName, result_set):
    x = np.asarray([approvals_array, time_series[columnName]]).T
    results = grangercausalitytests(x, maxlag=7, verbose=False)
    for lag, test in results.items():
        for test, values in results[lag][0].items():
            if values[1] <= 0.05:
                print("found a significant test result for column:", columnName, "with", lag, "lags")
                result_set.add(columnName)

In [114]:
significant_emotions = set()
for (columnName, columnData) in dated_emotion_df.iteritems():
    if columnName.endswith('Z-Score'):
        find_significant_time_series(dated_emotion_df, columnName, significant_emotions)

found a significant test result for column: Trust Z-Score with 1 lags
found a significant test result for column: Trust Z-Score with 1 lags
found a significant test result for column: Trust Z-Score with 1 lags
found a significant test result for column: Trust Z-Score with 1 lags
found a significant test result for column: Trust Z-Score with 2 lags
found a significant test result for column: Trust Z-Score with 2 lags
found a significant test result for column: Trust Z-Score with 2 lags
found a significant test result for column: Trust Z-Score with 2 lags
found a significant test result for column: Trust Z-Score with 4 lags
found a significant test result for column: Trust Z-Score with 4 lags
found a significant test result for column: Trust Z-Score with 4 lags
found a significant test result for column: Trust Z-Score with 4 lags
found a significant test result for column: Trust Z-Score with 5 lags
found a significant test result for column: Trust Z-Score with 5 lags
found a significant 

In [119]:
for emotion in significant_emotions:
    print(emotion)

Trust Z-Score
Surprise Z-Score
Joy Z-Score


In [115]:
significant_topics = set()
for (columnName, columnData) in topic_df.iteritems():
    if columnName.endswith('Z-Score'):
        find_significant_time_series(topic_df, columnName, significant_topics)

  F /= J


found a significant test result for column: topic6 Z-Score with 1 lags
found a significant test result for column: topic6 Z-Score with 1 lags
found a significant test result for column: topic6 Z-Score with 1 lags
found a significant test result for column: topic6 Z-Score with 1 lags
found a significant test result for column: topic6 Z-Score with 2 lags
found a significant test result for column: topic6 Z-Score with 2 lags
found a significant test result for column: topic6 Z-Score with 2 lags
found a significant test result for column: topic6 Z-Score with 2 lags
found a significant test result for column: topic6 Z-Score with 3 lags
found a significant test result for column: topic6 Z-Score with 3 lags
found a significant test result for column: topic6 Z-Score with 3 lags
found a significant test result for column: topic6 Z-Score with 3 lags
found a significant test result for column: topic10 Z-Score with 2 lags
found a significant test result for column: topic28 Z-Score with 1 lags
foun

found a significant test result for column: topic88 Z-Score with 1 lags
found a significant test result for column: topic88 Z-Score with 1 lags
found a significant test result for column: topic88 Z-Score with 1 lags
found a significant test result for column: topic88 Z-Score with 1 lags
found a significant test result for column: topic88 Z-Score with 5 lags
found a significant test result for column: topic88 Z-Score with 5 lags
found a significant test result for column: topic88 Z-Score with 5 lags
found a significant test result for column: topic88 Z-Score with 5 lags
found a significant test result for column: topic97 Z-Score with 4 lags
found a significant test result for column: topic97 Z-Score with 4 lags
found a significant test result for column: topic97 Z-Score with 4 lags
found a significant test result for column: topic97 Z-Score with 4 lags
found a significant test result for column: topic97 Z-Score with 5 lags
found a significant test result for column: topic97 Z-Score with

found a significant test result for column: topic132 Z-Score with 6 lags
found a significant test result for column: topic132 Z-Score with 6 lags
found a significant test result for column: topic132 Z-Score with 6 lags
found a significant test result for column: topic132 Z-Score with 7 lags
found a significant test result for column: topic132 Z-Score with 7 lags
found a significant test result for column: topic132 Z-Score with 7 lags
found a significant test result for column: topic132 Z-Score with 7 lags
found a significant test result for column: topic142 Z-Score with 3 lags
found a significant test result for column: topic142 Z-Score with 3 lags
found a significant test result for column: topic142 Z-Score with 5 lags
found a significant test result for column: topic142 Z-Score with 5 lags
found a significant test result for column: topic142 Z-Score with 5 lags
found a significant test result for column: topic142 Z-Score with 5 lags
found a significant test result for column: topic14

found a significant test result for column: topic185 Z-Score with 4 lags
found a significant test result for column: topic185 Z-Score with 4 lags
found a significant test result for column: topic185 Z-Score with 4 lags
found a significant test result for column: topic185 Z-Score with 4 lags
found a significant test result for column: topic185 Z-Score with 5 lags
found a significant test result for column: topic185 Z-Score with 5 lags
found a significant test result for column: topic185 Z-Score with 5 lags
found a significant test result for column: topic185 Z-Score with 5 lags
found a significant test result for column: topic185 Z-Score with 6 lags
found a significant test result for column: topic185 Z-Score with 6 lags
found a significant test result for column: topic185 Z-Score with 6 lags
found a significant test result for column: topic185 Z-Score with 6 lags
found a significant test result for column: topic185 Z-Score with 7 lags
found a significant test result for column: topic18

found a significant test result for column: topic237 Z-Score with 1 lags
found a significant test result for column: topic237 Z-Score with 1 lags
found a significant test result for column: topic237 Z-Score with 3 lags
found a significant test result for column: topic237 Z-Score with 3 lags
found a significant test result for column: topic237 Z-Score with 3 lags
found a significant test result for column: topic237 Z-Score with 3 lags
found a significant test result for column: topic237 Z-Score with 7 lags
found a significant test result for column: topic238 Z-Score with 1 lags
found a significant test result for column: topic238 Z-Score with 1 lags
found a significant test result for column: topic238 Z-Score with 1 lags
found a significant test result for column: topic238 Z-Score with 1 lags
found a significant test result for column: topic239 Z-Score with 5 lags
found a significant test result for column: topic239 Z-Score with 5 lags
found a significant test result for column: topic23

found a significant test result for column: topic286 Z-Score with 3 lags
found a significant test result for column: topic286 Z-Score with 3 lags
found a significant test result for column: topic286 Z-Score with 3 lags
found a significant test result for column: topic286 Z-Score with 4 lags
found a significant test result for column: topic286 Z-Score with 4 lags
found a significant test result for column: topic286 Z-Score with 4 lags
found a significant test result for column: topic286 Z-Score with 4 lags
found a significant test result for column: topic286 Z-Score with 5 lags
found a significant test result for column: topic286 Z-Score with 5 lags
found a significant test result for column: topic286 Z-Score with 7 lags
found a significant test result for column: topic286 Z-Score with 7 lags
found a significant test result for column: topic289 Z-Score with 4 lags
found a significant test result for column: topic289 Z-Score with 4 lags
found a significant test result for column: topic28

found a significant test result for column: topic337 Z-Score with 1 lags
found a significant test result for column: topic346 Z-Score with 1 lags
found a significant test result for column: topic346 Z-Score with 1 lags
found a significant test result for column: topic346 Z-Score with 1 lags
found a significant test result for column: topic346 Z-Score with 1 lags
found a significant test result for column: topic346 Z-Score with 2 lags
found a significant test result for column: topic347 Z-Score with 2 lags
found a significant test result for column: topic347 Z-Score with 2 lags
found a significant test result for column: topic347 Z-Score with 2 lags
found a significant test result for column: topic347 Z-Score with 2 lags
found a significant test result for column: topic349 Z-Score with 1 lags
found a significant test result for column: topic349 Z-Score with 1 lags
found a significant test result for column: topic349 Z-Score with 1 lags
found a significant test result for column: topic34

found a significant test result for column: topic377 Z-Score with 1 lags
found a significant test result for column: topic377 Z-Score with 1 lags
found a significant test result for column: topic377 Z-Score with 1 lags
found a significant test result for column: topic377 Z-Score with 1 lags
found a significant test result for column: topic380 Z-Score with 2 lags
found a significant test result for column: topic380 Z-Score with 2 lags
found a significant test result for column: topic380 Z-Score with 2 lags
found a significant test result for column: topic380 Z-Score with 2 lags
found a significant test result for column: topic382 Z-Score with 4 lags
found a significant test result for column: topic382 Z-Score with 4 lags
found a significant test result for column: topic382 Z-Score with 4 lags
found a significant test result for column: topic382 Z-Score with 4 lags
found a significant test result for column: topic382 Z-Score with 5 lags
found a significant test result for column: topic38

found a significant test result for column: topic448 Z-Score with 6 lags
found a significant test result for column: topic448 Z-Score with 6 lags
found a significant test result for column: topic448 Z-Score with 7 lags
found a significant test result for column: topic452 Z-Score with 3 lags
found a significant test result for column: topic452 Z-Score with 3 lags
found a significant test result for column: topic452 Z-Score with 3 lags
found a significant test result for column: topic452 Z-Score with 3 lags
found a significant test result for column: topic452 Z-Score with 4 lags
found a significant test result for column: topic452 Z-Score with 4 lags
found a significant test result for column: topic452 Z-Score with 4 lags
found a significant test result for column: topic452 Z-Score with 4 lags
found a significant test result for column: topic452 Z-Score with 5 lags
found a significant test result for column: topic452 Z-Score with 5 lags
found a significant test result for column: topic45

found a significant test result for column: topic492 Z-Score with 5 lags
found a significant test result for column: topic492 Z-Score with 5 lags
found a significant test result for column: topic492 Z-Score with 5 lags
found a significant test result for column: topic492 Z-Score with 5 lags


In [122]:
print(len(significant_topics))
for topic in significant_topics:
    print(topic)

124
topic237 Z-Score
topic164 Z-Score
topic437 Z-Score
topic142 Z-Score
topic196 Z-Score
topic377 Z-Score
topic382 Z-Score
topic67 Z-Score
topic473 Z-Score
topic297 Z-Score
topic452 Z-Score
topic79 Z-Score
topic335 Z-Score
topic484 Z-Score
topic430 Z-Score
topic50 Z-Score
topic6 Z-Score
topic418 Z-Score
topic203 Z-Score
topic74 Z-Score
topic112 Z-Score
topic99 Z-Score
topic289 Z-Score
topic322 Z-Score
topic476 Z-Score
topic380 Z-Score
topic399 Z-Score
topic461 Z-Score
topic201 Z-Score
topic333 Z-Score
topic63 Z-Score
topic40 Z-Score
topic403 Z-Score
topic319 Z-Score
topic374 Z-Score
topic324 Z-Score
topic240 Z-Score
topic200 Z-Score
topic211 Z-Score
topic49 Z-Score
topic39 Z-Score
topic162 Z-Score
topic457 Z-Score
topic185 Z-Score
topic349 Z-Score
topic132 Z-Score
topic109 Z-Score
topic88 Z-Score
topic274 Z-Score
topic492 Z-Score
topic276 Z-Score
topic97 Z-Score
topic150 Z-Score
topic347 Z-Score
topic165 Z-Score
topic221 Z-Score
topic286 Z-Score
topic126 Z-Score
topic404 Z-Score
topic2

# Example of Granger Causality Test output 

In [29]:
x = np.asarray([approvals_array, dated_emotion_df["Trust Z-Score"]]).T
grangercausalitytests(x, maxlag=7)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=5.4970  , p=0.0202  , df_denom=167, df_num=1
ssr based chi2 test:   chi2=5.5957  , p=0.0180  , df=1
likelihood ratio test: chi2=5.5056  , p=0.0190  , df=1
parameter F test:         F=5.4970  , p=0.0202  , df_denom=167, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=3.4721  , p=0.0334  , df_denom=164, df_num=2
ssr based chi2 test:   chi2=7.1558  , p=0.0279  , df=2
likelihood ratio test: chi2=7.0085  , p=0.0301  , df=2
parameter F test:         F=3.4721  , p=0.0334  , df_denom=164, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.2669  , p=0.0828  , df_denom=161, df_num=3
ssr based chi2 test:   chi2=7.0963  , p=0.0689  , df=3
likelihood ratio test: chi2=6.9506  , p=0.0735  , df=3
parameter F test:         F=2.2669  , p=0.0828  , df_denom=161, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=2.8883  , p=0.0242  

{1: ({'lrtest': (5.505609732254527, 0.018955570283706202, 1),
   'params_ftest': (5.4969839581966466, 0.020225482168537815, 167.0, 1.0),
   'ssr_chi2test': (5.5957321730144933, 0.018004284631946768, 1),
   'ssr_ftest': (5.4969839581965916, 0.020225482168538346, 167.0, 1)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c0fd3f4e0>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c0fd3f518>,
   array([[ 0.,  1.,  0.]])]),
 2: ({'lrtest': (7.0084671586536729, 0.030069810639427971, 2),
   'params_ftest': (3.4720538653599715, 0.03335465188161163, 164.0, 2.0),
   'ssr_chi2test': (7.1558183322656976, 0.027934042682226719, 2),
   'ssr_ftest': (3.4720538653596877, 0.033354651881620734, 164.0, 2)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x10afc2320>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x1c0fd3f940>,
   array([[ 0.,  0.,  1.,  0.,  0.],
          [ 0.,  0.,  0.,  1.,  0.]])]),
 3: ({'lrtes