In [91]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests


In [92]:
# Load the S&P 500 price table, using the parsed 'Date' column as the index.
sp500 = pd.read_csv(
    "./stocks/sp500.csv",
    index_col='Date',
    parse_dates=True,
)

In [93]:
# Show the loaded price frame (Open/High/Low/Close indexed by Date) as the cell output.
sp500

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-02-01,2285.590088,2289.139893,2272.439941,2279.550049
2017-02-02,2276.689941,2283.969971,2271.649902,2280.850098
2017-02-03,2288.540039,2298.310059,2287.879883,2297.419922
2017-02-06,2294.280029,2296.179932,2288.570068,2292.560059
2017-02-07,2295.870117,2299.399902,2290.159912,2293.080078
...,...,...,...,...
2020-12-23,3693.419922,3711.239990,3689.280029,3690.010010
2020-12-24,3694.030029,3703.820068,3689.320068,3703.060059
2020-12-28,3723.030029,3740.510010,3723.030029,3735.360107
2020-12-29,3750.010010,3756.120117,3723.310059,3727.040039


In [94]:
# Load the sentiment table, using the parsed 'Date' column as the index.
data_vader = pd.read_csv(
    "./sentiment/data_vader.csv",
    index_col='Date',
    parse_dates=True,
)

In [95]:
# Build one two-column frame per series: the 'Close' price next to that series.
# Every column of data_vader from position 4 onward is treated as a series to pair.
data = {}
for column_name in data_vader.columns[4:]:
    data[column_name] = data_vader[['Close', column_name]]

In [96]:
# Show the dict of per-series frames (each holds 'Close' plus one series column).
data

{'biden':                   Close    biden
 Date                            
 2017-02-01  2279.550049  0.04150
 2017-02-02  2280.850098  0.97810
 2017-02-03  2297.419922  0.08900
 2017-02-06  2292.560059  0.08900
 2017-02-07  2293.080078 -0.62490
 ...                 ...      ...
 2020-12-23  3690.010010  0.12035
 2020-12-24  3703.060059  0.12035
 2020-12-28  3735.360107  0.12035
 2020-12-29  3727.040039  0.12035
 2020-12-30  3732.040039  0.12035
 
 [986 rows x 2 columns],
 'musk':                   Close      musk
 Date                             
 2017-02-01  2279.550049  0.401900
 2017-02-02  2280.850098  0.401900
 2017-02-03  2297.419922  0.000000
 2017-02-06  2292.560059  0.169778
 2017-02-07  2293.080078  0.169778
 ...                 ...       ...
 2020-12-23  3690.010010 -0.018150
 2020-12-24  3703.060059  0.198250
 2020-12-28  3735.360107  0.123214
 2020-12-29  3727.040039  0.162985
 2020-12-30  3732.040039  0.212772
 
 [986 rows x 2 columns],
 'trump':                   Clos

In [97]:
def print_best_granger_causality(df, name, max_lag):
    """Run Granger causality tests for lags 1..max_lag and report the best lag.

    The "best" lag is the one whose likelihood-ratio test has the smallest
    p-value; on ties the smallest lag wins. The statistics of that lag are
    printed, and the lag is also stored in the notebook-level ``lags`` dict
    under ``name`` (that dict must exist before this function is called).

    NOTE(review): statsmodels tests whether the SECOND column Granger-causes
    the FIRST. With the column order ``[name, 'Close']`` used here, the test is
    therefore "does Close Granger-cause `name`". Confirm this is the intended
    direction; swap the columns if the question is whether `name` drives Close.

    NOTE(review): picking the minimum p-value over several lags is a selection
    step; the reported p-value is not corrected for multiple comparisons.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns ``name`` and ``'Close'``.
    name : str
        Column of ``df`` to test against ``'Close'``; also used as the label
        in the printed report and as the key written to ``lags``.
    max_lag : int
        Largest lag to test (passed to ``grangercausalitytests`` as ``maxlag``).

    Returns
    -------
    int
        The selected lag.
    """
    results = grangercausalitytests(df[[name, 'Close']], maxlag=max_lag, verbose=False)

    # results maps lag -> (dict of test tuples, fitted models); element [1] of
    # each test tuple is its p-value. min() keeps the first minimum, i.e. the
    # smallest lag on ties, like a strict '<' scan would.
    best_lag = min(results, key=lambda lag: results[lag][0]['lrtest'][1])
    best_tests = results[best_lag][0]

    # Use the `name` argument for the label and the dict key, never a variable
    # borrowed from the calling cell.
    print(f"Best Granger Causality between '{name}' and Close price:")
    lags[name] = best_lag
    print(f"Number of lags: {best_lag}")
    # The SSR-based F test has its own entry ('ssr_ftest'); 'params_ftest' is
    # reported separately on the last line.
    print(f"ssr based F test: F={best_tests['ssr_ftest'][0]}, p={best_tests['ssr_ftest'][1]}")
    print(f"ssr based chi2 test: chi2={best_tests['ssr_chi2test'][0]}, p={best_tests['ssr_chi2test'][1]}")
    print(f"likelihood ratio test: chi2={best_tests['lrtest'][0]}, p={best_tests['lrtest'][1]}")
    print(f"parameter F test: F={best_tests['params_ftest'][0]}, p={best_tests['params_ftest'][1]}")

    return best_lag

In [98]:
# print_best_granger_causality records the selected lag of each series in `lags`,
# so the dict has to exist before the loop starts.
lags = {}
for j, frame in data.items():
    print_best_granger_causality(frame, j, 5)

Best Granger Causality between 'biden' and Close price:
Number of lags: 1
ssr based F test: F=1.6478171394068823, p=0.19955910400315524
ssr based chi2 test: chi2=1.6528512039876575, p=0.19857131084209903
likelihood ratio test: chi2=1.6514659934296105, p=0.19875951895439056
parameter F test: F=1.6478171394068823, p=0.19955910400315524
Best Granger Causality between 'musk' and Close price:
Number of lags: 2
ssr based F test: F=0.6768560326696601, p=0.5084499463153778
ssr based chi2 test: chi2=1.3606258143958319, p=0.5064584930611717
likelihood ratio test: chi2=1.3596859781246167, p=0.5066965430196093
parameter F test: F=0.6768560326696601, p=0.5084499463153778
Best Granger Causality between 'trump' and Close price:
Number of lags: 1
ssr based F test: F=19.996882553655333, p=8.66173803579489e-06
ssr based chi2 test: chi2=20.05797282622241, p=7.51296589388249e-06
likelihood ratio test: chi2=19.856479130683965, p=8.347930992855778e-06
parameter F test: F=19.996882553655333, p=8.661738035794

