In [1]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_colwidth', 100)

from statsmodels.tsa.stattools import grangercausalitytests

# Import data

In [2]:
# Load the raw per-ticker CSVs into DataFrames; the paths are relative to the
# notebook's working directory.
# A later cell passes every frame to set_datetime_index, which reads a 'Date'
# column from it; the Granger cells additionally read 'closing_price' and a
# sentiment column from `fslr` and `gctay`.
fslr = pd.read_csv('./data_first_solar_FSLR.csv')
gctay = pd.read_csv('./data_siemens_gamesa_GCTAY.csv')
spwr = pd.read_csv('./data_sunpower_SPWR.csv')
run = pd.read_csv('./data_sunrun_RUN.csv')
plug = pd.read_csv('./data_plug_power_PLUG.csv')

## Set datetime index

In [3]:
def set_datetime_index(df):
    """Re-index ``df`` by its parsed ``'Date'`` column, modifying it in place.

    The ``'Date'`` column is converted with ``pd.to_datetime``; because
    ``errors='coerce'`` is used, values that cannot be parsed become ``NaT``
    instead of raising. The parsed values become the index (named ``'date'``)
    and the original ``'Date'`` column is removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'Date'`` column. It is mutated by this call.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, now indexed by ``'date'``.

    Raises
    ------
    KeyError
        If ``df`` has no ``'Date'`` column (e.g. when called a second time on
        a frame that was already processed).
    """
    parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
    df['date'] = parsed_dates
    # The source column is no longer needed once its parsed copy exists.
    df.drop(columns='Date', inplace=True)
    df.set_index('date', inplace=True)
    return df

In [4]:
# Index every raw frame by its parsed 'Date' column.
# set_datetime_index removes 'Date' from the frame it is given, so running this
# cell a second time on the same kernel raises KeyError: 'Date' -- reload the
# CSVs first if it needs to be re-run.
fslr = set_datetime_index(fslr)
gctay = set_datetime_index(gctay)
spwr = set_datetime_index(spwr)
run = set_datetime_index(run)
plug = set_datetime_index(plug)

# Import normalized data

In [None]:
# Load the normalized counterparts of the per-ticker CSVs (paths relative to
# the notebook's working directory).
# NOTE(review): apart from the re-indexing cell that follows, no cell visible in
# this section reads the *_normalized frames -- confirm whether they are
# still needed.
fslr_normalized = pd.read_csv('./data_first_solar_FSLR_normalized.csv')
gctay_normalized = pd.read_csv('./data_siemens_gamesa_GCTAY_normalized.csv')
spwr_normalized = pd.read_csv('./data_sunpower_SPWR_normalized.csv')
run_normalized = pd.read_csv('./data_sunrun_RUN_normalized.csv')
plug_normalized = pd.read_csv('./data_plug_power_PLUG_normalized.csv')

In [None]:
# Index the normalized frames by date, the same way as the raw frames.
# Assumes each normalized CSV also has a 'Date' column -- TODO confirm.
fslr_normalized = set_datetime_index(fslr_normalized)
gctay_normalized = set_datetime_index(gctay_normalized)
spwr_normalized = set_datetime_index(spwr_normalized)
run_normalized = set_datetime_index(run_normalized)
plug_normalized = set_datetime_index(plug_normalized)

# Granger Tests - FSLR

In [21]:
# Parameters for the FSLR run.
# maxlag: highest lag order handed to grangercausalitytests in the next cell.
maxlag = 100
# ticker: label for this run; not referenced by the cells visible in this section.
ticker = 'FSLR'
# series: column of `fslr` that is paired with 'closing_price' in the test.
series = 'neg_sentiment'

In [22]:
# Build a two-column array: column 0 = closing price, column 1 = `series`.
# Per the statsmodels documentation, grangercausalitytests checks whether the
# SECOND column Granger-causes the FIRST, so this asks whether `series` helps
# predict 'closing_price'.
# NOTE(review): the test presumes stationary inputs; raw closing prices are
# presumably non-stationary -- confirm (or difference the series) before
# relying on the p-values.
# NOTE(review): verbose=True prints every statistic for every lag; recent
# statsmodels releases reportedly deprecate this flag -- verify against the
# installed version.
input_data = np.asarray(fslr[['closing_price', series]])
test = grangercausalitytests(input_data, maxlag, addconst = True, verbose=True);


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=8.0899  , p=0.0045  , df_denom=2536, df_num=1
ssr based chi2 test:   chi2=8.0994  , p=0.0044  , df=1
likelihood ratio test: chi2=8.0865  , p=0.0045  , df=1
parameter F test:         F=8.0899  , p=0.0045  , df_denom=2536, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=4.6287  , p=0.0098  , df_denom=2533, df_num=2
ssr based chi2 test:   chi2=9.2757  , p=0.0097  , df=2
likelihood ratio test: chi2=9.2588  , p=0.0098  , df=2
parameter F test:         F=4.6287  , p=0.0098  , df_denom=2533, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=3.5041  , p=0.0148  , df_denom=2530, df_num=3
ssr based chi2 test:   chi2=10.5415 , p=0.0145  , df=3
likelihood ratio test: chi2=10.5197 , p=0.0146  , df=3
parameter F test:         F=3.5041  , p=0.0148  , df_denom=2530, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=2.6045  , p=0.

likelihood ratio test: chi2=44.4901 , p=0.0248  , df=28
parameter F test:         F=1.5667  , p=0.0297  , df_denom=2455, df_num=28

Granger Causality
number of lags (no zero) 29
ssr based F test:         F=1.6326  , p=0.0180  , df_denom=2452, df_num=29
ssr based chi2 test:   chi2=48.4861 , p=0.0131  , df=29
likelihood ratio test: chi2=48.0239 , p=0.0146  , df=29
parameter F test:         F=1.6326  , p=0.0180  , df_denom=2452, df_num=29

Granger Causality
number of lags (no zero) 30
ssr based F test:         F=1.5674  , p=0.0258  , df_denom=2449, df_num=30
ssr based chi2 test:   chi2=48.1920 , p=0.0190  , df=30
likelihood ratio test: chi2=47.7352 , p=0.0211  , df=30
parameter F test:         F=1.5674  , p=0.0258  , df_denom=2449, df_num=30

Granger Causality
number of lags (no zero) 31
ssr based F test:         F=1.5165  , p=0.0339  , df_denom=2446, df_num=31
ssr based chi2 test:   chi2=48.2215 , p=0.0251  , df=31
likelihood ratio test: chi2=47.7640 , p=0.0277  , df=31
parameter F test:

ssr based F test:         F=1.2257  , p=0.1178  , df_denom=2362, df_num=59
ssr based chi2 test:   chi2=75.9576 , p=0.0678  , df=59
likelihood ratio test: chi2=74.8181 , p=0.0803  , df=59
parameter F test:         F=1.2257  , p=0.1178  , df_denom=2362, df_num=59

Granger Causality
number of lags (no zero) 60
ssr based F test:         F=1.1897  , p=0.1530  , df_denom=2359, df_num=60
ssr based chi2 test:   chi2=75.0412 , p=0.0914  , df=60
likelihood ratio test: chi2=73.9283 , p=0.1067  , df=60
parameter F test:         F=1.1897  , p=0.1530  , df_denom=2359, df_num=60

Granger Causality
number of lags (no zero) 61
ssr based F test:         F=1.2321  , p=0.1087  , df_denom=2356, df_num=61
ssr based chi2 test:   chi2=79.0792 , p=0.0597  , df=61
likelihood ratio test: chi2=77.8441 , p=0.0718  , df=61
parameter F test:         F=1.2321  , p=0.1087  , df_denom=2356, df_num=61

Granger Causality
number of lags (no zero) 62
ssr based F test:         F=1.2333  , p=0.1060  , df_denom=2353, df_num=6

ssr based F test:         F=1.3592  , p=0.0165  , df_denom=2278, df_num=87
ssr based chi2 test:   chi2=127.3384, p=0.0032  , df=87
likelihood ratio test: chi2=124.1433, p=0.0055  , df=87
parameter F test:         F=1.3592  , p=0.0165  , df_denom=2278, df_num=87

Granger Causality
number of lags (no zero) 88
ssr based F test:         F=1.3703  , p=0.0139  , df_denom=2275, df_num=88
ssr based chi2 test:   chi2=129.9680, p=0.0024  , df=88
likelihood ratio test: chi2=126.6406, p=0.0044  , df=88
parameter F test:         F=1.3703  , p=0.0139  , df_denom=2275, df_num=88

Granger Causality
number of lags (no zero) 89
ssr based F test:         F=1.3987  , p=0.0091  , df_denom=2272, df_num=89
ssr based chi2 test:   chi2=134.2957, p=0.0014  , df=89
likelihood ratio test: chi2=130.7456, p=0.0026  , df=89
parameter F test:         F=1.3987  , p=0.0091  , df_denom=2272, df_num=89

Granger Causality
number of lags (no zero) 90
ssr based F test:         F=1.3709  , p=0.0130  , df_denom=2269, df_num=9

In [23]:
def find_lags(test, alpha=0.05):
    """Return the SSR-based F-test result of every lag that is significant.

    Parameters
    ----------
    test : dict
        Mapping ``lag -> (results, ...)`` in which ``results['ssr_ftest']`` is
        a tuple whose second element is the p-value. This is the layout this
        notebook obtains from ``grangercausalitytests``.
    alpha : float, default 0.05
        Significance threshold; a lag is kept when its p-value is strictly
        below ``alpha``.

    Returns
    -------
    list of tuple or None
        The ``'ssr_ftest'`` tuples of the significant lags, in ascending lag
        order. When no lag is significant, ``"lag not found"`` is printed and
        ``None`` is returned.
    """
    significant = []

    # Walk the lags that are actually present in `test` rather than
    # range(1, maxlag): that range stopped one short of the highest lag and
    # depended on a notebook-level `maxlag` that could be out of sync with the
    # results being inspected.
    for lag in sorted(test):
        ftest = test[lag][0]['ssr_ftest']
        if ftest[1] < alpha:
            significant.append(ftest)

    if significant:
        return significant

    print("lag not found")
    return None

In [24]:
# Keep the 'ssr_ftest' tuples of the lags whose p-value is below 0.05.
# find_lags prints "lag not found" and returns None when no lag qualifies, so
# `lags_info` may be None.
lags_info = find_lags(test)

In [28]:
# Show the significant lags. Each tuple is an 'ssr_ftest' entry laid out as
# (F statistic, p-value, df_denom, df_num), matching the "ssr based F test"
# lines printed by the test cell.
lags_info

[(8.089869391077013, 0.004487016466323889, 2536.0, 1),
 (4.62873244150067, 0.009849894056606443, 2533.0, 2),
 (3.5041455144244913, 0.014810302637529704, 2530.0, 3),
 (2.6044894171731925, 0.03419332561287149, 2527.0, 4),
 (2.0231268038932626, 0.048810885919218006, 2518.0, 7),
 (2.230808938093935, 0.022739751273766883, 2515.0, 8),
 (2.0079699233565185, 0.03480295249475197, 2512.0, 9),
 (1.995994373672954, 0.01769205735760449, 2500.0, 13),
 (2.0386197652611204, 0.012404250607010026, 2497.0, 14),
 (1.7178696247183478, 0.04126780576920983, 2494.0, 15),
 (1.7319031937425429, 0.025337112099918183, 2482.0, 19),
 (1.654545564409579, 0.033816506561525336, 2479.0, 20),
 (1.8078163188475744, 0.013527636342464332, 2476.0, 21),
 (1.6264119301713198, 0.03294713653554664, 2473.0, 22),
 (1.6071740213321737, 0.0337347638616717, 2470.0, 23),
 (1.5550164756721798, 0.041734595446157555, 2467.0, 24),
 (1.5311567437241378, 0.04460904962316748, 2464.0, 25),
 (1.5624905288249362, 0.0349069550268957, 2461.0, 26

# Granger Tests - GCTAY

In [29]:
# Parameters for the GCTAY run (these overwrite the values set for FSLR).
# maxlag: highest lag order handed to grangercausalitytests in the next cell.
maxlag = 100
# ticker: label for this run; not referenced by the cells visible in this section.
ticker = 'GCTAY'
# series: column of `gctay` that is paired with 'closing_price' in the test.
series = 'compound_sentiment'

In [30]:
# Build a two-column array: column 0 = closing price, column 1 = `series`.
# Per the statsmodels documentation, grangercausalitytests checks whether the
# SECOND column Granger-causes the FIRST, so this asks whether `series` helps
# predict 'closing_price'.
# Both `input_data` and `test` are re-bound here, replacing the FSLR values.
# NOTE(review): the test presumes stationary inputs; raw closing prices are
# presumably non-stationary -- confirm (or difference the series) before
# relying on the p-values.
input_data = np.asarray(gctay[['closing_price', series]])
test = grangercausalitytests(input_data, maxlag, addconst = True, verbose=True);


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1
ssr based chi2 test:   chi2=5.6428  , p=0.0175  , df=1
likelihood ratio test: chi2=5.6366  , p=0.0176  , df=1
parameter F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2
ssr based chi2 test:   chi2=6.5743  , p=0.0374  , df=2
likelihood ratio test: chi2=6.5658  , p=0.0375  , df=2
parameter F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3
ssr based chi2 test:   chi2=7.4310  , p=0.0594  , df=3
likelihood ratio test: chi2=7.4201  , p=0.0596  , df=3
parameter F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.8753  , p=0.

ssr based F test:         F=1.2926  , p=0.1396  , df_denom=2455, df_num=28
ssr based chi2 test:   chi2=37.0332 , p=0.1181  , df=28
likelihood ratio test: chi2=36.7628 , p=0.1242  , df=28
parameter F test:         F=1.2926  , p=0.1396  , df_denom=2455, df_num=28

Granger Causality
number of lags (no zero) 29
ssr based F test:         F=1.3053  , p=0.1275  , df_denom=2452, df_num=29
ssr based chi2 test:   chi2=38.7646 , p=0.1063  , df=29
likelihood ratio test: chi2=38.4685 , p=0.1123  , df=29
parameter F test:         F=1.3053  , p=0.1275  , df_denom=2452, df_num=29

Granger Causality
number of lags (no zero) 30
ssr based F test:         F=1.2892  , p=0.1351  , df_denom=2449, df_num=30
ssr based chi2 test:   chi2=39.6383 , p=0.1121  , df=30
likelihood ratio test: chi2=39.3286 , p=0.1185  , df=30
parameter F test:         F=1.2892  , p=0.1351  , df_denom=2449, df_num=30

Granger Causality
number of lags (no zero) 31
ssr based F test:         F=1.3040  , p=0.1217  , df_denom=2446, df_num=3

ssr based F test:         F=1.2214  , p=0.1254  , df_denom=2368, df_num=57
ssr based chi2 test:   chi2=72.9996 , p=0.0751  , df=57
likelihood ratio test: chi2=71.9471 , p=0.0878  , df=57
parameter F test:         F=1.2214  , p=0.1254  , df_denom=2368, df_num=57

Granger Causality
number of lags (no zero) 58
ssr based F test:         F=1.2620  , p=0.0895  , df_denom=2365, df_num=58
ssr based chi2 test:   chi2=76.8198 , p=0.0497  , df=58
likelihood ratio test: chi2=75.6550 , p=0.0596  , df=58
parameter F test:         F=1.2620  , p=0.0895  , df_denom=2365, df_num=58

Granger Causality
number of lags (no zero) 59
ssr based F test:         F=1.2435  , p=0.1023  , df_denom=2362, df_num=59
ssr based chi2 test:   chi2=77.0641 , p=0.0573  , df=59
likelihood ratio test: chi2=75.8914 , p=0.0685  , df=59
parameter F test:         F=1.2435  , p=0.1023  , df_denom=2362, df_num=59

Granger Causality
number of lags (no zero) 60
ssr based F test:         F=1.2289  , p=0.1132  , df_denom=2359, df_num=6

ssr based F test:         F=1.1843  , p=0.1220  , df_denom=2281, df_num=86
ssr based chi2 test:   chi2=109.5748, p=0.0441  , df=86
likelihood ratio test: chi2=107.1989, p=0.0606  , df=86
parameter F test:         F=1.1843  , p=0.1220  , df_denom=2281, df_num=86

Granger Causality
number of lags (no zero) 87
ssr based F test:         F=1.1824  , p=0.1230  , df_denom=2278, df_num=87
ssr based chi2 test:   chi2=110.7746, p=0.0437  , df=87
likelihood ratio test: chi2=108.3463, p=0.0604  , df=87
parameter F test:         F=1.1824  , p=0.1230  , df_denom=2278, df_num=87

Granger Causality
number of lags (no zero) 88
ssr based F test:         F=1.1705  , p=0.1363  , df_denom=2275, df_num=88
ssr based chi2 test:   chi2=111.0219, p=0.0492  , df=88
likelihood ratio test: chi2=108.5819, p=0.0676  , df=88
parameter F test:         F=1.1705  , p=0.1363  , df_denom=2275, df_num=88

Granger Causality
number of lags (no zero) 89
ssr based F test:         F=1.1557  , p=0.1551  , df_denom=2272, df_num=8

In [31]:
# NOTE(review): `find_lags` is also defined earlier in this notebook; this cell
# re-defines it. Keep a single definition (or keep the two in sync).
def find_lags(test, alpha=0.05):
    """Return the SSR-based F-test result of every lag that is significant.

    Parameters
    ----------
    test : dict
        Mapping ``lag -> (results, ...)`` in which ``results['ssr_ftest']`` is
        a tuple whose second element is the p-value. This is the layout this
        notebook obtains from ``grangercausalitytests``.
    alpha : float, default 0.05
        Significance threshold; a lag is kept when its p-value is strictly
        below ``alpha``.

    Returns
    -------
    list of tuple or None
        The ``'ssr_ftest'`` tuples of the significant lags, in ascending lag
        order. When no lag is significant, ``"lag not found"`` is printed and
        ``None`` is returned.
    """
    significant = []

    # Walk the lags that are actually present in `test` rather than
    # range(1, maxlag): that range stopped one short of the highest lag and
    # depended on a notebook-level `maxlag` that could be out of sync with the
    # results being inspected.
    for lag in sorted(test):
        ftest = test[lag][0]['ssr_ftest']
        if ftest[1] < alpha:
            significant.append(ftest)

    if significant:
        return significant

    print("lag not found")
    return None

In [32]:
# Keep the 'ssr_ftest' tuples of the GCTAY lags whose p-value is below 0.05;
# this re-binds `lags_info`, replacing the FSLR list.
# find_lags prints "lag not found" and returns None when no lag qualifies, so
# `lags_info` may be None.
lags_info = find_lags(test)

In [28]:
# NOTE(review): the output recorded for this cell is stale. Its execution count
# is 28, i.e. it ran before the GCTAY cells (In [29]-[32]), and the list it
# shows is identical to the FSLR result (lag 1: F=8.0899, whereas the GCTAY
# test output reports F=5.6362 for lag 1). Re-run this cell after the GCTAY
# cells so the displayed lags belong to GCTAY.
lags_info

[(8.089869391077013, 0.004487016466323889, 2536.0, 1),
 (4.62873244150067, 0.009849894056606443, 2533.0, 2),
 (3.5041455144244913, 0.014810302637529704, 2530.0, 3),
 (2.6044894171731925, 0.03419332561287149, 2527.0, 4),
 (2.0231268038932626, 0.048810885919218006, 2518.0, 7),
 (2.230808938093935, 0.022739751273766883, 2515.0, 8),
 (2.0079699233565185, 0.03480295249475197, 2512.0, 9),
 (1.995994373672954, 0.01769205735760449, 2500.0, 13),
 (2.0386197652611204, 0.012404250607010026, 2497.0, 14),
 (1.7178696247183478, 0.04126780576920983, 2494.0, 15),
 (1.7319031937425429, 0.025337112099918183, 2482.0, 19),
 (1.654545564409579, 0.033816506561525336, 2479.0, 20),
 (1.8078163188475744, 0.013527636342464332, 2476.0, 21),
 (1.6264119301713198, 0.03294713653554664, 2473.0, 22),
 (1.6071740213321737, 0.0337347638616717, 2470.0, 23),
 (1.5550164756721798, 0.041734595446157555, 2467.0, 24),
 (1.5311567437241378, 0.04460904962316748, 2464.0, 25),
 (1.5624905288249362, 0.0349069550268957, 2461.0, 26

# Granger Tests - RUN

# Granger Tests - PLUG

# Granger Tests - SPWR