In [1]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_colwidth', 100)

from statsmodels.tsa.stattools import grangercausalitytests

# Import data

In [2]:
fslr = pd.read_csv('./data_first_solar_FSLR.csv')
gctay = pd.read_csv('./data_siemens_gamesa_GCTAY.csv')
spwr = pd.read_csv('./data_sunpower_SPWR.csv')
run = pd.read_csv('./data_sunrun_RUN.csv')
plug = pd.read_csv('./data_plug_power_PLUG.csv')

## Set datatime index

In [3]:
def set_datetime_index(df):
    df['date'] = pd.to_datetime(df['Date'], errors='coerce')
    df.set_index('date', inplace=True)
    df.drop('Date', axis=1, inplace=True)
    return df

In [4]:
fslr = set_datetime_index(fslr)
gctay = set_datetime_index(gctay)
spwr = set_datetime_index(spwr)
run = set_datetime_index(run)
plug = set_datetime_index(plug)

# Import data normalized

In [5]:
fslr_normalized = pd.read_csv('./data_first_solar_FSLR_normalized.csv')
gctay_normalized = pd.read_csv('./data_siemens_gamesa_GCTAY_normalized.csv')
spwr_normalized = pd.read_csv('./data_sunpower_SPWR_normalized.csv')
run_normalized = pd.read_csv('./data_sunrun_RUN_normalized.csv')
plug_normalized = pd.read_csv('./data_plug_power_PLUG_normalized.csv')

In [6]:
fslr_normalized = set_datetime_index(fslr_normalized)
gctay_normalized = set_datetime_index(gctay_normalized)
spwr_normalized = set_datetime_index(spwr_normalized)
run_normalized = set_datetime_index(run_normalized)
plug_normalized = set_datetime_index(plug_normalized)

# Granger Tests - FSLR

In [7]:
maxlag = 10
ticker = 'FSLR'
series = 'compound_sentiment'

In [8]:
input_data = np.asarray(fslr_normalized[['closing_price', series]])
test = grangercausalitytests(input_data, maxlag, addconst = True, verbose=True);


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1
ssr based chi2 test:   chi2=5.6428  , p=0.0175  , df=1
likelihood ratio test: chi2=5.6366  , p=0.0176  , df=1
parameter F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2
ssr based chi2 test:   chi2=6.5743  , p=0.0374  , df=2
likelihood ratio test: chi2=6.5658  , p=0.0375  , df=2
parameter F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3
ssr based chi2 test:   chi2=7.4310  , p=0.0594  , df=3
likelihood ratio test: chi2=7.4201  , p=0.0596  , df=3
parameter F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.8753  , p=0.

In [9]:
# test[1][0]['ssr_ftest']

In [10]:
def find_lags(test):
    lag_found = False
    lags = []
    
    for i in range(1, maxlag):
        p = test[i][0]['ssr_ftest'][1]
        if (p < 0.05):
            lags.append(test[i][0]['ssr_ftest'])
            lag_found = True

    if (lag_found == True):
        return lags
    else:
        print("lag not found")

In [121]:
lags_info = find_lags(test)

In [122]:
lags_info

[(5.636161101099211, 0.017667746271182356, 2536.0, 1),
 (3.2806883928127704, 0.03776220147425138, 2533.0, 2)]

# Populate df

In [178]:
column_names = ['F', 'p', 'lag', 'ticker', 'name_series']
granger_res_lags = pd.DataFrame(columns=column_names)
granger_res_list = []

In [179]:
type(lags_info[0])

tuple

In [184]:
def populate_df(lags_info, ticker, series):
    for index, t in enumerate(lags_info):
        F = t[0]
        p = t[1]
        lag = t[3]
        row_list = [F, p, lag, ticker, series]
        new_row_df = pd.DataFrame(row_list, column_names)
        
        
        granger_res_list.append(new_row_df.T)
        print(granger_res_list)
#         print(granger_res_list)
#         print('new_row_df',new_row_df.T)
#         print('granger_res_lags', granger_res_lags.columns)
#         granger_res_lags.concat(new_row_df, ignore_index=True)
#         pd.concat([granger_res_lags, new_row_df.T], ignore_index=True)
        

#         granger_res_lags.append(new_row_df.T, ignore_index=True)
#         print('granger_res_lags=\n',granger_res_lags)

In [183]:
populate_df(lags_info, ticker, series)

[         F          p lag ticker         name_series
0  5.63616  0.0176677   1   FSLR  compound_sentiment,          F          p lag ticker         name_series
0  3.28069  0.0377622   2   FSLR  compound_sentiment,          F          p lag ticker         name_series
0  5.63616  0.0176677   1   FSLR  compound_sentiment]
[         F          p lag ticker         name_series
0  5.63616  0.0176677   1   FSLR  compound_sentiment,          F          p lag ticker         name_series
0  3.28069  0.0377622   2   FSLR  compound_sentiment,          F          p lag ticker         name_series
0  5.63616  0.0176677   1   FSLR  compound_sentiment,          F          p lag ticker         name_series
0  3.28069  0.0377622   2   FSLR  compound_sentiment]


In [175]:
granger_res_lags

Unnamed: 0,F,p,lag,ticker,name_series
