In [24]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_colwidth', 100)

from statsmodels.tsa.stattools import grangercausalitytests

# Import data

In [25]:
fslr = pd.read_csv('./data_first_solar_FSLR.csv')
gctay = pd.read_csv('./data_siemens_gamesa_GCTAY.csv')
spwr = pd.read_csv('./data_sunpower_SPWR.csv')
run = pd.read_csv('./data_sunrun_RUN.csv')
plug = pd.read_csv('./data_plug_power_PLUG.csv')

## Set datatime index

In [26]:
def set_datetime_index(df):
    df['date'] = pd.to_datetime(df['Date'], errors='coerce')
    df.set_index('date', inplace=True)
    df.drop('Date', axis=1, inplace=True)
    return df

In [27]:
fslr = set_datetime_index(fslr)
gctay = set_datetime_index(gctay)
spwr = set_datetime_index(spwr)
run = set_datetime_index(run)
plug = set_datetime_index(plug)

# Import data normalized

In [28]:
fslr_normalized = pd.read_csv('./data_first_solar_FSLR_normalized.csv')
gctay_normalized = pd.read_csv('./data_siemens_gamesa_GCTAY_normalized.csv')
spwr_normalized = pd.read_csv('./data_sunpower_SPWR_normalized.csv')
run_normalized = pd.read_csv('./data_sunrun_RUN_normalized.csv')
plug_normalized = pd.read_csv('./data_plug_power_PLUG_normalized.csv')

In [29]:
fslr_normalized = set_datetime_index(fslr_normalized)
gctay_normalized = set_datetime_index(gctay_normalized)
spwr_normalized = set_datetime_index(spwr_normalized)
run_normalized = set_datetime_index(run_normalized)
plug_normalized = set_datetime_index(plug_normalized)

# Granger Tests - FSLR

In [135]:
# column_names = ["ticker", "lag", "F", "p"]
# granger_res_lags = pd.DataFrame(columns=column_names)

In [156]:
def find_lags(test, ticker):
    lag_found = False
    lags = {}
    
    for i in range(1, maxlag):
        p = test[i][0]['ssr_ftest'][1]
        if (p < 0.05):
            print('lag ' + str(i) +  ' with p = ' + str(p))
            lags[i] = test[i][0]['ssr_ftest'] 
            lag_found = True

    if (lag_found == True):
        return lags
    else:
        print("lag not found")
    

In [157]:
maxlag = 10

## res_fslr_1: closing_price and compound_sentiment

In [158]:
input_data = np.asarray(fslr_normalized[['closing_price','compound_sentiment']])
res_fslr_1 = grangercausalitytests(input_data, maxlag, addconst = True, verbose=True);


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1
ssr based chi2 test:   chi2=5.6428  , p=0.0175  , df=1
likelihood ratio test: chi2=5.6366  , p=0.0176  , df=1
parameter F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2
ssr based chi2 test:   chi2=6.5743  , p=0.0374  , df=2
likelihood ratio test: chi2=6.5658  , p=0.0375  , df=2
parameter F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3
ssr based chi2 test:   chi2=7.4310  , p=0.0594  , df=3
likelihood ratio test: chi2=7.4201  , p=0.0596  , df=3
parameter F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.8753  , p=0.

In [159]:
find_lags(res_fslr_1, 'FSLR')

lag 1 with p = 0.017667746271182356
lag 2 with p = 0.03776220147425138


{1: (5.636161101099211, 0.017667746271182356, 2536.0, 1),
 2: (3.2806883928127704, 0.03776220147425138, 2533.0, 2)}

## res_fslr_2: closing_price and pos_sentiment

In [147]:
input_data = np.asarray(fslr_normalized[['closing_price','compound_sentiment']])
res_fslr_2 = grangercausalitytests(input_data, maxlag, addconst = True, verbose=True);


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1
ssr based chi2 test:   chi2=5.6428  , p=0.0175  , df=1
likelihood ratio test: chi2=5.6366  , p=0.0176  , df=1
parameter F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2
ssr based chi2 test:   chi2=6.5743  , p=0.0374  , df=2
likelihood ratio test: chi2=6.5658  , p=0.0375  , df=2
parameter F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3
ssr based chi2 test:   chi2=7.4310  , p=0.0594  , df=3
likelihood ratio test: chi2=7.4201  , p=0.0596  , df=3
parameter F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.8753  , p=0.

In [127]:
res_fslr_2_lags = find_lags(res_fslr_2,'FSLR')

lag 1 with p = 0.017667746271182356
lag 2 with p = 0.03776220147425138


In [128]:
res_fslr_2_lags

Unnamed: 0,ticker,lag,F,p


# f-tests

In [None]:
input_data = np.asarray(fslr_normalized[['closing_price','compound_sentiment']])
test = grangercausalitytests(input_data, maxlag, addconst = True, verbose=True);

In [182]:
test

{1: ({'ssr_ftest': (5.636161101099211, 0.017667746271182356, 2536.0, 1),
   'ssr_chi2test': (5.642828484105243, 0.017526909328234485, 1),
   'lrtest': (5.636567276123969, 0.01758961339359395, 1),
   'params_ftest': (5.636161101098851, 0.017667746271182984, 2536.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7ff6a5a8fa00>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7ff6a5a8fac0>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (3.2806883928127704, 0.03776220147425138, 2533.0, 2),
   'ssr_chi2test': (6.574328575569532, 0.0373596405177097, 2),
   'lrtest': (6.565828319267894, 0.03751876168072078, 2),
   'params_ftest': (3.280688392812215, 0.037762201474274136, 2533.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7ff6a5a8fd00>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7ff6a5a92550>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'ssr_ftest': (2.

In [200]:
# column_names = ["F", "p", "lag", "ticker",  "x2", "x1"]
# granger_res_lags = pd.DataFrame(columns=column_names)

In [201]:
# granger_res_lags = pd.DataFrame(f_tests_df, columns=column_names)

In [278]:
def find_lags(test, ticker):
    lag_found = False
    lags = []
    
    for i in range(1, maxlag):
        p = test[i][0]['ssr_ftest'][1]
        if (p < 0.05):
            print('lag ' + str(i) +  ' with p = ' + str(p))
            lags.append(test[i][0]['ssr_ftest'])
            lag_found = True

    if (lag_found == True):
        return lags
    else:
        print("lag not found")

In [279]:
lags = find_lags(test,'FSLR')

lag 1 with p = 0.017667746271182356
lag 2 with p = 0.03776220147425138


In [280]:
lags

[(5.636161101099211, 0.017667746271182356, 2536.0, 1),
 (3.2806883928127704, 0.03776220147425138, 2533.0, 2)]

In [281]:
def store_lags(granger_res_lags, lags, ticker, x2, x1):
    for i in lags:
        F = i[0]
        p = i[1]
        lag = i[3]
        new_row = pd.Series([F, p, lag, ticker, x2, x1], index=["F", "p", "lag", "ticker",  "x2", "x1"])
        print(new_row)
        print("\n")
        granger_res_lags.append(new_row, ignore_index=True)
    
    return granger_res_lags

In [282]:
column_names = ["F", "p", "lag", "ticker",  "x2", "x1"]
granger_res_lags = pd.DataFrame(columns=column_names)

In [283]:
lags

[(5.636161101099211, 0.017667746271182356, 2536.0, 1),
 (3.2806883928127704, 0.03776220147425138, 2533.0, 2)]

In [285]:
store_lags(granger_res_lags, lags,'FSLR', 'closing_price', 'compound_sentiment')

F                    5.63616
p                  0.0176677
lag                        1
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object


F                    3.28069
p                  0.0377622
lag                        2
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object




In [293]:
granger_res_lags

Unnamed: 0,F,p,lag,ticker,x2,x1


In [None]:
column_names = ["F", "p", "lag", "ticker",  "x2", "x1"]
granger_res_lags = pd.DataFrame(columns=column_names)

In [294]:
# column_names = ["F", "p", "lag", "ticker",  "x2", "x1"]
# granger_res_lags = pd.DataFrame(columns=column_names)
ticker = 'FSLR'
x2 = 'closing_price'
x1 = 'compound_sentiment'

for i in lags:
    F = i[0]
    p = i[1]
    lag = i[3]
    new_row = pd.Series([F, p, lag, ticker, x2, x1], index=["F", "p", "lag", "ticker",  "x2", "x1"])
    print(new_row)
    print("\n")
    granger_res_lags.append(new_row, ignore_index=True)

F                    5.63616
p                  0.0176677
lag                        1
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object


F                    3.28069
p                  0.0377622
lag                        2
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object




In [295]:
granger_res_lags

Unnamed: 0,F,p,lag,ticker,x2,x1
