In [3]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_colwidth', 100)

from statsmodels.tsa.stattools import grangercausalitytests

# Import data

In [4]:
fslr = pd.read_csv('./data_first_solar_FSLR.csv')
gctay = pd.read_csv('./data_siemens_gamesa_GCTAY.csv')
spwr = pd.read_csv('./data_sunpower_SPWR.csv')
run = pd.read_csv('./data_sunrun_RUN.csv')
plug = pd.read_csv('./data_plug_power_PLUG.csv')

## Set datatime index

In [5]:
def set_datetime_index(df):
    df['date'] = pd.to_datetime(df['Date'], errors='coerce')
    df.set_index('date', inplace=True)
    df.drop('Date', axis=1, inplace=True)
    return df

In [6]:
fslr = set_datetime_index(fslr)
gctay = set_datetime_index(gctay)
spwr = set_datetime_index(spwr)
run = set_datetime_index(run)
plug = set_datetime_index(plug)

# Import data normalized

In [7]:
fslr_normalized = pd.read_csv('./data_first_solar_FSLR_normalized.csv')
gctay_normalized = pd.read_csv('./data_siemens_gamesa_GCTAY_normalized.csv')
spwr_normalized = pd.read_csv('./data_sunpower_SPWR_normalized.csv')
run_normalized = pd.read_csv('./data_sunrun_RUN_normalized.csv')
plug_normalized = pd.read_csv('./data_plug_power_PLUG_normalized.csv')

In [8]:
fslr_normalized = set_datetime_index(fslr_normalized)
gctay_normalized = set_datetime_index(gctay_normalized)
spwr_normalized = set_datetime_index(spwr_normalized)
run_normalized = set_datetime_index(run_normalized)
plug_normalized = set_datetime_index(plug_normalized)

# Granger Tests - FSLR

In [17]:
maxlag = 10

In [18]:
input_data = np.asarray(fslr_normalized[['closing_price','compound_sentiment']])
test = grangercausalitytests(input_data, maxlag, addconst = True, verbose=True);


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1
ssr based chi2 test:   chi2=5.6428  , p=0.0175  , df=1
likelihood ratio test: chi2=5.6366  , p=0.0176  , df=1
parameter F test:         F=5.6362  , p=0.0177  , df_denom=2536, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2
ssr based chi2 test:   chi2=6.5743  , p=0.0374  , df=2
likelihood ratio test: chi2=6.5658  , p=0.0375  , df=2
parameter F test:         F=3.2807  , p=0.0378  , df_denom=2533, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3
ssr based chi2 test:   chi2=7.4310  , p=0.0594  , df=3
likelihood ratio test: chi2=7.4201  , p=0.0596  , df=3
parameter F test:         F=2.4702  , p=0.0602  , df_denom=2530, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.8753  , p=0.

In [24]:
def find_lags(test, ticker):
    lag_found = False
    lags = []
    
    for i in range(1, maxlag):
        p = test[i][0]['ssr_ftest'][1]
        if (p < 0.05):
#             print('lag ' + str(i) +  ' with p = ' + str(p))
            lags.append(test[i][0]['ssr_ftest'])
            lag_found = True

    if (lag_found == True):
        return lags
    else:
        print("lag not found")

In [25]:
lags_info = find_lags(test,'FSLR')

In [26]:
lags_info

[(5.636161101099211, 0.017667746271182356, 2536.0, 1),
 (3.2806883928127704, 0.03776220147425138, 2533.0, 2)]

In [29]:
# def store_lags(granger_res_lags, lags, ticker, x2, x1):
#     for i in lags:
#         F = i[0]
#         p = i[1]
#         lag = i[3]
#         new_row = pd.Series([F, p, lag, ticker, x2, x1], index=["F", "p", "lag", "ticker",  "x2", "x1"])
#         print(new_row)
#         print("\n")
#         granger_res_lags.append(new_row, ignore_index=True)
    
#     return granger_res_lags

In [None]:
# store_lags(granger_res_lags, lags,'FSLR', 'closing_price', 'compound_sentiment')

In [32]:
column_names = ["F", "p", "lag", "ticker",  "x2", "x1"]
granger_res_lags = pd.DataFrame(columns=column_names)

In [34]:
column_names = ['F', 'p', 'lag', 'ticker',  'x2', 'x1']
granger_res_lags = pd.DataFrame(columns=column_names)

In [39]:
def get_new_rows(lags_info, ticker, x2, x1):
    new_rows = []
    
    for i in lags_info:
        F = i[0]
        p = i[1]
        lag = i[3]
        new_row = [F, p, lag, ticker, x2, x1]
        new_rows.append(new_row)
#         new_row = pd.Series(row_list, labels)
        print(new_row)
        print("\n")
    
    return new_rows

In [40]:
new_rows = get_new_rows(lags_info, ticker, x2, x1)

[5.636161101099211, 0.017667746271182356, 1, 'FSLR', 'closing_price', 'compound_sentiment']


[3.2806883928127704, 0.03776220147425138, 2, 'FSLR', 'closing_price', 'compound_sentiment']




In [43]:
new_rows

[[5.636161101099211,
  0.017667746271182356,
  1,
  'FSLR',
  'closing_price',
  'compound_sentiment'],
 [3.2806883928127704,
  0.03776220147425138,
  2,
  'FSLR',
  'closing_price',
  'compound_sentiment']]

In [45]:
# # column_names = ["F", "p", "lag", "ticker",  "x2", "x1"]
# # granger_res_lags = pd.DataFrame(columns=column_names)

# ticker = 'FSLR'
# x2 = 'closing_price'
# x1 = 'compound_sentiment'
# labels = ["F", "p", "lag", "ticker",  "x2", "x1"]

# for i in lags_info:
#     F = i[0]
#     p = i[1]
#     lag = i[3]
#     row_list = [F, p, lag, ticker, x2, x1]
#     new_row = pd.Series(row_list, labels)
#     print(new_row)
#     print("\n")
# #     granger_res_lags.append(new_row, ignore_index=True)

F                    5.63616
p                  0.0176677
lag                        1
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object


F                    3.28069
p                  0.0377622
lag                        2
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object




In [58]:
def create_new_df(df, columns, new_rows):

    for row in new_rows:
        row_series = pd.Series(row, columns)
        df.append(row_series, ignore_index=True)
        print(row_series)
        
    return 
        

In [59]:
column_names = ['F', 'p', 'lag', 'ticker',  'x2', 'x1']
result = pd.DataFrame(columns=column_names)
result = create_new_df(result, column_names, new_rows)

F                    5.63616
p                  0.0176677
lag                        1
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object
F                    3.28069
p                  0.0377622
lag                        2
ticker                  FSLR
x2             closing_price
x1        compound_sentiment
dtype: object


In [60]:
result