In [1]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests
import numpy as np

In [31]:
companies_with_cap = pd.read_csv('../data/companies_after_2005.csv', encoding='latin1')
df = pd.read_csv('../data/new_dataset.csv', encoding='latin1')
#to see: returns from svi all, retturns from svi high, returns from svi low, returns from edgar all/high/all
#the same with volume (function)
#How to choose a lag and what is moving average

In [33]:
companies_with_cap.rename(columns={'Symbol': 'symbol'}, inplace=True)
df = df.merge(companies_with_cap[['symbol', 'capitalization']], on='symbol', how='left')
df.head()

Unnamed: 0.1,Unnamed: 0,index,symbol,date,svi,edgar,price,volume,WeeklyReturns,returns,capitalization
0,0,0,MSFT,2005-01-02,31338,145,26.67,398924026.0,,,high
1,1,1,MSFT,2005-01-09,33079,3220,26.12,379712121.0,-2.062242,-2.0,high
2,2,2,MSFT,2005-01-16,20892,3030,25.65,266617523.0,-1.799387,-2.0,high
3,3,3,MSFT,2005-01-23,34820,3539,26.18,409844550.0,2.066277,2.0,high
4,4,4,MSFT,2005-01-30,26115,4112,26.32,347830186.0,0.534759,1.0,high


In [36]:
df.dropna(inplace=True)

In [37]:
def test_results_int(df, lag_num, verbose):
    results_table = []

    for symbol in df['symbol'].unique():
        # Extract data for the current company
        company_data = df[df['symbol'] == symbol]

        # Granger causality test for returns and svi
        returns_svi_test = grangercausalitytests(company_data[['WeeklyReturns', 'svi']], lag_num, verbose=verbose)
        returns_svi_p_values = returns_svi_test[lag_num][0]['ssr_ftest'][1]
        returns_svi_conclusion = 1 if returns_svi_p_values < 0.05 else 0

        # Granger causality test for returns and edgar
        returns_edgar_test = grangercausalitytests(company_data[['WeeklyReturns', 'edgar']], lag_num, verbose=verbose)
        returns_edgar_p_values = returns_edgar_test[lag_num][0]['ssr_ftest'][1]
        returns_edgar_conclusion = 1 if returns_edgar_p_values < 0.05 else 0

        # Granger causality test for volume and svi
        volume_svi_test = grangercausalitytests(company_data[['volume', 'svi']], lag_num, verbose=verbose)
        volume_svi_p_values = volume_svi_test[lag_num][0]['ssr_ftest'][1]
        volume_svi_conclusion = 1 if volume_svi_p_values < 0.05 else 0

        # Granger causality test for volume and edgar
        volume_edgar_test = grangercausalitytests(company_data[['volume', 'edgar']], lag_num, verbose=verbose)
        volume_edgar_p_values = volume_edgar_test[lag_num][0]['ssr_ftest'][1]
        volume_edgar_conclusion = 1 if volume_edgar_p_values < 0.05 else 0

        # Extract capitalization index for the current company
        capitalization = company_data['capitalization'].values[0]

        # Append the results to the table
        results_table.append({
            'Symbol': symbol,
            'Capitalization': capitalization,
            'Returns-SVI Test Results': returns_svi_p_values,
            'Returns-SVI Conclusion': returns_svi_conclusion,
            'Returns-EDGAR Test Results': returns_edgar_p_values,
            'Returns-EDGAR Conclusion': returns_edgar_conclusion,
            'Volume-SVI Test Results': volume_svi_p_values,
            'Volume-SVI Conclusion': volume_svi_conclusion,
            'Volume-EDGAR Test Results': volume_edgar_p_values,
            'Volume-EDGAR Conclusion': volume_edgar_conclusion,
        })

    # Convert the results to a DataFrame
    results_df = pd.DataFrame(results_table)

    return results_df

In [38]:
results = test_results_int(df, 5, False)



In [39]:
results.head(10)

Unnamed: 0,Symbol,Capitalization,Returns-SVI Test Results,Returns-SVI Conclusion,Returns-EDGAR Test Results,Returns-EDGAR Conclusion,Volume-SVI Test Results,Volume-SVI Conclusion,Volume-EDGAR Test Results,Volume-EDGAR Conclusion
0,MSFT,high,0.931541,0,0.01435056,1,0.125338,0,0.6620867,0
1,AAPL,high,0.178718,0,0.1897631,0,0.080157,0,0.1499291,0
2,NVDA,high,0.202968,0,0.9283577,0,0.929353,0,0.8408436,0
3,AMZN,high,0.556274,0,0.8499436,0,0.19773,0,0.5815074,0
4,GOOG,high,0.269169,0,2.425758e-69,1,0.613652,0,1.420695e-40,1
5,LLY,high,0.721001,0,0.06399764,0,0.350996,0,0.906969,0
6,TSM,high,0.312085,0,0.8417704,0,0.242769,0,0.468646,0
7,NVO,high,0.298538,0,0.9793509,0,0.340849,0,0.715942,0
8,JPM,high,0.044747,1,0.9089402,0,0.079622,0,0.9970973,0
9,UNH,high,0.252835,0,0.7375105,0,0.217804,0,0.8685916,0


In [115]:
results.to_csv('../data-analysis/granger results.csv')

In [40]:
results.shape

(290, 10)

In [41]:
ones_count_per_column = results.sum(axis=0)
ones_count_per_column

Symbol                        MSFTAAPLNVDAAMZNGOOGLLYTSMNVOJPMUNHWMTXOMJNJPG...
Capitalization                highhighhighhighhighhighhighhighhighhighhighhi...
Returns-SVI Test Results                                             153.323422
Returns-SVI Conclusion                                                       19
Returns-EDGAR Test Results                                           199.343026
Returns-EDGAR Conclusion                                                      9
Volume-SVI Test Results                                               87.316582
Volume-SVI Conclusion                                                       101
Volume-EDGAR Test Results                                            140.981234
Volume-EDGAR Conclusion                                                      36
dtype: object

In [42]:
resultshigh = results[results['Capitalization'] == 'high']
ones_count_per_column = resultshigh.sum(axis=0)
ones_count_per_column

Symbol                        MSFTAAPLNVDAAMZNGOOGLLYTSMNVOJPMUNHWMTXOMJNJPG...
Capitalization                highhighhighhighhighhighhighhighhighhighhighhi...
Returns-SVI Test Results                                             127.676903
Returns-SVI Conclusion                                                       16
Returns-EDGAR Test Results                                           164.660746
Returns-EDGAR Conclusion                                                      8
Volume-SVI Test Results                                               70.538743
Volume-SVI Conclusion                                                        85
Volume-EDGAR Test Results                                            116.402408
Volume-EDGAR Conclusion                                                      26
dtype: object

In [43]:
resultshigh.shape

(243, 10)

In [44]:
resultslow = results[results['Capitalization'] == 'low']
ones_count_per_column = resultslow.sum(axis=0)
ones_count_per_column

Symbol                        LKFNTRMKNBTBCXWBBLZBNSSCDVAXTTMITDSCBRLNTCTCSG...
Capitalization                lowlowlowlowlowlowlowlowlowlowlowlowlowlowlowl...
Returns-SVI Test Results                                              25.646519
Returns-SVI Conclusion                                                        3
Returns-EDGAR Test Results                                             34.68228
Returns-EDGAR Conclusion                                                      1
Volume-SVI Test Results                                                16.77784
Volume-SVI Conclusion                                                        16
Volume-EDGAR Test Results                                             24.578826
Volume-EDGAR Conclusion                                                      10
dtype: object

In [45]:
resultslow.shape

(47, 10)

In [46]:
percentRS = results['Returns-SVI Conclusion'].sum(axis=0)/len(results)
percentRE = results['Returns-EDGAR Conclusion'].sum(axis=0)/len(results)
percentVS = results['Volume-SVI Conclusion'].sum(axis=0)/len(results)
percentVE = results['Volume-EDGAR Conclusion'].sum(axis=0)/len(results)
percentRShigh = resultshigh['Returns-SVI Conclusion'].sum(axis=0)/len(resultshigh)
percentREhigh = resultshigh['Returns-EDGAR Conclusion'].sum(axis=0)/len(resultshigh)
percentVShigh = resultshigh['Volume-SVI Conclusion'].sum(axis=0)/len(resultshigh)
percentVEhigh = resultshigh['Volume-EDGAR Conclusion'].sum(axis=0)/len(resultshigh)
percentRSlow = resultslow['Returns-SVI Conclusion'].sum(axis=0)/len(resultslow)
percentRElow = resultslow['Returns-EDGAR Conclusion'].sum(axis=0)/len(resultslow)
percentVSlow = resultslow['Volume-SVI Conclusion'].sum(axis=0)/len(resultslow)
percentVElow = resultslow['Volume-EDGAR Conclusion'].sum(axis=0)/len(resultslow)

In [48]:
percentVS

0.3482758620689655

In [143]:
#CHECK LAG 10
results = test_results_int(df, 10, False)
results.sum(axis=0)



Symbol                        MSFTAAPLNVDAAMZNGOOGLLYTSMNVOJPMUNHWMTXOMJNJPG...
Capitalization                highhighhighhighhighhighhighhighhighhighhighhi...
Returns-SVI Test Results                                             166.686884
Returns-SVI Conclusion                                                       34
Returns-EDGAR Test Results                                           213.762376
Returns-EDGAR Conclusion                                                     17
Volume-SVI Test Results                                                97.47995
Volume-SVI Conclusion                                                       102
Volume-EDGAR Test Results                                            136.513097
Volume-EDGAR Conclusion                                                      91
dtype: object