In [71]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests
import numpy as np

In [99]:
# Read the two input tables; both CSV files are decoded as latin1.
companies_with_cap = pd.read_csv(
    filepath_or_buffer='../data/companies_after_2005.csv',
    encoding='latin1',
)
df = pd.read_csv(
    filepath_or_buffer='market_svi_edgar.csv',
    encoding='latin1',
)
# To see: returns from SVI (all / high / low capitalization) and returns from EDGAR (all / high / low)
# The same with volume (as a function)
# Open questions: how to choose the lag, and what is a moving average?

In [100]:
# Use the same key column name as `df` ('symbol') so the two frames can be merged.
# rename() silently ignores a missing 'Symbol' label, so re-running this line is harmless.
companies_with_cap = companies_with_cap.rename(columns={'Symbol': 'symbol'})

# Attach each company's capitalization label to its rows (left join keeps every row of `df`).
# A 'capitalization' column left over from an earlier run of this cell is dropped first:
# merging twice would otherwise yield 'capitalization_x' / 'capitalization_y' and break
# every later lookup of df['capitalization'].
df = df.drop(columns=['capitalization'], errors='ignore').merge(
    companies_with_cap[['symbol', 'capitalization']],
    on='symbol',
    how='left',
)
df.head()

Unnamed: 0.1,Unnamed: 0,symbol,date,svi,svi.1,edgar,price,volume,capitalization
0,0,MSFT,2005-01-02,18,31338,145,26.67,398924026.0,high
1,1,MSFT,2005-01-09,19,33079,3220,26.12,379712121.0,high
2,2,MSFT,2005-01-16,12,20892,3030,25.65,266617523.0,high
3,3,MSFT,2005-01-23,20,34820,3539,26.18,409844550.0,high
4,4,MSFT,2005-01-30,15,26115,4112,26.32,347830186.0,high


In [101]:
# Percentage change of `price` from one row to the next, computed within each symbol so
# that a return never spans two different companies. The first row of every symbol has
# no predecessor and therefore gets NaN (as do rows whose symbol is missing).
# A single groupby replaces the per-symbol boolean-mask loop, which rescanned the whole
# frame once per company.
# NOTE(review): assumes rows are already in chronological order within each symbol.
df['WeeklyReturns'] = df.groupby('symbol')['price'].pct_change() * 100

In [46]:
# Save the frame, including the WeeklyReturns column when it has been computed.
# The DataFrame index is written too (to_csv default), as the first, unnamed column.
df.to_csv(path_or_buf='../data/returns_svi_edgar.csv')

In [102]:
# Keep a copy of every row that has at least one missing value (e.g. the first
# WeeklyReturns of each symbol) so it can be inspected after the rows are removed.
nan_rows = df[df.isna().any(axis=1)]

# Remove those rows from the working frame.
df.dropna(inplace=True)

# Must be the last expression of the cell to be rendered; as a bare statement in the
# middle of the cell it was silently discarded.
nan_rows

In [103]:
# Drop every row containing a missing value (a no-op when none remain), then show the frame.
df.dropna(axis=0, how='any', inplace=True)
df

Unnamed: 0.1,Unnamed: 0,symbol,date,svi,svi.1,edgar,price,volume,capitalization,WeeklyReturns
1,1,MSFT,2005-01-09,19,33079,3220,26.12,379712121.0,high,-2.062242
2,2,MSFT,2005-01-16,12,20892,3030,25.65,266617523.0,high,-1.799387
3,3,MSFT,2005-01-23,20,34820,3539,26.18,409844550.0,high,2.066277
4,4,MSFT,2005-01-30,15,26115,4112,26.32,347830186.0,high,0.534759
5,5,MSFT,2005-02-06,16,27856,3502,25.97,360145305.0,high,-1.329787
...,...,...,...,...,...,...,...,...,...,...
377099,377099,STM,2023-05-07,0,0,1960,42.42,14032941.0,high,-0.375763
377100,377100,STM,2023-05-14,0,0,1469,45.04,16758607.0,high,6.176332
377101,377101,STM,2023-05-21,0,0,2022,44.06,18806074.0,high,-2.175844
377102,377102,STM,2023-05-28,0,0,1855,44.91,14946141.0,high,1.929187


In [83]:
def testresultsint(data, lag_num, verbose): 
    test_result = grangercausalitytests(data, lag_num, verbose=verbose)

        # Extract the p-values from the test results
    p_values = [test_result[i+1][0]['ssr_ftest'][1] for i in range(lag_num)]

        # Print the test statistics and conclusion
    if verbose:
        print(f"Granger Causality Test Results:")
        for i in range(1, lag_num + 1):
            print(f"\nLag {i} - p-value: {p_values[i-1]}")
            if p_values[i-1] < 0.05:
                print(f"Conclusion: There is Granger causality at lag {i}")
            else:
                print(f"Conclusion: There is no Granger causality at lag {i}")

In [84]:
# NOTE(review): `data` is not defined in any cell visible in this notebook; it must be
# created before this cell can run on a fresh kernel.
testresultsint(data, lag_num=4, verbose=True)




Granger Causality
number of lags (no zero) 1
ssr based F test:         F=2.6184  , p=0.1056  , df_denom=306654, df_num=1
ssr based chi2 test:   chi2=2.6184  , p=0.1056  , df=1
likelihood ratio test: chi2=2.6184  , p=0.1056  , df=1
parameter F test:         F=2.6184  , p=0.1056  , df_denom=306654, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=8.0184  , p=0.0003  , df_denom=306651, df_num=2
ssr based chi2 test:   chi2=16.0372 , p=0.0003  , df=2
likelihood ratio test: chi2=16.0367 , p=0.0003  , df=2
parameter F test:         F=8.0184  , p=0.0003  , df_denom=306651, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=7.9457  , p=0.0000  , df_denom=306648, df_num=3
ssr based chi2 test:   chi2=23.8378 , p=0.0000  , df=3
likelihood ratio test: chi2=23.8368 , p=0.0000  , df=3
parameter F test:         F=7.9457  , p=0.0000  , df_denom=306648, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=7.

In [111]:
def test_results_int(df, lag_num, verbose):
    results_table = []

    for symbol in df['symbol'].unique():
        # Extract data for the current company
        company_data = df[df['symbol'] == symbol]

        # Granger causality test for returns and svi
        returns_svi_test = grangercausalitytests(company_data[['svi', 'WeeklyReturns']], lag_num, verbose=verbose)
        returns_svi_p_values = [returns_svi_test[i+1][0]['ssr_ftest'][1] for i in range(lag_num)]
        returns_svi_conclusions = [f"There is Granger causality at lag {i}" if p < 0.05 else f"There is no Granger causality at lag {i}" for i, p in enumerate(returns_svi_p_values)]
        
        # Granger causality test for returns and edgar
        returns_edgar_test = grangercausalitytests(company_data[['edgar', 'WeeklyReturns']], lag_num, verbose=verbose)
        returns_edgar_p_values = [returns_edgar_test[i+1][0]['ssr_ftest'][1] for i in range(lag_num)]
        returns_edgar_conclusions = [f"There is Granger causality at lag {i}" if p < 0.05 else f"There is no Granger causality at lag {i}" for i, p in enumerate(returns_edgar_p_values)]

        # Granger causality test for volume and svi
        volume_svi_test = grangercausalitytests(company_data[['svi', 'volume']], lag_num, verbose=verbose)
        volume_svi_p_values = [volume_svi_test[i+1][0]['ssr_ftest'][1] for i in range(lag_num)]
        volume_svi_conclusions = [f"There is Granger causality at lag {i}" if p < 0.05 else f"There is no Granger causality at lag {i}" for i, p in enumerate(volume_svi_p_values)]

        # Granger causality test for volume and edgar
        volume_edgar_test = grangercausalitytests(company_data[['edgar', 'volume']], lag_num, verbose=verbose)
        volume_edgar_p_values = [volume_edgar_test[i+1][0]['ssr_ftest'][1] for i in range(lag_num)]
        volume_edgar_conclusions = [f"There is Granger causality at lag {i}" if p < 0.05 else f"There is no Granger causality at lag {i}" for i, p in enumerate(volume_edgar_p_values)]

        # Extract capitalization index for the current company
        capitalization = company_data['capitalization'].values[0]

        # Append the results to the table
        results_table.append({
            'Symbol': symbol,
            'Capitalization': capitalization,
            'Returns-SVI Test Results': returns_svi_p_values,
            'Returns-SVI Conclusions': returns_svi_conclusions,
            'Returns-EDGAR Test Results': returns_edgar_p_values,
            'Returns-EDGAR Conclusions': returns_edgar_conclusions,
            'Volume-SVI Test Results': volume_svi_p_values,
            'Volume-SVI Conclusions': volume_svi_conclusions,
            'Volume-EDGAR Test Results': volume_edgar_p_values,
            'Volume-EDGAR Conclusions': volume_edgar_conclusions,
        })

    # Convert the results to a DataFrame
    results_df = pd.DataFrame(results_table)

    return results_df

In [112]:
# Per-company Granger tests for lags 1 through 5, without statsmodels' printed output.
results = test_results_int(df, lag_num=5, verbose=False)



In [114]:
# Preview the first company's row of the results table.
results.head(n=1)

Unnamed: 0,Symbol,Capitalization,Returns-SVI Test Results,Returns-SVI Conclusions,Returns-EDGAR Test Results,Returns-EDGAR Conclusions,Volume-SVI Test Results,Volume-SVI Conclusions,Volume-EDGAR Test Results,Volume-EDGAR Conclusions
0,MSFT,high,"[0.9993629899043458, 0.8825834076434431, 0.877...","[There is no Granger causality at lag 0, There...","[0.5565268139713697, 0.43391296513268207, 0.47...","[There is no Granger causality at lag 0, There...","[0.16372997970178485, 0.027438412709159764, 0....","[There is no Granger causality at lag 0, There...","[0.07313581019758562, 0.22859255072231552, 0.6...","[There is no Granger causality at lag 0, There..."


In [115]:
# Save the results table; the DataFrame index is written as the first, unnamed column.
results.to_csv(path_or_buf='../data-analysis/granger results.csv')

In [None]:
# 5 lags works best
# It is better to view the results without the tickers
# TODO: study the results and draw conclusions