In [9]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
import matplotlib.pyplot as plt

In [10]:
# Both input files are ';'-separated and are indexed by the same
# (company, time_stamp) pair, so they share one set of read options.
csv_options = {'sep': ';', 'index_col': ['company', 'time_stamp']}

topic_activity = pd.read_csv('data/topic_activity.csv', **csv_options)
stock_prices = pd.read_csv('data/stocks_prices_prep.csv', **csv_options)

# Index-on-index join with pandas' default (left) semantics: every
# topic-activity row is kept and the stock-price columns are attached where
# the (company, time_stamp) key matches.
df = topic_activity.join(stock_prices)

In [11]:
# Augmented Dickey-Fuller check on one example series: a single company's
# activity for a single topic, with missing observations removed first.
example_series = df.loc['adva optical networking se', 'topic_4'].dropna()
result = adfuller(example_series)

# Positions 0, 1 and 4 of the returned tuple are the test statistic, the
# p-value and the dict of critical values, respectively.
test_statistic, p_value, critical_values = result[0], result[1], result[4]

print(f'Test Statistics: {test_statistic}')
print(f'p-value: {p_value}')
print(f'critical_values: {critical_values}')

# The ADF null hypothesis is a unit root (non-stationarity); a p-value above
# 0.05 means that null cannot be rejected.
verdict = "Series is not stationary" if p_value > 0.05 else "Series is stationary"
print(verdict)

Test Statistics: -7.93603768968752
p-value: 3.411611584837627e-12
critical_values: {'1%': -3.4356560275160835, '5%': -2.8638831211270817, '10%': -2.568017509711682}
Series is stationary


In [16]:
%%time
df['price'] = abs(df['price'])
gc_df = None
for company in sorted(list(set(df.index.get_level_values('company')))):
    company_df = df.loc[company, :]
    for topic in company_df.columns[:-1]:
        company_topic_df = company_df[['price', topic]].dropna()
        
        try:
            gc = grangercausalitytests(
                company_topic_df,
                maxlag=5,
                addconst=True,
                verbose=True,
            )
            display(gc)
            gc = {key:{(company, topic): value[0]['ssr_ftest'][1]} for (key, value) in gc.items()}

            if gc_df is None:
                gc_df = gc.copy()
            else:
                gc_df = {key:value | gc[key] for (key, value) in gc_df.items()}
        except:
            pass
        break
    break

gc_df = pd.DataFrame.from_dict(gc_df).round(4)
gc_df['is_relevant'] = gc_df.min(axis='columns') < 0.05
display(gc_df.groupby(level=1)['is_relevant'].agg(
    ['count', 'sum']
).rename(
    columns={
        'count': 'Total',
        'sum': 'Relevant'
    }
).sort_values('Relevant'))


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=9.6571  , p=0.0019  , df_denom=1221, df_num=1
ssr based chi2 test:   chi2=9.6808  , p=0.0019  , df=1
likelihood ratio test: chi2=9.6427  , p=0.0019  , df=1
parameter F test:         F=9.6571  , p=0.0019  , df_denom=1221, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=9.2270  , p=0.0001  , df_denom=1218, df_num=2
ssr based chi2 test:   chi2=18.5297 , p=0.0001  , df=2
likelihood ratio test: chi2=18.3907 , p=0.0001  , df=2
parameter F test:         F=9.2270  , p=0.0001  , df_denom=1218, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=5.9366  , p=0.0005  , df_denom=1215, df_num=3
ssr based chi2 test:   chi2=17.9123 , p=0.0005  , df=3
likelihood ratio test: chi2=17.7823 , p=0.0005  , df=3
parameter F test:         F=5.9366  , p=0.0005  , df_denom=1215, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=4.5316  , p=0.

{1: ({'ssr_ftest': (9.657105451054878, 0.001929755355949934, 1221.0, 1),
   'ssr_chi2test': (9.680832982875653, 0.0018620019462680427, 1),
   'lrtest': (9.64274994264997, 0.0019010041924098795, 1),
   'params_ftest': (9.657105451055115, 0.0019297553559496034, 1221.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fc70bcf6a10>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fc70f3b4eb0>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (9.226963276188425, 0.00010539846531755782, 1218.0, 2),
   'ssr_chi2test': (18.52968158748513, 9.469581094626446e-05, 2),
   'lrtest': (18.39071184912791, 0.0001015097277103451, 2),
   'params_ftest': (9.226963276188371, 0.00010539846531755782, 1218.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fc70fc1a500>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fc6f601db10>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'ssr

Unnamed: 0,Total,Relevant
topic_1,1,1


CPU times: user 152 ms, sys: 118 ms, total: 270 ms
Wall time: 78.3 ms
