In [200]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
import matplotlib.pyplot as plt

In [201]:
# Read the two semicolon-separated CSV files with identical settings; each
# frame is indexed by the (company, time_stamp) pair.
topic_activity, stock_prices = (
    pd.read_csv(csv_path, sep=';', index_col=['company', 'time_stamp'])
    for csv_path in ('data/topic_activity.csv', 'data/stocks_prices_prep.csv')
)

# Attach the stock-price columns to the topic-activity rows through the
# shared (company, time_stamp) index.
df = topic_activity.join(stock_prices)

In [202]:
# Augmented Dickey-Fuller test on a single example series: the topic_4
# column of one company, with missing observations removed first.
result = adfuller(df.loc['adva optical networking se', 'topic_4'].dropna())

# Positions 0, 1 and 4 of the returned tuple are reported below as the test
# statistic, the p-value and the critical values respectively.
print(
    f'Test Statistics: {result[0]}',
    f'p-value: {result[1]}',
    f'critical_values: {result[4]}',
    sep='\n',
)

# Decision at the 5% significance level: a p-value above 0.05 is reported
# as non-stationary, anything else as stationary.
print("Series is not stationary" if result[1] > 0.05 else "Series is stationary")

Test Statistics: -8.601123779928415
p-value: 6.86458867809956e-14
critical_values: {'1%': -3.435638861796935, '5%': -2.863875547501718, '10%': -2.5680134763122906}
Series is stationary


In [205]:
%%time
# Granger-causality screen over every (company, topic) pair.
#
# For each pair, grangercausalitytests is run on the two-column frame
# ['price', topic] for lags 1..10. statsmodels tests whether the series in
# the SECOND column Granger-causes the series in the FIRST column, so the
# column order below asks "does topic activity help predict price?".
#
# Results:
#   gc_df       -- {lag: {(company, topic): p-value of the SSR-based F test}}
#   gc_failures -- {(company, topic): exception} for pairs that could not be
#                  tested (previously these were dropped silently)

# NOTE: this overwrites df['price'] in place with its absolute value, so every
# later cell sees the transformed column. The operation is idempotent, which
# keeps the cell safe to re-run.
# NOTE(review): the reason for taking the absolute value is not visible in
# this notebook section -- confirm it is intended.
df['price'] = df['price'].abs()

gc_df = {}
gc_failures = {}

for company in sorted(df.index.get_level_values('company').unique()):
    company_df = df.loc[company, :]

    # Every column except the last one is treated as a topic series.
    # NOTE(review): this assumes 'price' is the last (and only non-topic)
    # column of the joined frame -- confirm against the stock price CSV.
    for topic in company_df.columns[:-1]:
        # Keep only the time stamps where both price and topic are present.
        company_topic_df = company_df[['price', topic]].dropna()

        try:
            gc_result = grangercausalitytests(
                company_topic_df,
                maxlag=10,
                addconst=True,
                verbose=False,
            )
            # outcome[0] is the dict of test results for that lag;
            # 'ssr_ftest' is (F statistic, p-value, df_denom, df_num).
            pair_p_values = {
                lag: outcome[0]['ssr_ftest'][1]
                for lag, outcome in gc_result.items()
            }
        except Exception as exc:
            # Best effort: a pair that cannot be tested (e.g. too few
            # observations after dropna) must not abort the whole screen.
            # Unlike a bare `except:`, this lets KeyboardInterrupt and
            # SystemExit through and keeps a record of what was skipped.
            gc_failures[(company, topic)] = exc
            continue

        # Update the per-lag mappings in place instead of rebuilding the
        # whole result dict on every iteration.
        for lag, p_value in pair_p_values.items():
            gc_df.setdefault(lag, {})[(company, topic)] = p_value

if gc_failures:
    print(
        f'Granger test skipped for {len(gc_failures)} (company, topic) '
        f'pair(s); see gc_failures for details'
    )

# Rows: (company, topic); columns: lag; values: p-values rounded to 4 digits.
pd.DataFrame.from_dict(gc_df).round(4)

CPU times: user 1min 13s, sys: 31.8 s, total: 1min 44s
Wall time: 13.3 s


Unnamed: 0,Unnamed: 1,1,2,3,4,5,6,7,8,9,10
1&1 drillisch ag,topic_1,0.1842,0.3788,0.5870,0.5097,0.5982,0.6202,0.7276,0.7434,0.7791,0.8400
1&1 drillisch ag,topic_2,0.1479,0.0000,0.0000,0.0001,0.0001,0.0002,0.0004,0.0008,0.0012,0.0014
1&1 drillisch ag,topic_3,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
1&1 drillisch ag,topic_4,0.2461,0.2816,0.2565,0.3395,0.4487,0.5526,0.3555,0.4540,0.5445,0.5877
aareal bank ag,topic_1,0.1030,0.3213,0.4804,0.2058,0.2600,0.0299,0.0207,0.0342,0.0393,0.0097
...,...,...,...,...,...,...,...,...,...,...,...
zeal network se,topic_4,0.1746,0.4080,0.5296,0.6640,0.7907,0.8487,0.8363,0.8976,0.9289,0.9335
zooplus ag,topic_1,0.5884,0.7464,0.6988,0.8016,0.8901,0.9213,0.9571,0.9823,0.9922,0.9911
zooplus ag,topic_2,0.0487,0.2491,0.4162,0.5784,0.6048,0.5014,0.4526,0.5945,0.5846,0.6189
zooplus ag,topic_3,0.0029,0.0383,0.0443,0.1607,0.2767,0.3369,0.5407,0.6909,0.7696,0.7906
