In [217]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
import matplotlib.pyplot as plt

In [218]:
# Both input files are ';'-separated and indexed by the (company, time_stamp)
# pair, so the parsing options are declared once and shared by both reads.
csv_options = dict(sep=';', index_col=['company', 'time_stamp'])

topic_activity = pd.read_csv('data/topic_activity.csv', **csv_options)
stock_prices = pd.read_csv('data/stocks_prices_prep.csv', **csv_options)

# Index-aligned join using DataFrame.join's defaults: the stock-price columns
# are appended to the right of the topic-activity columns.
df = topic_activity.join(stock_prices)

#display(df)

In [219]:
# Augmented Dickey-Fuller check on a single series: the 'topic_4' column of one
# company, with missing observations removed before testing.
series = df.loc['adva optical networking se', 'topic_4'].dropna()
result = adfuller(series)

# Name the positions of the result tuple that are reported below.
adf_statistic = result[0]
p_value = result[1]
critical_values = result[4]

print(f'Test Statistics: {adf_statistic}')
print(f'p-value: {p_value}')
print(f'critical_values: {critical_values}')

# A p-value above 0.05 is reported as non-stationary; anything else as stationary.
verdict = "Series is not stationary" if p_value > 0.05 else "Series is stationary"
print(verdict)

Test Statistics: -8.601123779928415
p-value: 6.86458867809956e-14
critical_values: {'1%': -3.435638861796935, '5%': -2.863875547501718, '10%': -2.5680134763122906}
Series is stationary


In [235]:
%%time
# Granger-causality screen over every (company, topic) pair.
#
# For each pair, grangercausalitytests is run on the two-column frame
# ['price', topic] for lags 1..MAX_LAG and the p-value of the SSR-based F-test
# is kept per lag. A pair is flagged "relevant" when its smallest p-value over
# all lags is below ALPHA. The final display counts, per topic, how many
# companies were tested ('Total') and how many were flagged ('Relevant').
#
# NOTE(review): per the statsmodels docs the test asks whether the SECOND
# column (topic) Granger-causes the FIRST (price) - confirm that is the
# intended direction.
# NOTE(review): taking the minimum p-value over several lags without a
# multiple-testing correction inflates the share of "relevant" pairs.
# NOTE(review): only one series is checked for stationarity in the visible
# cells; the test presumes stationary inputs - confirm for the other series.
MAX_LAG = 5   # largest lag order handed to grangercausalitytests
ALPHA = 0.05  # threshold applied to the smallest per-pair p-value

# Overwrites df['price'] in place (kept from the original cell so later cells
# see the same frame). Re-running is harmless because abs() is idempotent.
# NOTE(review): the reason for taking absolute values is not visible here.
df['price'] = df['price'].abs()

# Select topic columns by name instead of assuming 'price' is the last column.
topic_columns = [column for column in df.columns if column != 'price']

# p_values[lag][(company, topic)] -> p-value of the 'ssr_ftest' at that lag.
p_values = {}
# (company, topic, reason) for every pair whose test could not be computed.
gc_skipped = []

for company in sorted(df.index.unique(level='company')):
    company_df = df.loc[company, :]
    for topic in topic_columns:
        company_topic_df = company_df[['price', topic]].dropna()

        try:
            test_results = grangercausalitytests(
                company_topic_df,
                maxlag=MAX_LAG,
                addconst=True,
                verbose=False,
            )
        except Exception as error:
            # Best effort: one failing pair must not abort the whole screen,
            # but the failure is recorded instead of being silently dropped.
            # KeyboardInterrupt is not an Exception subclass, so it propagates.
            gc_skipped.append((company, topic, f'{type(error).__name__}: {error}'))
            continue

        # Fill the accumulator in place rather than rebuilding it on every pair.
        for lag, outcome in test_results.items():
            p_values.setdefault(lag, {})[(company, topic)] = outcome[0]['ssr_ftest'][1]

if gc_skipped:
    print(f'Skipped {len(gc_skipped)} (company, topic) pair(s); see gc_skipped for details.')

if not p_values:
    raise RuntimeError(
        'Granger causality test failed for every (company, topic) pair; '
        'inspect gc_skipped for the reasons.'
    )

# Rows: (company, topic) MultiIndex; columns: lag orders.
gc_p_values = pd.DataFrame.from_dict(p_values)

# Round for display only; the threshold is applied to the unrounded p-values
# so values just below ALPHA (e.g. 0.04996) are not rounded up to 0.05 and lost.
gc_df = gc_p_values.round(4)
gc_df['is_relevant'] = gc_p_values.min(axis='columns') < ALPHA

display(
    gc_df.groupby(level=1)['is_relevant']
    .agg(['count', 'sum'])
    .rename(columns={'count': 'Total', 'sum': 'Relevant'})
)

Unnamed: 0,Total,Relevant
topic_1,105,26
topic_2,105,36
topic_3,105,38
topic_4,105,31


CPU times: user 30.8 s, sys: 14 s, total: 44.8 s
Wall time: 5.7 s
