In [None]:
import os
import json
import requests
import pandas as pd
from bs4 import BeautifulSoup
from tqdm.auto import tqdm
import time
import yfinance as yf
from scipy import stats


In [None]:
# Daily factor table, indexed by the parsed "date" column (stored as YYYYMMDD in the file).
fama_french = (
    pd.read_csv("data/F-F_Research_Data_Factors_daily.CSV", sep=";")
    .assign(date=lambda factors: pd.to_datetime(factors["date"], format="%Y%m%d"))
    .set_index("date")
)

In [None]:
# Load the filing store through a context manager so the file handle is
# closed as soon as the JSON has been parsed.
with open("metastore/cik_store.json", "r") as cik_store_file:
    cik_store = json.load(cik_store_file)

# Lookup table consulted by ticker_to_cik (indexed by ticker label, with a "cik_str" field).
company_tickers = pd.read_json("data/company_tickers.json")

In [None]:
def ticker_to_cik(ticker):
    """Look up the zero-padded, 10-character CIK for ``ticker``.

    Parameters
    ----------
    ticker : label looked up in the index of the module-level ``company_tickers`` frame.

    Returns
    -------
    str or None
        The ``cik_str`` value as a string left-padded with zeros to width 10,
        or ``None`` when the ticker is absent or the lookup does not resolve
        to a single row (e.g. a duplicated index label yields a DataFrame).
    """
    try:
        match = company_tickers.loc[ticker]
    except KeyError:
        return None

    # Only a single row (a Series) identifies one company; use the public
    # pd.Series class with isinstance rather than comparing exact types
    # through pandas' internal module path.
    if not isinstance(match, pd.Series):
        return None
    return str(match.loc["cik_str"]).zfill(10)

In [None]:
def clean_ticker(ticker):
    """Return the part of ``ticker`` before its first space (the whole string if it has none)."""
    symbol, _, _ = ticker.partition(" ")
    return symbol
# Constituent list: normalise tickers, attach CIKs, drop rows without a CIK,
# then draw a fixed-seed sample of 100 companies indexed by CIK.
companies = (
    pd.read_csv("data/spx.csv", sep=";")
    .assign(Ticker=lambda frame: frame["Ticker"].apply(clean_ticker))
    .assign(cik=lambda frame: frame["Ticker"].apply(ticker_to_cik))
    .dropna(subset=["cik"])
    .sample(n=100, random_state=1337)
    .set_index("cik")
    .copy()
)

In [None]:
def get_bhr(quotes, filing_date, days):
    """Buy-and-hold return measured on "Open" prices from ``filing_date`` onward.

    Parameters
    ----------
    quotes : DataFrame with a date index and an "Open" column.
    filing_date : label used to slice ``quotes`` from that date forward.
    days : number of rows to hold; the window spans ``days + 1`` rows.

    Returns
    -------
    float
        ``(last_open - first_open) / first_open`` over the window. When fewer
        than ``days + 1`` rows exist after ``filing_date`` the return is taken
        over the rows that are available. When no rows exist at all, NaN is
        returned instead of raising ``IndexError``.
    """
    open_prices = quotes[filing_date:]["Open"].iloc[:days + 1]
    if open_prices.empty:
        # No price on or after the filing date: report "no data" rather than crash.
        return float("nan")
    entry_price = open_prices.iloc[0]
    return (open_prices.iloc[-1] - entry_price) / entry_price

In [None]:
def get_fama_french(date, days):
    """Slice the module-level ``fama_french`` table around the row nearest to ``date``.

    For ``days >= 0`` the result starts at the nearest row and holds up to
    ``days + 1`` rows. For ``days < 0`` it holds up to ``abs(days) + 1`` rows
    ending just before the nearest row (that row itself is excluded).
    """
    anchor = fama_french.index.get_indexer([date], method="nearest")[0]
    if days >= 0:
        return fama_french.iloc[anchor:].iloc[:days + 1]
    return fama_french.iloc[:anchor].iloc[days - 1:]

def get_series_data(quotes, date, days):
    """Extract a price window around ``date`` together with its returns.

    For ``days >= 0`` the window is the first ``days + 1`` rows from ``date``
    onward; for ``days < 0`` it is the last ``abs(days) + 1`` rows up to and
    including ``date``.

    Returns
    -------
    tuple
        ``(window_prices, window_return, window_pct_changes)`` where
        ``window_return`` is the relative change of "Open" between the first
        and last row of the window, and ``window_pct_changes`` is the same
        slicing applied to ``quotes.pct_change()`` after dropping rows whose
        "Open" change is missing.
    """
    pct_changes = quotes.pct_change().dropna(subset=["Open"])
    if days >= 0:
        stock_returns = pct_changes[date:].iloc[:days + 1].copy()
        stock_short = quotes[date:].copy().iloc[:days + 1]
    else:
        stock_returns = pct_changes[:date].iloc[days - 1:].copy()
        stock_short = quotes[:date].copy().iloc[days - 1:]

    window_open = stock_short["Open"]
    first_open, last_open = window_open.iloc[0], window_open.iloc[-1]
    stock_return = (last_open - first_open) / first_open
    return stock_short, stock_return, stock_returns

def calc_beta(stock, market):
    """Return cov(stock, market) / var(market) computed on the "Open" columns.

    The market series is left-joined onto the stock series by index before
    the covariance matrix is taken.
    """
    paired = stock["Open"].rename("Stock").to_frame().join(market["Open"].rename("Market"))
    covariance = paired.cov()
    return covariance.loc["Market", "Stock"] / covariance.loc["Market", "Market"]

def jensens_alpha(quotes, market, filing_date, days):
    """Jensen's alpha of ``quotes`` against ``market`` over a window at ``filing_date``.

    Computes ``rs - (rf + beta * (rm - rf))`` where ``rs`` / ``rm`` are the
    window returns of the stock and the market from ``get_series_data``,
    ``beta`` comes from ``calc_beta`` on the window's percentage changes, and
    ``rf`` is the geometric mean of ``1 + RF / 100`` over the matching
    ``get_fama_french`` rows, compounded over ``abs(days)`` periods.

    A negative ``days`` selects a window before the filing date.
    """
    _, stock_window_return, stock_returns = get_series_data(quotes, filing_date, days)
    _, market_window_return, market_returns = get_series_data(market, filing_date, days)

    daily_risk_free = get_fama_french(filing_date, days)["RF"] / 100
    # The exponent is the window length regardless of its direction.
    risk_free = stats.gmean(daily_risk_free + 1) ** abs(days) - 1

    beta = calc_beta(stock_returns, market_returns)
    expected_return = risk_free + beta * (market_window_return - risk_free)
    return stock_window_return - expected_return

def get_volume_momentum(quotes, filing_date, lookback=252, tailway=6):
    """Compare recent trading volume with a longer baseline before ``filing_date``.

    The history is the last ``lookback`` rows up to ``filing_date`` minus the
    final row. Its last ``tailway`` rows form the recent window; everything
    before them is the baseline.

    Returns
    -------
    tuple
        ``(recent_mean / baseline_mean, baseline_mean, baseline_std)`` of the
        "Volume" column.
    """
    history = quotes.loc[:filing_date].iloc[-lookback:-1]["Volume"]
    baseline = history.iloc[:-tailway]
    recent = history.iloc[-tailway:]

    baseline_mean = baseline.mean()
    return recent.mean() / baseline_mean, baseline_mean, baseline.std()

def get_post_filing_mean_volume(quotes, filing_date, days=4):
    """Mean "Volume" over the first ``days + 1`` rows from ``filing_date`` onward."""
    post_filing_volume = quotes[filing_date:]["Volume"].iloc[:days + 1]
    return post_filing_volume.mean()

In [None]:
cik_store_priced = {}
generic_storage = []
skipped_acc_nr = []
sp500 = yf.Ticker("^GSPC").history(period="max")

# Window lengths behind each family of output columns; every column suffix
# equals the horizon it is computed with.
bhr_horizons = (4, 10, 20)
post_alpha_horizons = (1, 4, 10, 20)
pre_alpha_horizons = (2, 6, 20)
metric_columns = (
    [f"{series}BHR_{horizon}" for horizon in bhr_horizons for series in ("stock", "index")]
    + [f"jensens_alpha_value_{horizon}" for horizon in post_alpha_horizons]
    + [f"pre_filing_alpha_{horizon}" for horizon in pre_alpha_horizons]
    + ["volume_momentum", "abnormal_volume"]
)

for cik in tqdm(cik_store):
    ticker = companies.loc[cik, "Ticker"]
    quotes = yf.Ticker(ticker).history(period="max")
    filing_df = pd.read_json(cik_store[cik])

    # Create every metric column up front so the BHAR subtraction below still
    # works when all filings of a company are skipped.
    for column in metric_columns:
        filing_df[column] = float("nan")

    # An empty price history has no first date; treat every filing as skipped.
    # NOTE(review): the comparison below assumes the quote index and the parsed
    # filing date are both tz-naive or both tz-aware - confirm against the
    # installed yfinance version.
    first_quote_date = None if quotes.empty else quotes.index.min()

    for index, filing in filing_df.iterrows():
        filing_date = filing["filingDate"]
        if first_quote_date is None or first_quote_date > pd.to_datetime(filing_date):
            skipped_acc_nr.append(filing["accessionNumber"])
            continue

        for horizon in bhr_horizons:
            filing_df.loc[index, f"stockBHR_{horizon}"] = get_bhr(quotes, filing_date, horizon)
            filing_df.loc[index, f"indexBHR_{horizon}"] = get_bhr(sp500, filing_date, horizon)

        # The 20-day alpha previously used a 30-day window while being stored
        # under the "_20" name; it now uses the horizon its column announces.
        for horizon in post_alpha_horizons:
            filing_df.loc[index, f"jensens_alpha_value_{horizon}"] = jensens_alpha(
                quotes, sp500, filing_date, horizon
            )

        # Negative horizons select the window before the filing date.
        for horizon in pre_alpha_horizons:
            filing_df.loc[index, f"pre_filing_alpha_{horizon}"] = jensens_alpha(
                quotes, sp500, filing_date, -horizon
            )

        volume_momentum, pre_filing_mean_volume, pre_filing_std_volume = get_volume_momentum(quotes, filing_date)
        post_filing_mean_volume = get_post_filing_mean_volume(quotes, filing_date)
        filing_df.loc[index, "volume_momentum"] = volume_momentum
        filing_df.loc[index, "abnormal_volume"] = (post_filing_mean_volume - pre_filing_mean_volume) / pre_filing_std_volume

    # Buy-and-hold abnormal return: stock return minus index return.
    for horizon in bhr_horizons:
        filing_df[f"stockBHAR_{horizon}"] = filing_df[f"stockBHR_{horizon}"] - filing_df[f"indexBHR_{horizon}"]

    cik_store_priced[cik] = filing_df.to_json()
    # Both files are rewritten after every company, so an interrupted run
    # still leaves the results gathered so far on disk. Context managers make
    # sure each handle is flushed and closed before the next iteration.
    with open("metastore/cik_store_priced.json", "w") as priced_file:
        json.dump(cik_store_priced, priced_file)
    with open("metastore/skipped_acc_nr.json", "w") as skipped_file:
        json.dump(skipped_acc_nr, skipped_file)