In [1]:
import requests
import pandas as pd
import numpy as np
import pickle
import yfinance as yf

In [2]:
# Read every pickled input once and keep the objects keyed by file name.
file_paths = [
    "index_history_new.pkl",
    "saved_indexes.pkl",
    "saved_symbols.pkl",
    "ticker_history_new.pkl",
    "event_clean.pkl",
]


def _read_pickle(path):
    """Return the object stored in the pickle file at `path`.

    NOTE: unpickling can execute arbitrary code, so only point this at
    files produced by this project.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle, encoding='latin1')


data_dict = {path: _read_pickle(path) for path in file_paths}

# Friendly names for each loaded object.
index_history = data_dict["index_history_new.pkl"]
saved_indices = data_dict["saved_indexes.pkl"]
saved_tickers = data_dict["saved_symbols.pkl"]
ticker_history = data_dict["ticker_history_new.pkl"]
event_history = data_dict["event_clean.pkl"]

# Shallow copies of the two history containers (the contained values are shared).
dict_tickers = ticker_history.copy()
dict_index = index_history.copy()

1. index_history has the last 5 years of index data for the top 20 indices (from May 2019 - May 2023)
2. saved_indices has the top 20 index tickers
3. saved_tickers has the top 100 NASDAQ tickers
4. ticker_history has the last 5 years of stock data for the top 100 NASDAQ tickers (from May 2019 - May 2023)
5. event_history has the earnings date info from Jan 2019 until December 2023 for the saved_tickers

We might have to include data for index_history and ticker_history from Jan 2019 to May 2019, since the earnings data covers Jan 2019 - Dec 2023 (earnings events before May 2019 would otherwise have no matching price data).


1. index_history : dict with keys as index tickers. Value is a dataframe with 5 years of history (Date,Open,High,Low,Close,Volume,Dividends,Stock Splits)
2. saved_indices : list of keys of index_history
3. saved_tickers : list of keys of ticker_history
4. ticker_history : dict with keys as NASDAQ100 stock tickers. Value is a dataframe with 5 years of history (Date,Open,High,Low,Close,Volume,Dividends,Stock Splits)
5. event_history : dict with keys as NASDAQ100 stock tickers. Value is a dataframe with 5 years of history (Date,EPS Estimate, Reported EPS, Surprise)

In [3]:
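# Download step (left commented out): builds dict_tickers_new / dict_indices_new from yfinance,
# which the following cell pickles to ticker_history_new.pkl / index_history_new.pkl.
# big = yf.Tickers(saved_tickers)
# dict_tickers_new = {i:big.tickers[i].history(period = "66mo") for i in saved_tickers}
# major_indices = yf.Tickers(saved_indices)
# dict_indices_new = {i:major_indices.tickers[i].history(period = "66mo") for i in saved_indices}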

In [None]:
# with open('ticker_history_new.pkl', 'wb') as f:
#     pickle.dump(dict_tickers_new, f)
# with open('index_history_new.pkl', 'wb') as f:
#     pickle.dump(dict_indices_new, f)

In [3]:
# Offset from midnight to the NYSE close (4pm). The closing time matters because
# each close has to be classified as falling before or after an earnings event:
# for a pre-market earnings event, the 'before' data must not include that day's close.
closing_time_from_midnight = pd.DateOffset(hours=16)

us_indices = ['^GSPC', '^DJI', '^IXIC', '^NYA', '^XAX', '^RUT']

# Hours from midnight to the close for every index that is not in `us_indices`.
# NOTE(review): presumably these are local exchange closing times - confirm.
_other_closing_hours = {
    '^VIX': 15.25,
    '^BUK100P': 17.5,
    '^FTSE': 16.5,
    '^GDAXI': 18,
    '^FCHI': 17.5,
    '^STOXX50E': 18,
    '^N100': 17.5,
    '^BFX': 17.5,
    'IMOEX.ME': 16,
    '^N225': 15,
    '^HSI': 16,
    '000001.SS': 15,
    '399001.SZ': 15,
    '^STI': 17,
}

# Per-index offset from midnight to the close: US indices first (NYSE close),
# followed by the remaining indices in the order listed above.
closing_time_from_midnight_ind = {
    **{symbol: pd.DateOffset(hours=16) for symbol in us_indices},
    **{symbol: pd.DateOffset(hours=hours) for symbol, hours in _other_closing_hours.items()},
}

In [4]:
def _closes_in_window(history, closing, label, start, end):
    """Select the closes whose closing datetime lies in the window (start, end].

    Parameters
    ----------
    history : DataFrame
        Price history with a "Close" column.
    closing : DatetimeIndex
        Closing datetime of each row of `history` (same length and order).
    label : str
        Name given to the returned price column.
    start, end : Timestamp
        Window bounds; `start` is exclusive and `end` is inclusive.

    Returns
    -------
    DataFrame
        Indexed by "Closing DateTime", with the single column `label`.
    """
    in_window = (closing > start) & (closing <= end)
    # Explicit copy: the new column is added to an independent frame instead of
    # a chained slice of `history` (which triggers SettingWithCopyWarning).
    closes = history[["Close"]][in_window].copy()
    closes["Closing DateTime"] = closing[in_window]
    return closes.rename(columns={"Close": label}).set_index("Closing DateTime")


def _join_closes(sources, start, end):
    """Combine the closes of several instruments over the window (start, end].

    Parameters
    ----------
    sources : sequence of (label, history, closing) tuples
        The first entry is the base frame; every later entry is outer-joined
        onto it by closing datetime.
    start, end : Timestamp
        Window bounds; `start` is exclusive and `end` is inclusive.

    Returns
    -------
    DataFrame
        One price column per source, re-indexed 0..n-1.
    """
    frames = [
        _closes_in_window(history, closing, label, start, end)
        for label, history, closing in sources
    ]
    joined = frames[0]
    for other in frames[1:]:
        # Outer join keeps every closing datetime present on either side, so an
        # instrument that closes at a different time gets its own rows.
        # NOTE(review): this call is intentionally unchanged; the notebook text
        # expects 'Closing DateTime' to be a column of the final frame - confirm
        # on the pandas version in use.
        joined = joined.join(other, on="Closing DateTime", how="outer")
    joined.index = range(len(joined))
    return joined


d = {}
indices = saved_indices

# Closing datetimes of each index do not depend on the ticker or the event,
# so compute them once up front.
index_sources = [
    (ind, index_history[ind], index_history[ind].index + closing_time_from_midnight_ind[ind])
    for ind in indices
]

for sym in event_history:
    events = event_history[sym]
    event_date = events.index
    month_before_event_date = event_date - pd.DateOffset(months=1)
    week_after_event_date = event_date + pd.DateOffset(weeks=1)

    # The ticker's closing datetimes are the same for all of its events.
    ticker_source = (
        sym,
        ticker_history[sym],
        ticker_history[sym].index + closing_time_from_midnight,
    )
    sources = [ticker_source] + index_sources

    # One dict per earnings event: closes from the month up to the event,
    # closes from the week after it, and the one-row earnings record itself.
    d[sym] = [
        {
            "Before": _join_closes(sources, month_before_event_date[j], event_date[j]),
            "After": _join_closes(sources, event_date[j], week_after_event_date[j]),
            "Earning": events.iloc[[j]],
        }
        for j in range(len(events))
    ]
print("Done")

Done


In [7]:
# Persist the per-ticker event windows held in `d` so later work can reload them.
OUTPUT_PICKLE = '1mobef_1wkaft_earnings_data_clean_new.pkl'
with open(OUTPUT_PICKLE, mode='wb') as out_file:
    pickle.dump(d, out_file)

This pickle file is a dictionary whose keys are tickers and whose values are lists of dictionaries, one per earnings event. Each of these dictionaries has an 'Earning' key whose value is the one-row earnings dataframe for that event, and 'Before' ('After') keys whose values are dataframes of the ticker's and the indices' closing data for the 1 month before (1 week after) the earnings event.

The 'Before' dataframe has the columns 'Closing DateTime', the ticker symbol, and one column per index.

The 'Closing DateTime' column lists all closing times corresponding to dates in the 1-month window across ALL the indices (taking into account each index's timezone as well as its local closing time).

For example, the row for 2023-12-13 16:00:00-05:00 will have a valid entry only for the ticker and the US indices, as only those close at that time. All other columns will be NaN in that row since those indices don't close at the same time. For instance, ^STI is an index in Singapore which closes at 2023-12-13 04:00:00-05:00, a different time; hence, the closing price of ^STI is placed in the row corresponding to that time.

If we want to access the dates and closing prices of ^GSPC before ADBE's most recent earnings event, we can use the following code to get the corresponding dataframe.

d1 = d['ADBE'][0]['Before']

d1[pd.notna(d1['^GSPC'])][['Closing DateTime','^GSPC']]