# Analysis of quantitative market variables of US Equities around earnings seasons
### Authored by Sarang Balan under guidance of Jean-Phillipe Maltais

This Python notebook conducts a quantitative and qualitative analysis of the market variables of public US companies to discover, analyse and understand potential patterns in the markets during earnings seasons.

### Importing Libraries

In [1]:
### importing libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as skl
import yfinance as yf
import html5lib
import datetime
import time
from datetime import date
from datetime import datetime
from datetime import timedelta



In [22]:
# Replace blocking standard-library modules with eventlet's cooperative versions.
# NOTE(review): eventlet recommends calling monkey_patch() before other modules are
# imported; in this notebook it sits after the library imports - confirm that is intended.
import eventlet
eventlet.monkey_patch()

In [2]:
from contextlib import contextmanager
import sys, os

@contextmanager
def suppress_stdout():
    """
    Context manager that silences everything written to sys.stdout inside the `with` body.

    While active, sys.stdout points at the OS null device; the stream that was installed
    on entry is put back on exit, even if the body raises.
    """
    with open(os.devnull, "w") as sink:
        # Remember whatever stream is currently installed so it can be restored afterwards
        saved_stream = sys.stdout
        sys.stdout = sink
        try:
            yield
        finally:
            sys.stdout = saved_stream

### Collecting earnings dates

The yfinance library accesses earnings reports of public companies from the Yahoo Finance website. Note, this library and API access is not officially related to Yahoo, and the data is of acceptable quality for the purposes of this project. 

The first step is to create a Python function that takes a ticker and a start/end date and returns the earnings release dates that fall within that window.

In [3]:
def get_earnings_dates(ticker: str, start_date: "str | datetime", end_date: "str | datetime"):
    """
    Returns the earnings release dates of a stock that fall inside a date window, together
    with the EPS surprise reported for each of those releases.

    Arguments:
        ticker (str): Ticker of the Equity
        start_date (str/datetime): Exclusive lower bound of the window, either a 'YYYY-MM-DD'
            string or a timezone-naive datetime
        end_date (str/datetime): Inclusive upper bound of the window, either a 'YYYY-MM-DD'
            string or a timezone-naive datetime

    returns:
        list_dates: list of timezone-naive timestamps of earning releases
        myepslist: list of the 'Surprise(%)' values of those releases, in the same order

    raises:
        ValueError: if a date string does not match 'YYYY-MM-DD', or if yfinance provides
            no earnings table for the ticker
    """

    def _as_datetime(value):
        # strptime only accepts strings; datetime objects are already in the target type
        if isinstance(value, datetime):
            return value
        return datetime.strptime(value, '%Y-%m-%d')

    window_start = _as_datetime(start_date)
    window_end = _as_datetime(end_date)

    earnings = yf.Ticker(ticker).earnings_dates
    if earnings is None:
        # Fail with an explicit message instead of an AttributeError further down
        raise ValueError(f'No earnings dates available for {ticker}')

    # Work on a copy so the frame handed out by yfinance is not modified in place,
    # then drop the timezone so the index can be compared with the naive window bounds
    earnings = earnings.copy()
    earnings.index = earnings.index.tz_localize(None)

    in_window = (earnings.index > window_start) & (earnings.index <= window_end)
    selected = earnings.loc[in_window]

    list_dates = list(selected.index)
    myepslist = list(selected['Surprise(%)'])

    return list_dates, myepslist
    

### Collecting stock prices

Now, given a list of earnings release dates, we need a function that pulls hourly stock prices from T - 2 to T + 2 trading days, where T is the market closing time on the day of the earnings release.

In [4]:
def get_prices(ticker_history, index_history, t_date: datetime, find_percentage_change: bool=True):
    """
    Collects the hourly close prices of a stock and of the index for the five days
    T-2 ... T+2 around an earnings release date T

    Arguments:
        ticker_history: DataFrame of the stock's price history, indexed by timestamp and
            holding a 'Close' column
        index_history: DataFrame of the index's price history in the same layout
        t_date (datetime): Date of earnings release (datetime or pandas Timestamp)
        find_percentage_change (bool): If True, every value is divided by the first value
            collected (so 1.0 means unchanged); if False, raw close prices are returned

    return:
        pricelist: list of stock closes (or ratios to the first close) over the five days
        indexlist: list of index closes (or ratios to the first close) over the five days

    raises:
        KeyError: if a requested day lies outside the range covered by the histories
        IndexError: if find_percentage_change is True and no prices were found at all
    """

    def _day_key(offset):
        # strftime is available on both datetime and pandas Timestamp objects
        return (t_date + timedelta(days=offset)).strftime('%Y-%m-%d')

    stock_closes = []
    index_closes = []

    # Loop over T-2 to T+2 days
    for delta in range(-2, 3):
        day = _day_key(delta)
        stock_day = ticker_history.loc[day]
        index_day = index_history.loc[day]

        # A day without stock data is taken to be a weekend day: jump two further days
        # away from T (backwards before the release, forwards after it) and use that
        # day instead. The release day itself (delta == 0) is never shifted.
        if stock_day.empty and delta != 0:
            day = _day_key(delta - 2 if delta < 0 else delta + 2)
            stock_day = ticker_history.loc[day]
            index_day = index_history.loc[day]

        stock_closes.extend(stock_day.Close)
        index_closes.extend(index_day.Close)

    if not find_percentage_change:
        # Raw prices requested: return them unchanged
        return stock_closes, index_closes

    pricelist = [x / stock_closes[0] for x in stock_closes]
    indexlist = [x / index_closes[0] for x in index_closes]

    return pricelist, indexlist

### Creating a function that retrieves US Treasury Bond data

One very important market variable is the current level of US Treasury bond yields. They are a useful proxy for the risk-free interest rate as well as an indicator of economic confidence.

In [5]:
def get_treasury_yield(period: int):
    """
    Retrieves the complete available history of the US Treasury yield index for a given maturity

    Arguments:
        period (int): Maturity in years; one of 5, 10 or 30

    returns:
        hist: the frame returned by yfinance `history(period="max")` for the matching
            yield index; callers select the dates they need from it

    raises:
        KeyError: if `period` is not one of the supported maturities
    """

    # Yahoo Finance symbols of the Treasury yield indices, keyed by maturity in years
    bond_dict = {
        5: '^FVX',
        10: '^TNX',
        30: '^TYX',
    }

    if period not in bond_dict:
        raise KeyError(f'Unsupported maturity {period!r}; choose one of {sorted(bond_dict)}')

    bill = yf.Ticker(bond_dict[period])

    return bill.history(period="max")

In [6]:
# Ticker,  Prices from T-2 to T+2, SPX Index change from T-2 to T+2,  Treasury bill 10 yr yield, Treasury bill 10yr - 2yr, Sector, Rating, EPS consensus

### Retrieving Sector Data

Sector of the particular Equity can be retrieved from the following code

In [7]:
def get_sector(ticker: str):
    """
    Looks up the sector that yfinance reports for an Equity

    Arguments:
        ticker (str): Ticker of the Equity

    returns:
        The value stored under the 'sector' key of the ticker's info dictionary
    """
    company_info = yf.Ticker(ticker).info
    return company_info['sector']

### Retrieving past quarter EPS

Retrieving EPS data of the year-over-year quarter. 

In [8]:
# Create a yfinance Ticker handle for Microsoft; leaving `stock` as the last expression
# of the cell makes the notebook display the object.
stock = yf.Ticker('MSFT')

stock

yfinance.Ticker object <MSFT>

### Creating a section of our final database

This section collects all the information pertaining to one company's past 7 or 8 earnings seasons and places it in a DataFrame of standard format. The code written here can then be reused for the tickers of other companies.

In [35]:
# Planned features per earnings event: ticker, prices from T-2 to T+2, S&P index change
# from T-2 to T+2, 10-year Treasury yield, Treasury 10yr - 2yr spread, sector, EPS consensus
colnames = ['date', 'ticker', 'TB10yr', 'TBslope', 'sector', 'EPS', 'stock_price', 'snp_price']
# One independent empty list per column, ready to be filled row by row
mydict = dict((name, []) for name in colnames)
mydict

{'date': [],
 'ticker': [],
 'TB10yr': [],
 'TBslope': [],
 'sector': [],
 'EPS': [],
 'stock_price': [],
 'snp_price': []}

In [17]:
# Load the constituents table and leave out every company in the Financials sector
stocklist = pd.read_csv('constituents.csv')
stocklist = stocklist.loc[stocklist['Sector'] != 'Financials']
stocklist

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care
...,...,...,...
499,XYL,Xylem,Industrials
500,YUM,Yum! Brands,Consumer Discretionary
501,ZBRA,Zebra Technologies,Information Technology
502,ZBH,Zimmer Biomet,Health Care


In [18]:
# Download the full yield histories for the 10-year and the 5-year Treasury maturities
TenYearYields, FiveYearYields = (get_treasury_yield(period=maturity) for maturity in (10, 5))

In [19]:
# Hourly ^GSPC bars for the trailing two years; passed to get_prices as the index history
indexhist = yf.download('^GSPC', period='2y', interval='1h')

[*********************100%***********************]  1 of 1 completed


In [64]:


# Build one row per (ticker, earnings release) and append it to dflist.
# Row layout: release date, sector, ticker, 'EPS', EPS surprise, 'price', <stock values...>,
# 'snpindex', <index values...>
# NOTE(review): `failedlist` and `dflist` are not created in this cell; they must already
# exist in the notebook session before it runs.
mytickers = yf.Tickers(list(stocklist['Symbol'])[15:80])

for tick, stock in mytickers.tickers.items():

    # NOTE(review): only tickers already recorded in failedlist are processed, which makes
    # this cell a retry pass over earlier failures - confirm that this is intended.
    if tick not in failedlist:
        continue
    start_time = time.time()
    print(tick)
    rowlist = []

    # get historical market data
    hist = stock.history(period='2y', interval='1h')

    # Get earnings dates; a ticker whose earnings lookup fails is logged and skipped.
    # `except Exception` (rather than a bare except) lets Ctrl-C stop the long-running loop.
    try:
        mylist, myepslist = get_earnings_dates(ticker=tick, start_date='2020-08-26', end_date='2022-08-26')
    except Exception:
        print(f'bruh {tick}')
        failedlist.append(tick)
        continue

    ###SECTOR
    mysector = get_sector(tick)

    ###TICKER
    myticker = tick

    for item, eps_surprise in zip(mylist, myepslist):

        # Only the price lookup can fail here; a release without usable prices is dropped
        try:
            pricelist, indexlist = get_prices(ticker_history=hist, index_history=indexhist, t_date=item)
        except Exception:
            rowlist = []
            continue

        rowlist = [item, mysector, myticker, 'EPS', eps_surprise,
                   'price', *pricelist,
                   'snpindex', *indexlist]

        dflist.append(rowlist)

    print("--- %s seconds ---" % (time.time() - start_time))
    
    

MO
--- 15.795258283615112 seconds ---
AMZN
--- 15.682317972183228 seconds ---
AMCR
--- 13.501492261886597 seconds ---
AEE
--- 15.405184745788574 seconds ---
AAL
--- 14.965453386306763 seconds ---
AEP
--- 15.215442419052124 seconds ---
AMT
--- 14.637720108032227 seconds ---
AWK
--- 13.73195767402649 seconds ---
ABC
--- 14.373051881790161 seconds ---
AME
--- 42.29565906524658 seconds ---
ANSS
bruh ANSS
ANTM
- ANTM: No data found, symbol may be delisted
bruh ANTM
APA
bruh APA
AAPL
bruh AAPL
AMAT
bruh AMAT
APTV
bruh APTV
ANET
bruh ANET
T
bruh T
ATO
bruh ATO
ADSK
--- 17.139145135879517 seconds ---
AVY
--- 15.066586017608643 seconds ---
BKR
--- 25.25567388534546 seconds ---
BLL
- BLL: No data found, symbol may be delisted
bruh BLL
BBWI
bruh BBWI
BAX
bruh BAX
BDX
bruh BDX
BBY
bruh BBY
BIO
bruh BIO
TECH
bruh TECH
BIIB
bruh BIIB
BXP
bruh BXP
BSX
--- 15.75613784790039 seconds ---
BMY
--- 15.69184136390686 seconds ---
AVGO
--- 31.83110761642456 seconds ---
BR
bruh BR
BF.B
- BF.B: No data found fo