In [13]:
#===================================================
#
# pe.ipynb: Return standardized P/E
#
# Author: Shubhrakanti Ganguly
#===================================================

import pandas as pd
import numpy as np
from IPython.core.display import display

# get a merged dataset with all values needed to calculate P/E and the average industry values
def get_df():
    """Build the per-ticker P/E table and the mean P/E of every GICS sector.

    Reads three CSV files from the current working directory:

    * ``securities.csv`` -- columns ``Ticker symbol`` and ``GICS Sector``
    * ``fundamentals.csv`` -- columns ``Ticker Symbol`` and
      ``Earnings Per Share``
    * ``prices-split-adjusted.csv`` -- columns ``symbol`` and ``close``

    For tickers that appear on several rows, the last row of each file is
    the one used.

    Returns:
        A ``(df, industries_dict)`` tuple. ``df`` has the columns ``Ticker``,
        ``EPS``, ``Industry``, ``Price`` and ``PE`` (price / EPS rounded to
        two decimals); rows with a negative EPS or with any missing value are
        removed. ``industries_dict`` maps each sector present in ``df`` to the
        mean ``PE`` of its rows.
    """
    df_sec = pd.read_csv("securities.csv")
    df_fund = pd.read_csv("fundamentals.csv")
    df_price = pd.read_csv("prices-split-adjusted.csv")

    # One lookup Series per source file, keyed by ticker. keep="last" makes the
    # final row of each file win when a ticker is listed more than once.
    last_close = (
        df_price[["symbol", "close"]]
        .drop_duplicates(subset="symbol", keep="last")
        .set_index("symbol")["close"]
    )
    sector_of = (
        df_sec[["Ticker symbol", "GICS Sector"]]
        .drop_duplicates(subset="Ticker symbol", keep="last")
        .set_index("Ticker symbol")["GICS Sector"]
    )

    # The EPS comes from the same file as the tickers, so no lookup is needed.
    df = pd.DataFrame()
    df["Ticker"] = df_fund["Ticker Symbol"]
    df["EPS"] = df_fund["Earnings Per Share"]
    df = df.drop_duplicates(subset="Ticker", keep="last")

    # Vectorised lookups instead of a row-by-row loop: tickers without a match
    # simply get NaN (and are removed by dropna below), and no exception has
    # to be swallowed to achieve that.
    df["Industry"] = df["Ticker"].map(sector_of)
    df["Price"] = df["Ticker"].map(last_close)

    # OPTIONAL: stocks with negative EPS are excluded from the industry
    # averages. Rows whose EPS is NaN survive this filter and are removed by
    # dropna below.
    df = df.loc[~(df["EPS"] < 0)].copy()

    # Make the PE ratio column
    df["PE"] = (df["Price"] / df["EPS"]).round(2)
    df = df.dropna()

    # Mean PE of every GICS sector; sort=False keeps the sectors in order of
    # first appearance in df.
    industries_dict = {
        sector: np.mean(rows["PE"].values)
        for sector, rows in df.groupby("Industry", sort=False)
    }

    return df, industries_dict


In [15]:
def get_pe(ticker, df, industries_dict, pe=None, industry=None):
    """Return a stock's P/E relative to the mean P/E of its sector.

    The result is ``(PE - avg) / avg``: the fractional deviation of the
    stock's P/E from its sector average (0 means exactly average). Note that
    this is a relative deviation, not a z-score -- no standard deviation is
    involved.

    Args:
        ticker: Ticker symbol to look up in ``df`` when ``pe`` or
            ``industry`` is not supplied.
        df: Table with at least the columns ``Ticker``, ``Industry`` and
            ``PE``.
        industries_dict: Mapping of sector name to that sector's mean P/E.
        pe: Optional P/E to use instead of the value stored in ``df``.
        industry: Optional sector name to use instead of the one stored in
            ``df``. It is only honoured together with ``pe``.

    Returns:
        The relative deviation of the P/E from the sector mean.

    Raises:
        AssertionError: If the ticker has to be looked up but is not in
            ``df``, or if the supplied ``industry`` is not a key of
            ``industries_dict``.
    """
    # `pe is None` rather than `not pe`, so an explicit P/E of 0 is used
    # instead of being mistaken for "not provided".
    if pe is None or not industry:
        # Without caller-supplied values the ticker must be one we have data
        # for. Raised explicitly (not via `assert`) so the check still runs
        # under `python -O`.
        if ticker not in df.Ticker.values:
            raise AssertionError("Please provide price and industry")

        # Fetch the PE of the stock and the average PE of its sector ("avg")
        data = df.loc[df["Ticker"] == ticker]
        indus = data["Industry"].values[0]
        PE = data["PE"].values[0]
        avg = industries_dict[indus]

        return (PE - avg)/avg

    # Caller supplied both values: we only need an average for that sector.
    if industry not in industries_dict:
        raise AssertionError("Your sector needs to be a GICS Sector")
    avg = industries_dict[industry]

    return (pe - avg)/avg
    
def get_all_pe(stocks, df, industries_dict, pe=None, industry=None):
    '''
    Apply get_pe to each ticker in ``stocks`` and collect the results.

    The same ``df``, ``industries_dict``, ``pe`` and ``industry`` arguments
    are forwarded unchanged for every ticker. Returns a list with one get_pe
    result per ticker, in the order the tickers were given.
    '''
    return [
        get_pe(symbol, df, industries_dict, pe, industry)
        for symbol in stocks
    ]

# Script entry point: build the merged table and the sector averages, then
# score two sample tickers.
if __name__ == "__main__":
    # get_df() reads its three CSV files by relative path, so they must be in
    # the current working directory.
    df, industries_dict = get_df()
    # The returned list is neither stored nor printed here.
    get_all_pe(("AAPL","MSFT"), df, industries_dict)

[-0.52777863753133802, -0.0021046767154662997]
