In [27]:
#===================================================
#
# pe.ipynb: Return standardized P/E
#
# Author: Shubhrakanti Ganguly
#===================================================

import pandas as pd
import numpy as np
from IPython.core.display import display

# get a merged dataset with all values needed to calculate P/E and the average industry values
def get_df(securities_path="securities.csv",
           fundamentals_path="fundamentals.csv",
           prices_path="prices-split-adjusted.csv"):
    """Build the per-ticker P/E table and the average P/E of each sector.

    Parameters
    ----------
    securities_path : str
        CSV with at least the columns 'Ticker symbol' and 'GICS Sector'.
    fundamentals_path : str
        CSV with at least the columns 'Ticker Symbol' and 'Earnings Per Share'.
    prices_path : str
        CSV with at least the columns 'symbol' and 'close'.

    Returns
    -------
    (df, industries_dict)
        df has the columns Ticker, EPS, Industry, Price, PE (one row per
        ticker, original fundamentals index labels preserved).
        industries_dict maps each sector present in df to the mean PE of
        its rows.

    Notes
    -----
    * When a key appears several times in a file, the last row wins.
    * Tickers with non-positive EPS are excluded: a negative EPS gives a
      meaningless P/E and a zero EPS gives an infinite one, which would
      survive dropna() and make the sector average infinite.
    * Rows missing a sector, price or EPS are dropped.
    """
    df_sec = pd.read_csv(securities_path)
    df_fund = pd.read_csv(fundamentals_path)
    df_price = pd.read_csv(prices_path)

    # Lookup tables keyed by ticker. De-duplicating first keeps the "last row
    # wins" rule and guarantees the unique index that Series.map requires.
    sector_by_ticker = (
        df_sec.drop_duplicates(subset="Ticker symbol", keep="last")
        .set_index("Ticker symbol")["GICS Sector"]
    )
    close_by_ticker = (
        df_price.drop_duplicates(subset="symbol", keep="last")
        .set_index("symbol")["close"]
    )

    df = pd.DataFrame({
        "Ticker": df_fund["Ticker Symbol"],
        "EPS": df_fund["Earnings Per Share"],
    }).drop_duplicates(subset="Ticker", keep="last")

    # Vectorised lookups replace the row-by-row loops that wrapped every
    # assignment in a bare `except: pass`; unmatched tickers simply get NaN
    # and are removed by dropna() below, while real errors now surface.
    df = df.assign(
        Industry=lambda d: d["Ticker"].map(sector_by_ticker),
        Price=lambda d: d["Ticker"].map(close_by_ticker),
    )

    df = (
        df[df["EPS"] > 0]
        .assign(PE=lambda d: (d["Price"] / d["EPS"]).round(2))
        .dropna()
    )

    # sort=False keeps sectors in order of first appearance.
    industries_dict = {
        sector: np.mean(pe_values.values)
        for sector, pe_values in df.groupby("Industry", sort=False)["PE"]
    }

    return df, industries_dict

In [41]:
def get_pe(ticker, df, industries_dict, pe=None, industry=None):
    """Return a P/E standardized against its sector average.

    The result is (PE - sector_average) / sector_average.

    Parameters
    ----------
    ticker : str
        Ticker to look up in `df` when `pe`/`industry` are not both supplied.
    df : pandas.DataFrame
        Table with the columns 'Ticker', 'Industry' and 'PE'.
    industries_dict : dict
        Maps sector name to that sector's average P/E.
    pe : number, optional
        Explicit P/E to standardize. A value of 0 is honoured.
    industry : str, optional
        Sector whose average `pe` is compared against; must be a key of
        `industries_dict`.

    Raises
    ------
    AssertionError
        If the ticker lookup is needed but `ticker` is not in `df`, or if an
        explicit `industry` is not a key of `industries_dict`.
    """
    # `pe is None` (not `not pe`) so that a legitimate P/E of 0 is not
    # mistaken for "not provided".
    if pe is None or not industry:
        assert ticker in df.Ticker.values, "Please provide price and industry"
        data = df.loc[df["Ticker"] == ticker]
        indus = data["Industry"].values[0]
        ticker_pe = data["PE"].values[0]
        avg = industries_dict[indus]
        return (ticker_pe - avg) / avg

    # Validate against the mapping that was actually passed in.
    assert industry in industries_dict, "Your sector needs to be a GICS Sector"
    avg = industries_dict[industry]
    return (pe - avg) / avg
    
def get_all_pe(stocks, df, industries_dict, pe=None, industry=None):
    """Apply get_pe to every ticker in `stocks` and collect the results.

    `df`, `industries_dict`, `pe` and `industry` are forwarded unchanged to
    each get_pe call. Returns a list with one entry per ticker, in the same
    order as `stocks`.
    """
    return [
        get_pe(symbol, df, industries_dict, pe, industry)
        for symbol in stocks
    ]

# Script entry point: the body runs only when this code is executed as the
# main module.
if __name__ == "__main__":
    # get_df() reads the three CSV files and returns the merged P/E table
    # together with the per-sector average P/E mapping.
    df, industries_dict = get_df()
    # Standardized P/E for two sample tickers. The returned list is neither
    # assigned nor printed here.
    get_all_pe(("AAPL","MSFT"), df, industries_dict)

[-0.52777863753133802, -0.0021046767154662997]