In [1]:
import os
import pandas as pd
from decimal import Decimal
import numpy as np
from datetime import datetime, timedelta
from dotenv import load_dotenv
import yfinance as yf


import matplotlib.pyplot as plt
import plotly.express as px
import pytz  # Make sure to import pytz for timezone handling
import seaborn as sns


import requests
import csv
import json

import warnings

In [2]:
# Load variables from a local .env file into the process environment
load_dotenv()

# Alpha Vantage API key read from the environment (None when the variable is not set)
API_KEY = os.environ.get("alpha_vantage_api_key")
# NASDAQ_DATA_LINK_API_KEY = os.getenv("NASDAQ_DATA_LINK_API_KEY")

In [3]:
# Additional session settings
# Display every row and every column when a DataFrame is rendered
pd.options.display.max_rows = None
pd.options.display.max_columns = None
# Suppress all warnings for the rest of the session
warnings.simplefilter("ignore")
# Opt in to pandas copy-on-write semantics
pd.options.mode.copy_on_write = True

In [4]:
# Parameters section

# Key interpolated into every Alpha Vantage request URL built in this notebook.
alpha_vantage_api_key = API_KEY # FREE TIER API rate limit is 25 requests per day
# Alpha Vantage `function` query-parameter values, grouped by API family.
# NOTE: the 'fundmental' key spelling is kept as-is so existing lookups keep working.
alpha_vantage_function = {
    'core': [
        'TIME_SERIES_INTRADAY',
        'TIME_SERIES_DAILY',           # daily time series quote
        'TIME_SERIES_DAILY_ADJUSTED',  # daily time series, split/dividend-adjusted
        'GLOBAL_QUOTE',
    ],
    'fundmental': [
        'INCOME_STATEMENT',
        'BALANCE_SHEET',
        'CASH_FLOW',
        'EARNINGS',
        'EARNINGS_CALENDAR',
    ],
}

# JP market
# ticker_symbols = [

#     'SMFG'
#     ,'MUFG'
#     ,'TM'
#     ,'MITSY'
# ]

# Symbols processed by the valuation loop. Un-comment an entry to include it.
ticker_symbols = [
    # index
    # 'IVV',
    # 'QQQ',

    # energy
    'SLB',
    # 'HAL',
    # 'OKE',
    # 'OXY',

    # consumer staples
    # 'TGT',
    # 'STZ',
    # 'DLTR',
    # 'DG',
    # 'TSN',
    # 'MNST',
    # 'PG',
    # 'PM',
    # 'MO',
    # 'KR',

    # finance
    'TRV',
    # 'CB',
    # 'BAC',
    # 'BRK.B',

    # technology
    # 'NVDA',
    # 'MSFT',
    # 'AMZN',

    # energy
    # 'NEE',

    # healthcare
    # 'BSX',

    # transportation
    # 'CNI',
    # 'CP',

    # consumer discretionary
    # 'SBUX',

    # holding stock
    'TSM',
    'ACN',
    'DHI',
    'STZ',
    'GOOG',
    'TGT',
]

# Consumer Staples
# ticker_symbols = [
#     'MO',
#     'ADM',
#     # 'BF.B', # DATA ISSUE
#     'BG',
#     'CPB',
#     'CHD',
#     'CLX',
#     'KO',
#     'CL',
#     'CAG',
#     'STZ', # !!
#     'COST',
#     'DG',
#     'DLTR', # !!
#     'EL',
#     'GIS',
#     'HSY',
#     'HRL',
#     'K',
#     'KVUE',
#     'KDP',
#     'KMB',
#     'KHC',
#     # 'KR', # DATA ISSUE
#     'LW',
#     # 'MKC', # DATA ISSUE
#     'TAP',
#     'MDLZ',
#     'MNST',
#     'PEP',
#     'PM',
#     'PG',
#     'SJM',
#     'SYY',
#     'TGT', # !!
#     'TSN', # !!
#     'WBA', # DATA ISSUE
#     'WMT'
# ]

# Energy
# ticker_symbols = [
# 'APA',
#  'BKR', # !! HIGH PE, LOW PROFITABILITY, HIGH FORECAST, BUT CANNOT BEAT FORECAST; summary: similar to TRGP, sounder fundamentals, high earnings expectations, still to be verified
#  'CVX',
#  'COP',
#  'CTRA',
#  'DVN',
#  'FANG',
#  'EOG',
#  'EQT',
#  'XOM',
#  'HAL', # !!!! similar to SLB
#  'HES', # !  HIGH PE, UNDERVALUE; market hype with high expectations, strong 2024 performance, already overheated
#  'KMI',
#  'MRO',
#  'MPC',
#  'OXY',
#  'OKE', # !!
#  'PSX',
#  'SLB', # !!! A BIT HIGH PE, GOOD ROE, MED PROFITABILITY, STEADY GROWTH FORECAST, AND CAN BEAT FORECAST; summary: good fundamentals, similar profile to HAL, more market hype than HAL, worth researching
#  'TRGP', # !! HIGH PE, HIGH ROE, RELATIVELY MORE DEBT THAN EQUITY, LOW PROFITABILITY, HIGH FORECAST, BUT CANNOT BEAT FORECAST; summary: mediocre fundamentals but very high earnings expectations, still to be verified
#  'VLO',
#  'WMB'
#  ]


# # Consumer Discretionary
# ticker_symbols = [
#  'ABNB',
#  'AMZN',
#  'APTV',
#  'AZO',
#  'BBWI',
#  'BBY',
#  'BKNG',
#  'BWA',
#  'CZR',
#  'KMX',
#  'CCL',
#  'CMG',
#  'DRI',
#  'DECK',
#  'DPZ',
#  'DHI',
#  'EBAY',
#  'ETSY',
#  'EXPE',
#  'F',
#  'GRMN',
#  'GM',
#  'GPC',
#  'HAS',
#  'HLT',
#  'HD',
#  'LVS',
#  'LEN',
#  'LKQ',
#  'LOW',
#  'LULU',
#  'MAR',
#  'MCD',
#  'MGM',
#  'MHK',
#  'NKE',
#  'NCLH',
#  'NVR',
#  'ORLY',
#  'POOL',
#  'PHM',
#  'RL',
#  'ROST',
#  'RCL',
#  'SBUX',
#  'TPR',
#  'TSLA',
#  'TJX',
#  'TSCO',
#  'ULTA',
#  'WYNN',
#  'YUM'
#  ]

# Time-intelligence parameters
window_days = 90                                   # number of most recent daily rows analysed per symbol
end_date = datetime.now()
start_date = end_date - timedelta(window_days)     # window_days before end_date

# Earnings-calendar horizons in months
earning_calendar = [
    3,   # next 1 quarter of forecast earnings (the calendar currently only shows this one)
    6,   # next 2 quarters of forecast earnings
    12,  # next 4 quarters of forecast earnings
]

PE_yr_range = 6  # yields an (x - 1)-year PE range

# Per-symbol results, filled by the valuation loop
ticker_dict_json, ticker_dict_pd = {}, {}

# PE TTM Valuation

In [9]:
# Daily quote section
for j, symbol in enumerate(ticker_symbols):

    # STOCK SPLIT FACTOR section
    # This is the first request made for each symbol.
    url = f'https://www.alphavantage.co/query?function=SPLITS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Without a 'data' field (e.g. an informational / rate-limit payload) the previous
    # symbol's split table would silently be reused, so stop with a clear message instead.
    if 'data' not in data:
        raise RuntimeError(
            f"Alpha Vantage SPLITS response for {symbol} has no 'data' field: {data}"
        )

    split_records = data['data']
    if len(split_records) > 0:
        stock_split_record_df = pd.DataFrame(split_records)
        # split_factor arrives as text; unparseable values become NaN
        stock_split_record_df['split_factor'] = pd.to_numeric(stock_split_record_df['split_factor'], errors='coerce')
        stock_split_record_df['effective_date'] = pd.to_datetime(stock_split_record_df['effective_date'])
    else:
        # No recorded splits: empty table with explicit dtypes so `.dt` accessors still work
        stock_split_record_df = pd.DataFrame({
            'split_factor': pd.Series(dtype='float64'),
            'effective_date': pd.Series(dtype='datetime64[ns]'),
        })


    # Daily quote section
    # Full daily history for the symbol; only the closing price is kept
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&apikey={alpha_vantage_api_key}&outputsize=full'
    r = requests.get(url)
    data = r.json()

    if 'Time Series (Daily)' in data:
        selected_cols = ['4. close']

        # The payload is keyed by date; transpose so that dates become the row index
        Daily_stock_df = (
            pd.DataFrame(data['Time Series (Daily)'])
            .transpose()[selected_cols]
            .rename(columns={'4. close': 'stock_price'})
        )

        # Prices arrive as text: convert to float, then keep two decimals
        close_as_float = Daily_stock_df["stock_price"].astype(str).map(float)
        Daily_stock_df["stock_price"] = close_as_float.round(2)
        Daily_stock_df.index = pd.to_datetime(Daily_stock_df.index)


    # Back-adjust daily closes for stock splits: every quote dated before a split's
    # effective date is divided by that split's factor. One vectorised update per split
    # replaces the former scan over every (quote date, split date) pair.
    latest_quote_date = Daily_stock_df.index.max()
    for split in stock_split_record_df.itertuples(index=False):
        # Skip records with an unparseable factor/date and splits dated after the newest quote
        if (
            pd.isna(split.split_factor)
            or pd.isna(split.effective_date)
            or split.effective_date > latest_quote_date
        ):
            continue

        Daily_stock_df.loc[Daily_stock_df.index < split.effective_date, 'stock_price'] /= split.split_factor


    # Monthly quote section
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'Monthly Time Series' in data:
        Monthly_stock_df = pd.DataFrame(data['Monthly Time Series'])

    # Dates become the row index
    Monthly_stock_df = Monthly_stock_df.transpose()
    Monthly_stock_df.index = pd.to_datetime(Monthly_stock_df.index)

    # Keep December rows (year-end close) of the PE_yr_range years preceding the current year
    current_year = datetime.today().year
    filter_1 = Monthly_stock_df.index.year.isin(range(current_year - PE_yr_range, current_year))
    filter_2 = Monthly_stock_df.index.month == 12

    selected_cols = ['4. close']
    Monthly_stock_df = (
        Monthly_stock_df.loc[filter_1 & filter_2, selected_cols]
        .rename(columns={'4. close': 'stock_price'})
    )

    # Prices arrive as text: convert to float, then keep two decimals
    year_end_close = Monthly_stock_df["stock_price"].astype(str).map(float)
    Monthly_stock_df["stock_price"] = year_end_close.round(2)

    # modify stock price based on stock split
    # Every year-end close dated before a split's effective date is divided by that
    # split's factor. Comparing full dates (instead of calendar years) also covers
    # splits in the current year, which is outside the PE window, and applies each
    # split exactly once when several fall in the same year.
    as_of = pd.Timestamp(datetime.today())
    for split in stock_split_record_df.itertuples(index=False):
        # Skip records with an unparseable factor/date and splits that have not happened yet
        if (
            pd.isna(split.split_factor)
            or pd.isna(split.effective_date)
            or split.effective_date > as_of
        ):
            continue

        Monthly_stock_df.loc[Monthly_stock_df.index < split.effective_date, 'stock_price'] /= split.split_factor



    # Earning section
    # Past annual and quarterly EPS from the Alpha Vantage EARNINGS endpoint
    url = f'https://www.alphavantage.co/query?function=EARNINGS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualEarnings' in data:
        annualEPS_df = pd.DataFrame(data['annualEarnings'])

        # Reduce the fiscal period end date to its year
        annualEPS_df['fiscalDateEnding'] = pd.to_datetime(annualEPS_df['fiscalDateEnding']).dt.year

        # Same year window as the year-end prices: the PE_yr_range years before the current one
        this_year = datetime.today().year
        in_pe_window = annualEPS_df['fiscalDateEnding'].isin(range(this_year - PE_yr_range, this_year))
        annualEPS_df = annualEPS_df[in_pe_window].copy()

        # EPS arrives as text; values that are not numbers (e.g. 'None') become NaN and are dropped
        annualEPS_df['reportedEPS'] = pd.to_numeric(annualEPS_df['reportedEPS'], errors='coerce')

        # One row per fiscal year (the highest EPS wins on duplicates), newest year first
        annualEPS_df = (
            annualEPS_df
            .dropna(subset=['reportedEPS'])
            .sort_values('reportedEPS', ascending=False)
            .drop_duplicates('fiscalDateEnding')
            .sort_values('fiscalDateEnding', ascending=False)
            .reset_index(drop=True)
        )

        # Pair each fiscal year with the year-end close of the same calendar year, matching
        # on the year rather than on row position; years without a price are left out.
        year_end_close = Monthly_stock_df['stock_price'].groupby(Monthly_stock_df.index.year).first()
        matched_close = annualEPS_df['fiscalDateEnding'].map(year_end_close)
        has_close = matched_close.notna()
        annualEPS_df = annualEPS_df[has_close].reset_index(drop=True)

        # PE per year plus its mean / standard deviation band (always created, possibly empty)
        pe_prefix = f"PE_{PE_yr_range-1}yr"
        annualEPS_df["PE"] = matched_close[has_close].values / annualEPS_df['reportedEPS'].values
        annualEPS_df[f"{pe_prefix}_avg"] = round(annualEPS_df["PE"].mean(), 2)
        annualEPS_df[f"{pe_prefix}_std"] = round(np.std(annualEPS_df["PE"]), 2)
        annualEPS_df[f"{pe_prefix}_volatility_+"] = (annualEPS_df[f"{pe_prefix}_avg"] + annualEPS_df[f"{pe_prefix}_std"]).round(2)  # upper bound of the PE band
        annualEPS_df[f"{pe_prefix}_volatility_-"] = (annualEPS_df[f"{pe_prefix}_avg"] - annualEPS_df[f"{pe_prefix}_std"]).round(2)  # lower bound of the PE band

    if 'quarterlyEarnings' in data:
        selected_cols = [
            'reportedDate'
            ,'reportedEPS'
        ]

        qtrEPS_df = pd.DataFrame(data['quarterlyEarnings'])[selected_cols]
        qtrEPS_df['reportedDate'] = pd.to_datetime(qtrEPS_df['reportedDate'])

        # Missing / non-numeric quarterly EPS counts as 0
        qtrEPS_df['reportedEPS'] = pd.to_numeric(qtrEPS_df['reportedEPS'], errors='coerce').fillna(0.0)


    # # forecast 1 qtr earnings from alpha vantage API
    # for i in earning_calendar: comment out the for loop in case of future usage, i can be the parameter of {}month
    # The EARNINGS_CALENDAR endpoint answers with CSV text rather than JSON
    CSV_URL = f'https://www.alphavantage.co/query?function=EARNINGS_CALENDAR&symbol={symbol}&horizon=12month&apikey={alpha_vantage_api_key}'
    with requests.Session() as s:
        download = s.get(CSV_URL)
        decoded_content = download.content.decode('utf-8')

    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    my_list = list(cr)

    # First CSV line is the header, the remaining lines are the upcoming earnings rows
    forecast_earanings_df = pd.DataFrame(
        columns=my_list[0]
        ,data=my_list[1:]
        )

    # Take the first row's estimate as a plain scalar; an absent row or a blank
    # estimate falls back to 0 (no truth-testing of a NumPy array, no float(ndarray)).
    estimate_column = forecast_earanings_df['estimate']
    first_estimate = estimate_column.iloc[0] if len(estimate_column) > 0 else ''
    latest_projected_EPS = float(first_estimate) if first_estimate else 0


    # forecast 1 year earnings from yf API, forwardPE, PEG
    # yf data
    yf_data = yf.Ticker(symbol).info
    # forwardEps: per the original author's note, the forecast EPS through the end of the
    # next calendar year. Falls back to 0 when yfinance has no usable value.
    EPS_12month_projected = yf_data.get('forwardEps') or 0


    # US Treasury section
    # The 10-year yield does not depend on the symbol, so it is requested once per run
    # (on the first symbol) instead of once per symbol.
    if j == 0:
        url = f'https://www.alphavantage.co/query?function=TREASURY_YIELD&interval=daily&maturity=10year&apikey={alpha_vantage_api_key}'
        r = requests.get(url)
        data = r.json()

        if 'data' in data:
            US_T_10yrs_df = pd.DataFrame(data['data'])
            US_T_10yrs_df['value'] = pd.to_numeric(US_T_10yrs_df['value'], errors='coerce') # change dataframe value to numeric data
            # First usable observation; like the original `[0]` lookup this assumes the
            # API lists the most recent day first -- TODO confirm
            valid_yields = US_T_10yrs_df['value'].dropna()
            US_T_10yrs_YTM = valid_yields.iloc[0] if not valid_yields.empty else float('nan')


    # COMPANY OVERVIEW
    # One OVERVIEW request provides the PEG ratio, the forward PE and the market capitalisation
    url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Absent fields and non-numeric placeholders (e.g. 'None', '-') become NaN instead of
    # raising or leaving the previous symbol's value in place.
    overview_fields = ['PEGRatio', 'ForwardPE', 'MarketCapitalization']
    overview_values = pd.to_numeric(
        pd.Series([data.get(field) for field in overview_fields], index=overview_fields, dtype='object'),
        errors='coerce',
    )

    PEG_12month_projected = round(float(overview_values['PEGRatio']), 2)
    PE_12month_projected = round(float(overview_values['ForwardPE']), 2)
    stock_mkt_cap = float(overview_values['MarketCapitalization'])



    # INCOME STATEMENT
    # Annual and quarterly income statements: numeric conversion, period-over-period change and margins
    url = f'https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualReports' in data:
        annual_income_df = (
            pd.DataFrame(data['annualReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Every column after the first one ('fiscalDateEnding' is expected there) is a metric
        annual_income_metric_cols = annual_income_df.columns[1:]

        # Year-over-year change in percent, oldest report first
        for column in annual_income_metric_cols:
            annual_income_df[column] = pd.to_numeric(annual_income_df[column], errors='coerce')
            annual_income_df[f'{column}_YoY'] = annual_income_df[column].pct_change() * 100

        annual_income_YoY_metric_cols = [name for name in annual_income_df.columns if 'YoY' in name]

        # Margins as a percentage of total revenue
        for margin_col, profit_col in (
            ('gross_margin_%', 'grossProfit'),
            ('operating_margin_%', 'operatingIncome'),
            ('net_margin_%', 'netIncome'),
        ):
            annual_income_df[margin_col] = (annual_income_df[profit_col] / annual_income_df['totalRevenue']) * 100

    if 'quarterlyReports' in data:
        qtr_income_df = (
            pd.DataFrame(data['quarterlyReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Every column after the first one ('fiscalDateEnding' is expected there) is a metric
        qtr_income_metric_cols = qtr_income_df.columns[1:]

        # Quarter-over-quarter change in percent, oldest report first
        for column in qtr_income_metric_cols:
            qtr_income_df[column] = pd.to_numeric(qtr_income_df[column], errors='coerce')
            qtr_income_df[f'{column}_QoQ'] = qtr_income_df[column].pct_change() * 100

        qtr_income_QoQ_metric_cols = [name for name in qtr_income_df.columns if 'QoQ' in name]

        # Margins as a percentage of total revenue
        for margin_col, profit_col in (
            ('gross_margin_%', 'grossProfit'),
            ('operating_margin_%', 'operatingIncome'),
            ('net_margin_%', 'netIncome'),
        ):
            qtr_income_df[margin_col] = (qtr_income_df[profit_col] / qtr_income_df['totalRevenue']) * 100

    # Income-statement columns carried into the consolidated ratio tables
    income_ratio_cols = [
        'gross_margin_%',
        'operating_margin_%',
        'net_margin_%',
        'netIncome',
    ]
    
    

    # BALANCESHEET
    # Annual and quarterly balance sheets: numeric conversion plus liquidity / leverage ratios
    url = f'https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualReports' in data:
        annual_balancesheet_df = (
            pd.DataFrame(data['annualReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Every column after the first one ('fiscalDateEnding' is expected there) is numeric data
        for column in annual_balancesheet_df.columns[1:]:
            annual_balancesheet_df[column] = pd.to_numeric(annual_balancesheet_df[column], errors='coerce')

        annual_current_assets = annual_balancesheet_df['totalCurrentAssets']
        annual_current_liabilities = annual_balancesheet_df['totalCurrentLiabilities']

        annual_balancesheet_df['current_ratio'] = annual_current_assets / annual_current_liabilities
        annual_balancesheet_df['working_capital'] = annual_current_assets - annual_current_liabilities
        annual_balancesheet_df['longTermDebt_to_workingCp_ratio'] = (
            annual_balancesheet_df['longTermDebt'] / annual_balancesheet_df['working_capital']
        )
        annual_balancesheet_df['debtEquity_ratio'] = (
            annual_balancesheet_df['totalLiabilities'] / annual_balancesheet_df['totalShareholderEquity']
        )
        annual_balancesheet_df['quick_ratio'] = (
            (annual_current_assets - annual_balancesheet_df['inventory']) / annual_current_liabilities
        )

    if 'quarterlyReports' in data:
        qtr_balancesheet_df = (
            pd.DataFrame(data['quarterlyReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Every column after the first one ('fiscalDateEnding' is expected there) is numeric data
        for column in qtr_balancesheet_df.columns[1:]:
            qtr_balancesheet_df[column] = pd.to_numeric(qtr_balancesheet_df[column], errors='coerce')

        qtr_current_assets = qtr_balancesheet_df['totalCurrentAssets']
        qtr_current_liabilities = qtr_balancesheet_df['totalCurrentLiabilities']

        qtr_balancesheet_df['current_ratio'] = qtr_current_assets / qtr_current_liabilities
        qtr_balancesheet_df['working_capital'] = qtr_current_assets - qtr_current_liabilities
        qtr_balancesheet_df['longTermDebt_to_workingCp_ratio'] = (
            qtr_balancesheet_df['longTermDebt'] / qtr_balancesheet_df['working_capital']
        )
        qtr_balancesheet_df['debtEquity_ratio'] = (
            qtr_balancesheet_df['totalLiabilities'] / qtr_balancesheet_df['totalShareholderEquity']
        )
        qtr_balancesheet_df['quick_ratio'] = (
            (qtr_current_assets - qtr_balancesheet_df['inventory']) / qtr_current_liabilities
        )

    # Balance-sheet columns carried into the consolidated ratio tables
    balancesheet_ratio_cols = [
        'current_ratio',
        'working_capital',
        'longTermDebt_to_workingCp_ratio',
        'debtEquity_ratio',
        'quick_ratio',
        'totalShareholderEquity',
    ]


    # Build one annual and one quarterly table combining income-statement and
    # balance-sheet ratios for the current symbol.
    stock_ratios_annual_consolidate_df = pd.DataFrame()
    stock_ratios_qtr_consolidate_df = pd.DataFrame()

    # NOTE(review): these column assignments rely on pandas aligning rows by index label;
    # the income and balance-sheet frames keep the row labels of their API payloads, so this
    # presumably pairs the n-th listed report of each statement -- confirm both cover the same periods.
    stock_ratios_annual_consolidate_df[income_ratio_cols] = annual_income_df[income_ratio_cols]
    stock_ratios_annual_consolidate_df[balancesheet_ratio_cols] = annual_balancesheet_df[balancesheet_ratio_cols]

    stock_ratios_qtr_consolidate_df[income_ratio_cols] = qtr_income_df[income_ratio_cols]
    stock_ratios_qtr_consolidate_df[balancesheet_ratio_cols] = qtr_balancesheet_df[balancesheet_ratio_cols]

    # calculating new consolidated metrics 
    # annual df
    # ROE_% = net income / total shareholder equity, in percent
    stock_ratios_annual_consolidate_df['ROE_%'] = (annual_income_df['netIncome'] / annual_balancesheet_df['totalShareholderEquity']) * 100
    # liquidation_value = total assets - intangible assets - total liabilities
    stock_ratios_annual_consolidate_df['liquidation_value'] = ((annual_balancesheet_df['totalAssets'] - annual_balancesheet_df['intangibleAssets']) - annual_balancesheet_df['totalLiabilities'])
    # liquidation value as a percentage of the market capitalisation (stock_mkt_cap)
    stock_ratios_annual_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_annual_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_annual_consolidate_df['fiscalDateEnding'] = annual_balancesheet_df['fiscalDateEnding']

    # qtr df
    # same three metrics computed from the quarterly statements
    stock_ratios_qtr_consolidate_df['ROE_%'] = (qtr_income_df['netIncome'] / qtr_balancesheet_df['totalShareholderEquity']) * 100
    stock_ratios_qtr_consolidate_df['liquidation_value'] = ((qtr_balancesheet_df['totalAssets'] - qtr_balancesheet_df['intangibleAssets']) - qtr_balancesheet_df['totalLiabilities'])
    stock_ratios_qtr_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_qtr_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_qtr_consolidate_df['fiscalDateEnding'] = qtr_balancesheet_df['fiscalDateEnding']





    # Consolidated section
    # Work on the first `window_days` rows of the daily quotes
    stock_consolidate_df = Daily_stock_df.iloc[:window_days]

    stock_consolidate_df_date = stock_consolidate_df.index
    latest_quote_date = stock_consolidate_df_date.max()

    for i in stock_consolidate_df_date:

        # Quarterly EPS values reported strictly before this quote date
        eps_reported_before = qtrEPS_df.loc[qtrEPS_df['reportedDate'] < i, 'reportedEPS']

        # Trailing twelve months = first four such quarters; current quarter = first one
        EPS_TTM = eps_reported_before.iloc[:4].values.sum()
        EPS_curr_qtr = eps_reported_before.iloc[:1].values.sum()

        # Store both figures on this date's row
        stock_consolidate_df.loc[i, "EPS_TTM"] = EPS_TTM
        stock_consolidate_df.loc[i, "EPS_current_qtr"] = EPS_curr_qtr

        # Only the newest quote date produces the projected figure
        if i != latest_quote_date:
            continue

        # First three reported quarters + the latest projected quarterly EPS
        EPS_nextQtr_projected = latest_projected_EPS + eps_reported_before.iloc[:3].values.sum()
        stock_consolidate_df["EPS_nextQtr_projected"] = EPS_nextQtr_projected

    

    # stock's stats
    # Valuation metrics for every row of the analysis window. Row 0 values are read with
    # `.iloc[0]`: the frame has a date index, so a plain `[0]` would depend on the
    # deprecated positional fallback of Series.__getitem__.
    stock_consolidate_df["Ticker"] = symbol
    stock_consolidate_df["PE_1yr_forward"] = round(PE_12month_projected, 2)
    stock_consolidate_df["PE_TTM"] = (stock_consolidate_df["stock_price"] / stock_consolidate_df["EPS_TTM"]).round(2)
    stock_consolidate_df["PE_TTM_avg"] = stock_consolidate_df["PE_TTM"].mean().round(2)
    stock_consolidate_df["PE_TTM_std"] = np.std(stock_consolidate_df["PE_TTM"]).round(2)
    stock_consolidate_df["PE_TTM_volatility_+"] = (stock_consolidate_df["PE_TTM_avg"] + stock_consolidate_df["PE_TTM_std"]).round(2) # upper bound of the PE band
    stock_consolidate_df["PE_TTM_volatility_-"] = (stock_consolidate_df["PE_TTM_avg"] - stock_consolidate_df["PE_TTM_std"]).round(2) # lower bound of the PE band

    stock_consolidate_df["relative_valuation_TTM_+"] = (stock_consolidate_df["PE_TTM_volatility_+"] * stock_consolidate_df["EPS_TTM"]).round(2) # upper price bound of the relative valuation
    stock_consolidate_df["relative_valuation_TTM_-"] = (stock_consolidate_df["PE_TTM_volatility_-"] * stock_consolidate_df["EPS_TTM"]).round(2) # lower price bound of the relative valuation
    stock_consolidate_df["relative_valuation_TTM_median"] = (np.median([stock_consolidate_df["relative_valuation_TTM_+"].iloc[0], stock_consolidate_df["relative_valuation_TTM_-"].iloc[0]])).round(2) # midpoint price of the valuation based on the latest TTM PE

    stock_consolidate_df["relative_valuation_nextQuater_projected_+"] = (stock_consolidate_df["PE_TTM_volatility_+"] * stock_consolidate_df["EPS_nextQtr_projected"]).round(2) # upper price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextQuater_projected_-"] = (stock_consolidate_df["PE_TTM_volatility_-"] * stock_consolidate_df["EPS_nextQtr_projected"]).round(2) # lower price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextQuater_projected_median"] = (np.median([stock_consolidate_df["relative_valuation_nextQuater_projected_+"].iloc[0], stock_consolidate_df["relative_valuation_nextQuater_projected_-"].iloc[0]])).round(2) # midpoint price of the valuation based on 3 reported quarters + 1 projected quarter of EPS

    # Price statistics over the rows of the window
    stock_consolidate_df[f"{window_days}_price_min"] = stock_consolidate_df["stock_price"].min().round(2)
    stock_consolidate_df[f"{window_days}_price_max"] = stock_consolidate_df["stock_price"].max().round(2)
    stock_consolidate_df[f"{window_days}_price_avg"] = stock_consolidate_df["stock_price"].mean().round(2)
    stock_consolidate_df[f"{window_days}_price_std"] = np.std(stock_consolidate_df["stock_price"]).round(2)

    # Multi-year PE band taken from the annual EPS table (same value on every row)
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_avg"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_avg"].values[0]
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_std"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_std"].values[0]
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_volatility_+"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_volatility_+"].values[0]
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_volatility_-"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_volatility_-"].values[0]


    stock_consolidate_df["relative_valuation_nextYear_projected_+"] = (stock_consolidate_df["PE_TTM_volatility_+"] * EPS_12month_projected).round(2) # upper price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextYear_projected_-"] = (stock_consolidate_df["PE_TTM_volatility_-"] * EPS_12month_projected).round(2) # lower price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextYear_projected_median"] = (np.median([stock_consolidate_df["relative_valuation_nextYear_projected_+"].iloc[0], stock_consolidate_df["relative_valuation_nextYear_projected_-"].iloc[0]])).round(2) # midpoint price of the valuation based on next year's projected EPS

    stock_consolidate_df["next12months_PEG"] = PEG_12month_projected
    stock_consolidate_df["TTM_PEG"] = (stock_consolidate_df["PE_TTM"] / (((EPS_12month_projected - stock_consolidate_df["EPS_TTM"]) / stock_consolidate_df["EPS_TTM"]) * 100)).round(2) # PEG ratio from the EPS growth rate through next year; < 1 is read as undervalued

    stock_consolidate_df["nextYear_EPS_growthRate"] = (((EPS_12month_projected - stock_consolidate_df["EPS_TTM"]) / stock_consolidate_df["EPS_TTM"]) * 100).round(2)
    stock_consolidate_df["nextQuater_EPS_growthRate"] = (((stock_consolidate_df["EPS_nextQtr_projected"] - stock_consolidate_df["EPS_TTM"]) / stock_consolidate_df["EPS_TTM"]) * 100).round(2)

    # Earnings yield in percent, and its spread over the 10-year Treasury yield
    stock_consolidate_df["EarningYield_TTM"] = ((stock_consolidate_df["EPS_TTM"] / stock_consolidate_df["stock_price"]) * 100).round(2)
    stock_consolidate_df["ERP_TTM"] = stock_consolidate_df["EarningYield_TTM"] - US_T_10yrs_YTM

    # Last row of the quarterly ratio table
    stock_consolidate_df['latest_qtr_liquidation_mktcap_ratio_%'] = stock_ratios_qtr_consolidate_df['liquidation_mktcap_ratio_%'].values[-1]




    # filter conditions
    # Compare each day's price with the TTM relative-valuation band
    conditions = [
        (stock_consolidate_df["stock_price"] < stock_consolidate_df["relative_valuation_TTM_-"]),
        (stock_consolidate_df["stock_price"] > stock_consolidate_df["relative_valuation_TTM_+"]),
        ((stock_consolidate_df["stock_price"] >= stock_consolidate_df["relative_valuation_TTM_-"]) & (stock_consolidate_df["stock_price"] <= stock_consolidate_df["relative_valuation_TTM_+"])),
    ]

    # Labels in the same order as `conditions`
    categories = [
        'undervalued'
        ,'overvalued'
        ,'fair'
        ]

    # This KPI assess if the current stock price is under/over/fair to the current relative valuation
    # Initialise the column that is actually filled below (the previous code initialised an
    # unrelated, never-used "curr_assessment" column); rows matching no condition stay None.
    stock_consolidate_df["price_valuation_assessment"] = None

    for condition, category in zip(conditions, categories):
        stock_consolidate_df.loc[condition, "price_valuation_assessment"] = category





    # Columns published for every symbol, in output order
    selected_cols = [
        "Ticker",
        "stock_price",
        "EPS_TTM",
        "EPS_current_qtr",
        "EPS_nextQtr_projected",
        "PE_1yr_forward",
        "PE_TTM",
        "PE_TTM_avg",
        "PE_TTM_volatility_+",
        "PE_TTM_volatility_-",
        f"PE_{PE_yr_range-1}yr_avg",
        f"PE_{PE_yr_range-1}yr_volatility_+",
        f"PE_{PE_yr_range-1}yr_volatility_-",
        "relative_valuation_TTM_+",
        "relative_valuation_TTM_-",
        "relative_valuation_TTM_median",
        "relative_valuation_nextQuater_projected_+",
        "relative_valuation_nextQuater_projected_-",
        "relative_valuation_nextQuater_projected_median",
        "relative_valuation_nextYear_projected_+",
        "relative_valuation_nextYear_projected_-",
        "relative_valuation_nextYear_projected_median",
        "price_valuation_assessment",
        "nextQuater_EPS_growthRate",
        "nextYear_EPS_growthRate",
        "next12months_PEG",
        "TTM_PEG",
        "EarningYield_TTM",
        "ERP_TTM",
        "latest_qtr_liquidation_mktcap_ratio_%",
    ]

    # Keep the symbol's table both as a DataFrame and as its JSON serialisation
    symbol_view = stock_consolidate_df[selected_cols]
    ticker_dict_pd[symbol] = symbol_view
    ticker_dict_json[symbol] = symbol_view.to_json()

    # Screener table: one row per symbol, taken from the first row of its table.
    # It is (re)created empty when the first symbol is processed.
    if j == 0:
        ticker_screen_df = pd.DataFrame(columns=selected_cols)

    stock_consolidate_df_values = stock_consolidate_df[selected_cols].values[0]
    ticker_screen_df.loc[j] = stock_consolidate_df_values


# Screener-wide metric: mean PE_TTM across all screened symbols, repeated on every row
industry_pe_ttm_avg = ticker_screen_df['PE_TTM'].mean()
ticker_screen_df['Industry_PE_TTM_avg'] = round(industry_pe_ttm_avg, 2)

In [10]:
# Show the per-day valuation table that the loop stored for ACN
ticker_dict_pd['ACN']

Unnamed: 0,Ticker,stock_price,EPS_TTM,EPS_current_qtr,EPS_nextQtr_projected,PE_1yr_forward,PE_TTM,PE_TTM_avg,PE_TTM_volatility_+,PE_TTM_volatility_-,PE_5yr_avg,PE_5yr_volatility_+,PE_5yr_volatility_-,relative_valuation_TTM_+,relative_valuation_TTM_-,relative_valuation_TTM_median,relative_valuation_nextQuater_projected_+,relative_valuation_nextQuater_projected_-,relative_valuation_nextQuater_projected_median,relative_valuation_nextYear_projected_+,relative_valuation_nextYear_projected_-,relative_valuation_nextYear_projected_median,price_valuation_assessment,nextQuater_EPS_growthRate,nextYear_EPS_growthRate,next12months_PEG,TTM_PEG,EarningYield_TTM,ERP_TTM,latest_qtr_liquidation_mktcap_ratio_%
2024-08-30,ACN,341.95,11.88,3.13,11.95,26.39,28.78,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.84,3.47,-0.4,2.41
2024-08-29,ACN,340.21,11.88,3.13,11.95,26.39,28.64,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.82,3.49,-0.38,2.41
2024-08-28,ACN,337.39,11.88,3.13,11.95,26.39,28.4,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.79,3.52,-0.35,2.41
2024-08-27,ACN,340.38,11.88,3.13,11.95,26.39,28.65,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.82,3.49,-0.38,2.41
2024-08-26,ACN,336.78,11.88,3.13,11.95,26.39,28.35,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.78,3.53,-0.34,2.41
2024-08-23,ACN,333.27,11.88,3.13,11.95,26.39,28.05,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.74,3.56,-0.31,2.41
2024-08-22,ACN,330.57,11.88,3.13,11.95,26.39,27.83,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.71,3.59,-0.28,2.41
2024-08-21,ACN,333.6,11.88,3.13,11.95,26.39,28.08,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.75,3.56,-0.31,2.41
2024-08-20,ACN,330.37,11.88,3.13,11.95,26.39,27.81,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.71,3.6,-0.27,2.41
2024-08-19,ACN,329.44,11.88,3.13,11.95,26.39,27.73,26.09,27.45,24.73,30.81,38.93,22.69,326.11,293.79,309.95,328.03,295.52,311.77,350.54,315.8,333.17,overvalued,0.59,7.49,2.22,3.7,3.61,-0.26,2.41


In [117]:
# Rank the screened tickers from the highest ERP_TTM to the lowest.
ticker_screen_df.sort_values('ERP_TTM', ascending=False)

Unnamed: 0,Ticker,stock_price,EPS_TTM,EPS_current_qtr,EPS_latest_projected,PE_1yr_forward,PE_TTM,PE_TTM_avg,PE_TTM_volatility_+,PE_TTM_volatility_-,PE_5yr_avg,PE_5yr_volatility_+,PE_5yr_volatility_-,relative_valuation_TTM_+,relative_valuation_TTM_-,relative_valuation_TTM_median,relative_valuation_nextQuater_projected_+,relative_valuation_nextQuater_projected_-,relative_valuation_nextQuater_projected_median,relative_valuation_nextYear_projected_+,relative_valuation_nextYear_projected_-,relative_valuation_nextYear_projected_median,price_valuation_assessment,nextQuater_EPS_growthRate,nextYear_EPS_growthRate,next12months_PEG,TTM_PEG,EarningYield_TTM,ERP_TTM,latest_qtr_liquidation_mktcap_ratio_%,selected_PE_TTM_avg
21,GM,49.25,9.2,3.06,9.32,5.05,5.35,5.49,5.96,5.02,6.59,8.45,4.73,54.83,46.18,50.5,55.55,46.79,51.17,59.06,49.75,54.4,fair,1.3,7.72,16.84,0.69,18.68,14.86,121.26,20.43
19,F,11.14,1.64,0.47,1.75,5.71,6.79,6.57,6.96,6.18,9.85,15.4,4.3,11.41,10.14,10.78,12.18,10.82,11.5,13.57,12.05,12.81,fair,6.71,18.9,0.61,0.36,14.72,10.9,,20.43
7,BWA,33.65,4.1,1.19,4.05,8.8,8.21,7.99,8.4,7.58,9.04,10.89,7.19,34.44,31.08,32.76,34.02,30.7,32.36,39.23,35.4,37.32,fair,-1.22,13.9,1.38,0.59,12.18,8.36,36.52,20.43
45,TPR,40.26,4.29,0.92,3.36,8.64,9.38,9.5,9.89,9.11,14.86,22.52,7.2,42.43,39.08,40.75,33.23,30.61,31.92,46.48,42.82,44.65,fair,-21.68,9.56,1.73,0.98,10.66,6.84,3.66,20.43
40,PHM,131.58,12.92,3.77,13.15,11.03,10.18,9.52,9.95,9.09,7.91,9.96,5.86,128.55,117.44,123.0,130.84,119.53,125.18,136.12,124.35,130.24,overvalued,1.78,5.88,0.37,1.73,9.82,6.0,40.66,20.43
33,MGM,38.01,3.3,0.86,3.3,14.37,11.52,13.47,14.75,12.19,19.94,89.02,-49.14,48.68,40.23,44.46,48.68,40.23,44.46,44.25,36.57,40.41,undervalued,0.0,-9.09,1.52,-1.27,8.68,4.86,-31.17,20.43
4,BBWI,34.72,2.92,0.0,2.44,10.75,11.89,12.5,14.39,10.61,10.91,16.77,5.05,42.02,30.98,36.5,35.11,25.89,30.5,53.1,39.15,46.12,fair,-16.44,26.37,0.87,0.45,8.41,4.59,-31.87,20.43
28,LKQ,42.4,3.5,0.98,3.56,11.59,12.11,11.81,12.26,11.36,13.56,15.09,12.03,42.91,39.76,41.33,43.65,40.44,42.04,48.92,45.33,47.12,fair,1.71,14.0,2.23,0.86,8.25,4.43,-5.68,20.43
27,LEN,182.36,14.71,3.45,14.46,11.7,12.4,11.12,11.81,10.43,8.66,10.41,6.91,173.73,153.43,163.58,170.77,150.82,160.8,190.38,168.13,179.26,overvalued,-1.7,9.59,1.4,1.29,8.07,4.25,54.82,20.43
15,DHI,187.54,14.89,4.1,14.61,11.9,12.6,10.54,11.58,9.5,9.61,11.87,7.35,172.43,141.46,156.94,169.18,138.79,153.99,184.47,151.34,167.9,overvalued,-1.88,6.98,0.62,1.8,7.94,4.12,40.37,20.43


In [118]:
# Keep only the rows whose price_valuation_assessment is exactly 'undervalued'.
ticker_screen_df.loc[ticker_screen_df['price_valuation_assessment'].eq('undervalued')]

Unnamed: 0,Ticker,stock_price,EPS_TTM,EPS_current_qtr,EPS_latest_projected,PE_1yr_forward,PE_TTM,PE_TTM_avg,PE_TTM_volatility_+,PE_TTM_volatility_-,PE_5yr_avg,PE_5yr_volatility_+,PE_5yr_volatility_-,relative_valuation_TTM_+,relative_valuation_TTM_-,relative_valuation_TTM_median,relative_valuation_nextQuater_projected_+,relative_valuation_nextQuater_projected_-,relative_valuation_nextQuater_projected_median,relative_valuation_nextYear_projected_+,relative_valuation_nextYear_projected_-,relative_valuation_nextYear_projected_median,price_valuation_assessment,nextQuater_EPS_growthRate,nextYear_EPS_growthRate,next12months_PEG,TTM_PEG,EarningYield_TTM,ERP_TTM,latest_qtr_liquidation_mktcap_ratio_%,selected_PE_TTM_avg
0,ABNB,116.74,4.46,0.86,2.03,27.7,26.17,31.67,34.89,28.45,813.42,1920.05,-293.21,155.61,126.89,141.25,70.83,57.75,64.29,159.8,130.3,145.05,undervalued,-54.48,2.69,26.6,9.73,3.82,0.0,9.78,20.43
1,AMZN,173.12,4.18,1.26,4.4,37.45,41.42,50.86,56.58,45.14,75.1,85.88,64.32,236.5,188.69,212.6,248.95,198.62,223.78,328.73,262.26,295.5,undervalued,5.26,39.0,2.1,1.06,2.41,-1.41,11.75,20.43
2,APTV,70.83,5.44,1.58,5.81,11.56,13.02,14.39,15.68,13.1,35.19,58.32,12.06,85.3,71.26,78.28,91.1,76.11,83.6,123.09,102.84,112.96,undervalued,6.8,44.3,0.92,0.29,7.68,3.86,26.61,20.43
11,CMG,56.14,1.0418,0.34,1.0646,50.25,53.89,61.13,68.0,54.26,3329.36,4794.44,1864.28,70.84,56.53,63.68,72.39,57.77,65.08,87.72,70.0,78.86,undervalued,2.19,23.82,2.33,2.26,1.86,-1.96,4.8,20.43
14,DPZ,418.41,16.27,4.03,15.67,26.6,25.72,31.13,34.54,27.72,31.56,36.1,27.02,561.97,451.0,506.48,541.24,434.37,487.8,614.12,492.86,553.49,undervalued,-3.69,9.28,2.34,2.77,3.89,0.07,-26.67,20.43
17,ETSY,55.39,2.17,0.41,2.07,16.18,25.53,27.65,29.43,25.87,47.18,80.24,14.12,63.86,56.14,60.0,60.92,53.55,57.24,81.52,71.66,76.59,undervalued,-4.61,27.65,1.03,0.92,3.92,0.1,-19.01,20.43
26,LVS,39.94,2.42,0.55,2.43,16.45,16.5,18.33,19.71,16.95,-6.13,21.5,-33.76,47.7,41.02,44.36,47.9,41.19,44.54,54.99,47.29,51.14,undervalued,0.41,15.29,0.65,1.08,6.06,2.24,11.29,20.43
31,MAR,227.62,10.31,2.5,10.51,24.45,22.08,23.18,24.27,22.09,-86.58,169.92,-343.08,250.22,227.75,238.98,255.08,232.17,243.62,257.02,233.93,245.48,undervalued,1.94,2.72,2.57,8.13,4.53,0.71,-31.42,20.43
33,MGM,38.01,3.3,0.86,3.3,14.37,11.52,13.47,14.75,12.19,19.94,89.02,-49.14,48.68,40.23,44.46,48.68,40.23,44.46,44.25,36.57,40.41,undervalued,0.0,-9.09,1.52,-1.27,8.68,4.86,-31.17,20.43
50,WYNN,77.04,5.61,1.12,5.77,14.56,13.73,16.98,19.99,13.97,5.51,24.94,-13.92,112.14,78.37,95.26,115.34,80.61,97.98,110.54,77.25,93.9,undervalued,2.85,-1.43,1.26,-9.63,7.28,3.46,-14.18,20.43


# Financial Statement

In [119]:

# Financial-statement pull: for every symbol in ticker_symbols this cell queries
# Alpha Vantage (OVERVIEW, SPLITS, TIME_SERIES_MONTHLY, EARNINGS, INCOME_STATEMENT,
# BALANCE_SHEET) and builds the annual / quarterly ratio frames plus a year-end PE history.
ticker_symbols = [
    'APTV'
    ]
PE_yr_range = 21 # number of calendar years, ending with last year, covered by the PE history

# Calendar years covered by the PE history; the current (unfinished) year is excluded.
PE_years = range((datetime.today().year - PE_yr_range) ,datetime.today().year)


def _require_payload(payload, key, symbol, endpoint):
    """Return ``payload[key]`` or raise a descriptive error when the entry is missing.

    Without this check a response that lacks the expected entry would leave the
    frames of the previous symbol (or no frame at all) in place, and the rest of
    the loop would either mix data between symbols or fail with an unrelated
    NameError.

    Parameters
    ----------
    payload : dict
        Decoded JSON response.
    key : str
        Top-level entry the caller needs.
    symbol : str
        Ticker being processed (used in the error message only).
    endpoint : str
        Alpha Vantage function name (used in the error message only).

    Raises
    ------
    ValueError
        If ``key`` is not present in ``payload``.
    """
    if key not in payload:
        raise ValueError(
            f'{symbol}: {endpoint} response has no "{key}" entry; received keys: {list(payload)}'
        )
    return payload[key]


# Created once, before the loop, so that every symbol contributes its own "<symbol>_PE" column
# (re-creating it per iteration would keep only the last symbol).
stock_PE_annual_consolidate_df = pd.DataFrame()


for j, symbol in enumerate(ticker_symbols):
    print(j, symbol)
    # COMPANY OVERVIEW
    # replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
    url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Market cap only feeds liquidation_mktcap_ratio_%; when it is missing or not numeric
    # the ratio becomes NaN instead of silently reusing the previous symbol's market cap.
    try:
        stock_mkt_cap = float(data.get('MarketCapitalization'))
    except (TypeError, ValueError):
        stock_mkt_cap = float('nan')



    # STOCK SPLIT FACTOR section
    url = f'https://www.alphavantage.co/query?function=SPLITS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    split_records = data.get('data') or []
    if len(split_records) > 0:
        stock_split_record_df = pd.DataFrame(split_records)
        stock_split_record_df['split_factor'] = pd.to_numeric(stock_split_record_df['split_factor'], errors='coerce') # change split_factor series to numeric data
        stock_split_record_df['effective_date'] = pd.to_datetime(stock_split_record_df['effective_date'])
    else:
        # No split on record: build a fresh one-row neutral table (factor 1, dated today)
        # rather than assigning into a frame that may not exist or may belong to another symbol.
        stock_split_record_df = pd.DataFrame({
            'split_factor': [1]
            ,'effective_date': [pd.Timestamp(datetime.today())]
        })



    # Monthly quote section
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    Monthly_stock_df = pd.DataFrame(
        _require_payload(data, 'Monthly Time Series', symbol, 'TIME_SERIES_MONTHLY')
    )

    # The payload is keyed by date, so dates arrive as columns; transpose to get one row per month.
    Monthly_stock_df = Monthly_stock_df.transpose()
    Monthly_stock_df.index = pd.to_datetime(Monthly_stock_df.index)


    filter_1 = (Monthly_stock_df.index.year.isin(PE_years))
    filter_2 = (Monthly_stock_df.index.month == 12) # month = 12 to get the year end closing price

    selected_cols = [
        '4. close'
    ]

    Monthly_stock_df = Monthly_stock_df[
        filter_1
        & filter_2
    ][selected_cols]

    # Rename columns
    Monthly_stock_df = Monthly_stock_df.rename(
        columns={
            '4. close': 'stock_price'
            }
        )

    Monthly_stock_df["stock_price"] = Monthly_stock_df["stock_price"].astype(float).round(2)

    # modify stock price based on stock split
    for year_i in Monthly_stock_df.index.year:
        for year_j in stock_split_record_df['effective_date'].dt.year:
            if year_i == year_j:

                # year-end prices of the years before the split year are divided by the split factor
                Monthly_stock_df.loc[Monthly_stock_df.index.year < year_j, 'stock_price'] /= (stock_split_record_df['split_factor'][stock_split_record_df['effective_date'].dt.year == year_j].values[0])



    # Earning section
    # past earnings from the Alpha Vantage API
    url = f'https://www.alphavantage.co/query?function=EARNINGS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    selected_cols = [
        'fiscalDateEnding'
        ,'reportedEPS'
    ]

    annualEPS_df = pd.DataFrame(
        _require_payload(data, 'annualEarnings', symbol, 'EARNINGS')
    )

    # Only the fiscal year is kept; it is the join key against the year-end prices.
    annualEPS_df['fiscalDateEnding'] = pd.to_datetime(annualEPS_df['fiscalDateEnding']).dt.year

    annualEPS_df = annualEPS_df[
        annualEPS_df['fiscalDateEnding'].isin(PE_years)
    ]

    # Non-numeric EPS entries become NaN (same policy as the statement columns below)
    # instead of aborting the whole run.
    annualEPS_df['reportedEPS'] = pd.to_numeric(annualEPS_df['reportedEPS'], errors='coerce')

    # clean annualEPS_df: when a fiscal year appears more than once keep its highest reported EPS,
    # then order the years newest first
    annualEPS_df = annualEPS_df.sort_values('reportedEPS', ascending=False).drop_duplicates('fiscalDateEnding')
    annualEPS_df = annualEPS_df.sort_values('fiscalDateEnding', ascending=False).reset_index(drop=True)

    # calculate PE
    # Prices are looked up by year rather than by row position, so a year that exists on
    # only one side yields NaN instead of a length-mismatch error or a shifted pairing.
    year_end_price_by_year = pd.Series(
        Monthly_stock_df["stock_price"].values
        ,index=Monthly_stock_df.index.year
    )
    year_end_price_by_year = year_end_price_by_year[~year_end_price_by_year.index.duplicated()]
    annualEPS_df["PE"] = annualEPS_df['fiscalDateEnding'].map(year_end_price_by_year) / annualEPS_df['reportedEPS']




    # INCOME STATEMENT
    # replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
    url = f'https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    annual_income_reports = _require_payload(data, 'annualReports', symbol, 'INCOME_STATEMENT')
    qtr_income_reports = _require_payload(data, 'quarterlyReports', symbol, 'INCOME_STATEMENT')

    annual_income_df = pd.DataFrame(annual_income_reports).drop(['reportedCurrency'], axis=1)
    annual_income_df = annual_income_df.sort_values('fiscalDateEnding', ascending=True)

    # annual measurements cols (everything after the first column, which holds fiscalDateEnding)
    annual_income_metric_cols = annual_income_df.columns[1:]

    # Calculate year-over-year change percentage
    for column in annual_income_metric_cols:  # Exclude the 'fiscalDateEnding' column
        annual_income_df[column] = pd.to_numeric(annual_income_df[column], errors='coerce')
        annual_income_df[f'{column}_YoY'] = annual_income_df[column].pct_change() * 100

    annual_income_YoY_metric_cols = [col for col in annual_income_df.columns if 'YoY' in col]

    # ratios of income statement calculation
    annual_income_df['gross_margin_%'] = (annual_income_df['grossProfit'] / annual_income_df['totalRevenue']) * 100
    annual_income_df['operating_margin_%'] = (annual_income_df['operatingIncome'] / annual_income_df['totalRevenue']) * 100
    annual_income_df['net_margin_%'] = (annual_income_df['netIncome'] / annual_income_df['totalRevenue']) * 100


    qtr_income_df = pd.DataFrame(qtr_income_reports).drop(['reportedCurrency'], axis=1)
    qtr_income_df = qtr_income_df.sort_values('fiscalDateEnding', ascending=True)

    # qtr measurements cols
    qtr_income_metric_cols = qtr_income_df.columns[1:]

    # Calculate quarter-over-quarter change percentage
    for column in qtr_income_metric_cols:  # Exclude the 'fiscalDateEnding' column
        qtr_income_df[column] = pd.to_numeric(qtr_income_df[column], errors='coerce')
        qtr_income_df[f'{column}_QoQ'] = qtr_income_df[column].pct_change() * 100

    qtr_income_QoQ_metric_cols = [col for col in qtr_income_df.columns if 'QoQ' in col]

    # ratios of income statement calculation
    qtr_income_df['gross_margin_%'] = (qtr_income_df['grossProfit'] / qtr_income_df['totalRevenue']) * 100
    qtr_income_df['operating_margin_%'] = (qtr_income_df['operatingIncome'] / qtr_income_df['totalRevenue']) * 100
    qtr_income_df['net_margin_%'] = (qtr_income_df['netIncome'] / qtr_income_df['totalRevenue']) * 100

    income_ratio_cols = [
        'gross_margin_%'
        ,'operating_margin_%'
        ,'net_margin_%'
        ,'netIncome'
        ]


    # BALANCESHEET
    # replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
    url = f'https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    annual_balancesheet_reports = _require_payload(data, 'annualReports', symbol, 'BALANCE_SHEET')
    qtr_balancesheet_reports = _require_payload(data, 'quarterlyReports', symbol, 'BALANCE_SHEET')

    annual_balancesheet_df = pd.DataFrame(annual_balancesheet_reports).drop(['reportedCurrency'], axis=1)
    annual_balancesheet_df = annual_balancesheet_df.sort_values('fiscalDateEnding', ascending=True)

    for column in annual_balancesheet_df.columns[1:]:  # Exclude the 'fiscalDateEnding' column
        annual_balancesheet_df[column] = pd.to_numeric(annual_balancesheet_df[column], errors='coerce')

    annual_balancesheet_df['current_ratio'] = (annual_balancesheet_df['totalCurrentAssets'] / annual_balancesheet_df['totalCurrentLiabilities'])
    annual_balancesheet_df['working_capital'] = annual_balancesheet_df['totalCurrentAssets'] - annual_balancesheet_df['totalCurrentLiabilities']
    annual_balancesheet_df['longTermDebt_to_workingCp_ratio'] = (annual_balancesheet_df['longTermDebt'] / annual_balancesheet_df['working_capital'])
    annual_balancesheet_df['debtEquity_ratio'] = (annual_balancesheet_df['totalLiabilities'] / annual_balancesheet_df['totalShareholderEquity'])
    annual_balancesheet_df['quick_ratio'] = ((annual_balancesheet_df['totalCurrentAssets'] - annual_balancesheet_df['inventory']) / annual_balancesheet_df['totalCurrentLiabilities'])


    qtr_balancesheet_df = pd.DataFrame(qtr_balancesheet_reports).drop(['reportedCurrency'], axis=1)
    qtr_balancesheet_df = qtr_balancesheet_df.sort_values('fiscalDateEnding', ascending=True)

    for column in qtr_balancesheet_df.columns[1:]:  # Exclude the 'fiscalDateEnding' column
        qtr_balancesheet_df[column] = pd.to_numeric(qtr_balancesheet_df[column], errors='coerce')

    qtr_balancesheet_df['current_ratio'] = (qtr_balancesheet_df['totalCurrentAssets'] / qtr_balancesheet_df['totalCurrentLiabilities'])
    qtr_balancesheet_df['working_capital'] = qtr_balancesheet_df['totalCurrentAssets'] - qtr_balancesheet_df['totalCurrentLiabilities']
    qtr_balancesheet_df['longTermDebt_to_workingCp_ratio'] = (qtr_balancesheet_df['longTermDebt'] / qtr_balancesheet_df['working_capital'])
    qtr_balancesheet_df['debtEquity_ratio'] = (qtr_balancesheet_df['totalLiabilities'] / qtr_balancesheet_df['totalShareholderEquity'])
    qtr_balancesheet_df['quick_ratio'] = ((qtr_balancesheet_df['totalCurrentAssets'] - qtr_balancesheet_df['inventory']) / qtr_balancesheet_df['totalCurrentLiabilities'])


    balancesheet_ratio_cols = [
        'current_ratio'
        ,'working_capital'
        ,'longTermDebt_to_workingCp_ratio'
        ,'debtEquity_ratio'
        ,'quick_ratio'
        ,'totalShareholderEquity'
    ]


    # Per-symbol ratio frames: rebuilt on every iteration, so after the loop they describe the last symbol.
    stock_ratios_annual_consolidate_df = pd.DataFrame()
    stock_ratios_qtr_consolidate_df = pd.DataFrame()

    # NOTE(review): income and balance-sheet frames are combined by row label below, which
    # presumes both endpoints list the same fiscal periods in the same order - confirm.
    stock_ratios_annual_consolidate_df[income_ratio_cols] = annual_income_df[income_ratio_cols]
    stock_ratios_annual_consolidate_df[balancesheet_ratio_cols] = annual_balancesheet_df[balancesheet_ratio_cols]

    stock_ratios_qtr_consolidate_df[income_ratio_cols] = qtr_income_df[income_ratio_cols]
    stock_ratios_qtr_consolidate_df[balancesheet_ratio_cols] = qtr_balancesheet_df[balancesheet_ratio_cols]

    # calculating new consolidated metrics
    # annual df
    stock_ratios_annual_consolidate_df['ROE_%'] = (annual_income_df['netIncome'] / annual_balancesheet_df['totalShareholderEquity']) * 100
    stock_ratios_annual_consolidate_df['liquidation_value'] = ((annual_balancesheet_df['totalAssets'] - annual_balancesheet_df['intangibleAssets']) - annual_balancesheet_df['totalLiabilities'])
    stock_ratios_annual_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_annual_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_annual_consolidate_df['fiscalDateEnding'] = annual_balancesheet_df['fiscalDateEnding']

    # qtr df
    stock_ratios_qtr_consolidate_df['ROE_%'] = (qtr_income_df['netIncome'] / qtr_balancesheet_df['totalShareholderEquity']) * 100
    stock_ratios_qtr_consolidate_df['liquidation_value'] = ((qtr_balancesheet_df['totalAssets'] - qtr_balancesheet_df['intangibleAssets']) - qtr_balancesheet_df['totalLiabilities'])
    stock_ratios_qtr_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_qtr_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_qtr_consolidate_df['fiscalDateEnding'] = qtr_balancesheet_df['fiscalDateEnding']


    # annual PE df
    # The first symbol seeds the frame; later symbols are joined on the fiscal year so that
    # differing year coverage cannot shift one symbol's PE onto another year.
    symbol_PE_df = pd.DataFrame({
        'fiscalDateEnding': annualEPS_df['fiscalDateEnding']
        ,f'{symbol}_PE': annualEPS_df["PE"].round(2)
    })

    if stock_PE_annual_consolidate_df.empty:
        stock_PE_annual_consolidate_df = symbol_PE_df
    else:
        stock_PE_annual_consolidate_df = (
            stock_PE_annual_consolidate_df
            .merge(symbol_PE_df, on='fiscalDateEnding', how='outer')
            .sort_values('fiscalDateEnding', ascending=False)
            .reset_index(drop=True)
        )


0 APTV


In [155]:
# Print the mean of the f'{symbol}_PE' column of stock_PE_annual_consolidate_df;
# `symbol` is whatever value that name currently holds.
print(stock_PE_annual_consolidate_df[f'{symbol}_PE'].mean())

23.27076923076923


In [156]:
# Display the whole stock_PE_annual_consolidate_df frame.
stock_PE_annual_consolidate_df

Unnamed: 0,fiscalDateEnding,APTV_PE
0,2023,18.46
1,2022,27.39
2,2021,63.44
3,2020,70.81
4,2019,19.38
5,2018,11.68
6,2017,13.59
7,2016,10.72
8,2015,16.42
9,2014,14.26


In [157]:
# Display the last 10 rows of the annual ratio frame.
stock_ratios_annual_consolidate_df.tail(10)

Unnamed: 0,gross_margin_%,operating_margin_%,net_margin_%,netIncome,current_ratio,working_capital,longTermDebt_to_workingCp_ratio,debtEquity_ratio,quick_ratio,totalShareholderEquity,ROE_%,liquidation_value,liquidation_mktcap_ratio_%,fiscalDateEnding
9,17.966061,10.430758,8.015901,1351000000,1.343276,1335000000,1.79176,3.070916,1.082798,2510000000,53.824701,1629000000,9.79,2014-12-31
8,20.093458,8.244326,9.679573,1450000000,1.304049,1194000000,3.313233,4.106667,1.00331,2250000000,64.444444,-189000000,-1.14,2015-12-31
7,22.380642,12.5387,10.24116,1257000000,1.306413,1271000000,3.110149,3.968763,1.099325,2401000000,52.353186,113000000,0.68,2016-12-31
6,20.28873,10.990376,10.51692,1355000000,1.593503,2101000000,1.966683,2.622613,1.287571,3299000000,41.073052,354000000,2.13,2017-12-31
5,18.905438,10.204364,7.391756,1067000000,1.293178,1083000000,3.728532,2.546979,0.947482,3459000000,30.847066,-234000000,-1.41,2018-12-31
4,18.430034,8.887651,6.895591,990000000,1.312037,1265000000,3.13913,2.473946,0.99482,3819000000,25.923016,418000000,2.51,2019-12-31
3,14.847696,16.210011,13.806827,1804000000,1.845032,3419000000,1.17315,1.191904,1.524469,7905000000,22.820999,4429000000,26.61,2020-12-31
2,3.598412,7.613011,3.777692,590000000,2.005229,4229000000,0.959801,1.131664,1.526503,8347000000,7.068408,5086000000,30.56,2021-12-31
1,3.322088,7.221682,3.396421,594000000,1.6,2919000000,2.254539,1.451924,1.119013,8809000000,6.743104,1403000000,8.43,2022-12-31
0,4.279088,7.775173,14.652636,2938000000,1.715266,3439000000,1.830765,1.089626,1.223378,11548000000,25.441635,4294000000,25.8,2023-12-31


In [158]:
# Display the last 10 rows of the quarterly ratio frame.
stock_ratios_qtr_consolidate_df.tail(10)

Unnamed: 0,gross_margin_%,operating_margin_%,net_margin_%,netIncome,current_ratio,working_capital,longTermDebt_to_workingCp_ratio,debtEquity_ratio,quick_ratio,totalShareholderEquity,ROE_%,liquidation_value,liquidation_mktcap_ratio_%,fiscalDateEnding
9,2.34562,6.127334,2.130206,89000000,2.598649,6628000000.0,0.981141,1.405908,2.041003,8396000000.0,1.060029,5197000000.0,31.23,2022-03-31
8,-1.626818,2.390929,-1.109194,-45000000,2.627022,6539000000.0,0.98379,1.435486,2.039313,8099000000.0,-0.555624,5014000000.0,30.13,2022-06-30
7,6.545297,10.186389,6.523624,301000000,2.659966,6854000000.0,0.924424,1.414277,2.101477,8195000000.0,3.672971,5301000000.0,31.85,2022-09-30
6,17.521552,9.482759,5.366379,249000000,1.6,2919000000.0,2.254539,1.451924,1.119013,8809000000.0,2.826655,1403000000.0,8.43,2022-12-31
5,3.819012,7.222914,3.362391,162000000,1.71362,3217000000.0,2.012123,1.385283,1.162378,8983000000.0,1.803406,1639000000.0,9.85,2023-03-31
4,4.519231,7.884615,4.653846,242000000,1.758989,3483000000.0,1.859317,1.362218,1.240357,9232000000.0,2.621317,1895000000.0,11.39,2023-06-30
3,5.318733,8.721158,31.853735,1629000000,1.807922,3794000000.0,1.691882,1.172574,1.290034,10778000000.0,15.114121,3577000000.0,21.49,2023-09-30
2,18.743647,7.216914,18.398048,905000000,1.715266,3439000000.0,1.830765,1.089626,1.223378,11548000000.0,7.836855,4294000000.0,25.8,2023-12-31
1,4.835748,8.549276,4.448072,218000000,1.291303,1735000000.0,2.721037,1.099371,0.893217,11120000000.0,1.960432,3983000000.0,23.93,2024-03-31
0,4.434765,8.730944,18.57058,938000000,1.494414,2921000000.0,1.884286,1.132031,1.093263,11467000000.0,8.179995,4429000000.0,26.61,2024-06-30


In [159]:
# 90% accurate
# Fiscal date first, followed by the income-statement ratio columns.
annual_income_df.loc[:, ['fiscalDateEnding', *income_ratio_cols]]

Unnamed: 0,fiscalDateEnding,gross_margin_%,operating_margin_%,net_margin_%,netIncome
11,2012-12-31,17.260861,9.585038,6.993961,1077000000
10,2013-12-31,17.754889,9.974864,7.430568,1212000000
9,2014-12-31,17.966061,10.430758,8.015901,1351000000
8,2015-12-31,20.093458,8.244326,9.679573,1450000000
7,2016-12-31,22.380642,12.5387,10.24116,1257000000
6,2017-12-31,20.28873,10.990376,10.51692,1355000000
5,2018-12-31,18.905438,10.204364,7.391756,1067000000
4,2019-12-31,18.430034,8.887651,6.895591,990000000
3,2020-12-31,14.847696,16.210011,13.806827,1804000000
2,2021-12-31,3.598412,7.613011,3.777692,590000000


In [160]:
# 90% accurate
# Fiscal date first, followed by the balance-sheet ratio columns.
annual_balancesheet_df.loc[:, ['fiscalDateEnding', *balancesheet_ratio_cols]]

Unnamed: 0,fiscalDateEnding,current_ratio,working_capital,longTermDebt_to_workingCp_ratio,debtEquity_ratio,quick_ratio,totalShareholderEquity
12,2011-12-31,1.48195,1789000000,1.115707,4.121445,1.198006,1688000000
11,2012-12-31,1.428532,1568000000,1.482143,3.132623,1.137196,2345000000
10,2013-12-31,1.477144,1858000000,1.265339,2.615252,1.196456,2911000000
9,2014-12-31,1.343276,1335000000,1.79176,3.070916,1.082798,2510000000
8,2015-12-31,1.304049,1194000000,3.313233,4.106667,1.00331,2250000000
7,2016-12-31,1.306413,1271000000,3.110149,3.968763,1.099325,2401000000
6,2017-12-31,1.593503,2101000000,1.966683,2.622613,1.287571,3299000000
5,2018-12-31,1.293178,1083000000,3.728532,2.546979,0.947482,3459000000
4,2019-12-31,1.312037,1265000000,3.13913,2.473946,0.99482,3819000000
3,2020-12-31,1.845032,3419000000,1.17315,1.191904,1.524469,7905000000


In [38]:
import requests

symbol = 'MSFT'
# COMPANY OVERVIEW
# replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={alpha_vantage_api_key}'
r = requests.get(url)
data = r.json()

# Headline fields to echo; they are printed in the order the response lists them.
overview_fields = ('Sector', 'Industry', 'PEGRatio', 'MarketCapitalization', 'ForwardPE')

for key, value in data.items():
    if key not in overview_fields:
        continue
    print(key, value)

Sector TECHNOLOGY
Industry SERVICES-PREPACKAGED SOFTWARE
MarketCapitalization 3017962684000
PEGRatio 2.187
ForwardPE 30.58


In [176]:
# # List of columns to plot
# # columns_to_plot = qtr_income_df.columns[1:]  # Exclude the 'fiscalDateEnding' column

# # Create bar charts for each column
# for column in qtr_income_ratio_cols:
#     fig = px.bar(qtr_income_df, x='fiscalDateEnding', y=column, title=column)
#     fig.show()

In [11]:
symbol = 'MSFT'

# BALANCESHEET
# replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
url = f'https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol={symbol}&apikey={alpha_vantage_api_key}'
r = requests.get(url)
data = r.json()

for key, value in data.items():
    # Echo every top-level entry of the response before parsing it.
    print(key, value)

    if key == 'annualReports':
        # One row per annual report, oldest first, without the currency column.
        annual_balancesheet_df = (
            pd.DataFrame(value)
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding')
        )

        # Every column after the first (fiscalDateEnding) is cast to numbers;
        # text that cannot be parsed becomes NaN.
        for column in annual_balancesheet_df.columns[1:]:
            annual_balancesheet_df[column] = pd.to_numeric(annual_balancesheet_df[column], errors='coerce')

    elif key == 'quarterlyReports':
        # Same treatment for the quarterly reports.
        qtr_balancesheet_df = (
            pd.DataFrame(value)
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding')
        )

        for column in qtr_balancesheet_df.columns[1:]:
            qtr_balancesheet_df[column] = pd.to_numeric(qtr_balancesheet_df[column], errors='coerce')

symbol MSFT
annualReports [{'fiscalDateEnding': '2023-06-30', 'reportedCurrency': 'USD', 'totalAssets': '411976000000', 'totalCurrentAssets': '184257000000', 'cashAndCashEquivalentsAtCarryingValue': '34704000000', 'cashAndShortTermInvestments': '111262000000', 'inventory': '2500000000', 'currentNetReceivables': '48688000000', 'totalNonCurrentAssets': '232219000000', 'propertyPlantEquipment': '95641000000', 'accumulatedDepreciationAmortizationPPE': '68251000000', 'intangibleAssets': '77252000000', 'intangibleAssetsExcludingGoodwill': '9366000000', 'goodwill': '67886000000', 'investments': '95599000000', 'longTermInvestments': '9879000000', 'shortTermInvestments': '76558000000', 'otherCurrentAssets': '21807000000', 'otherNonCurrentAssets': '30601000000', 'totalLiabilities': '205753000000', 'totalCurrentLiabilities': '104149000000', 'currentAccountsPayable': '18095000000', 'deferredRevenue': '107626000000', 'currentDebt': '5247000000', 'shortTermDebt': '0', 'totalNonCurrentLiabilities': '

In [14]:
# Display the last 5 rows of the quarterly balance-sheet frame.
qtr_balancesheet_df.tail(5)

Unnamed: 0,fiscalDateEnding,totalAssets,totalCurrentAssets,cashAndCashEquivalentsAtCarryingValue,cashAndShortTermInvestments,inventory,currentNetReceivables,totalNonCurrentAssets,propertyPlantEquipment,accumulatedDepreciationAmortizationPPE,intangibleAssets,intangibleAssetsExcludingGoodwill,goodwill,investments,longTermInvestments,shortTermInvestments,otherCurrentAssets,otherNonCurrentAssets,totalLiabilities,totalCurrentLiabilities,currentAccountsPayable,deferredRevenue,currentDebt,shortTermDebt,totalNonCurrentLiabilities,capitalLeaseObligations,longTermDebt,currentLongTermDebt,longTermDebtNoncurrent,shortLongTermDebtTotal,otherCurrentLiabilities,otherNonCurrentLiabilities,totalShareholderEquity,treasuryStock,retainedEarnings,commonStock,commonStockSharesOutstanding
4,2023-03-31,380088000000,163889000000,26562000000,104427000000,2877000000,38137000000,220499000000,88132000000,65998000000,77819000000,9879000000,67940000000,95186000000.0,9415000000,77865000000,19165000000,26954000000,185405000000,85691000000,15305000000,79202000000.0,6245000000.0,6245000000,99714000000,12312000000.0,48210000000,6245000000.0,41965000000,48210000000,12664000000,17437000000,194683000000,,108234000000,92093000000,7437000000
3,2023-06-30,411976000000,184257000000,34704000000,111262000000,2500000000,48688000000,232219000000,95641000000,68251000000,77252000000,9366000000,67886000000,95599000000.0,9879000000,76558000000,21807000000,30601000000,205753000000,104149000000,18095000000,107626000000.0,5247000000.0,0,101604000000,12728000000.0,47237000000,5247000000.0,41990000000,47237000000,14745000000,17981000000,206223000000,,118848000000,93718000000,7432000000
2,2023-09-30,445785000000,207586000000,80452000000,143951000000,3000000000,36953000000,242699000000,102502000000,69486000000,76685000000,8895000000,67790000000,68750000000.0,11423000000,63499000000,23682000000,32154000000,225071000000,124792000000,19307000000,98376000000.0,29556000000.0,25808000000,100279000000,13487000000.0,45694000000,3748000000.0,41946000000,71502000000,14475000000,18634000000,220714000000,,132143000000,95508000000,7431000000
1,2023-12-31,470558000000,147393000000,17305000000,81017000000,1615000000,42831000000,327965000000,112308000000,72949000000,148827000000,29896000000,118931000000,71482000000.0,13367000000,63712000000,21930000000,32265000000,232290000000,121016000000,17695000000,92068000000.0,29291000000.0,27041000000,111274000000,14155000000.0,47178000000,2250000000.0,44928000000,74219000000,16362000000,20787000000,238268000000,,145737000000,97480000000,7432000000
0,2024-03-31,484275000000,147180000000,19634000000,80021000000,1304000000,44029000000,341895000000,121375000000,74945000000,147991000000,28828000000,119163000000,69791000000.0,14807000000,60387000000,21826000000,35551000000,231123000000,118525000000,18087000000,89666000000.0,22749000000.0,20500000000,112598000000,14469000000.0,44907000000,2249000000.0,42658000000,65407000000,18023000000,23271000000,253152000000,,159394000000,99193000000,7433000000


# SP500 sectors

In [38]:
# Function to get the list of S&P 500 companies and their sectors
def get_sp500_companies():
    """Download the S&P 500 constituents and return their symbol / sector columns.

    The first HTML table found on the Wikipedia "List of S&P 500 companies"
    page is parsed with ``pd.read_html``.

    Returns
    -------
    pandas.DataFrame
        The columns 'Symbol', 'GICS Sector' and 'GICS Sub-Industry' of that table.
    """
    wanted_columns = ['Symbol', 'GICS Sector', 'GICS Sub-Industry']
    constituents = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
    return constituents[wanted_columns]


# Function to create a dictionary of sectors and sub-sectors
def create_sector_subsector_dict(df):
    """Group the 'GICS Sub-Industry' values of ``df`` by their 'GICS Sector'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'GICS Sector' and 'GICS Sub-Industry'.

    Returns
    -------
    dict
        Sector -> list of sub-industries, one list entry per row of ``df`` in
        row order (a sub-industry shared by several rows is repeated).
    """
    grouped = {}
    for record in df.to_dict('records'):
        # setdefault opens the sector's list the first time the sector is seen.
        grouped.setdefault(record['GICS Sector'], []).append(record['GICS Sub-Industry'])
    return grouped

# Function to filter the S&P 500 companies by sector
def company_sector_list(df, sector):
    """Return the 'Symbol' values of the rows whose 'GICS Sector' equals ``sector``, as a list."""
    in_sector = df['GICS Sector'] == sector
    return df.loc[in_sector, 'Symbol'].tolist()

def company_sub_sector_list(df, sub_sector):
    """Return the symbols of all rows whose 'GICS Sub-Industry' equals ``sub_sector``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Symbol' and 'GICS Sub-Industry' columns.
    sub_sector : str
        Sub-industry name to match exactly.

    Returns
    -------
    list
        Matching 'Symbol' values in row order; empty when nothing matches.
    """
    in_sub_sector = df['GICS Sub-Industry'] == sub_sector
    return df.loc[in_sub_sector, 'Symbol'].tolist()


# Fetch the constituent table (Symbol / GICS Sector / GICS Sub-Industry) once
sp500_df = get_sp500_companies()

# Distinct sector and sub-industry labels, in value_counts() order
sp500_companies_sectors = sp500_df['GICS Sector'].value_counts().index
sp500_companies_sub_sectors = sp500_df['GICS Sub-Industry'].value_counts().index

# sector -> list of sub-industries (one entry per constituent row)
sector_subsector_dict = create_sector_subsector_dict(sp500_df)


# Function to create a long-format (Sector, Ticker) DataFrame of constituents
def create_sector_dataframe(df=None, sectors=None):
    """Build a table with one row per (sector, ticker) pair.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Constituent table with 'Symbol' and 'GICS Sector' columns. When
        omitted, the notebook-level ``sp500_df`` is used, so the original
        zero-argument call keeps working.
    sectors : iterable, optional
        Sectors to include, in output order. When omitted, the sectors of
        ``df`` in ``value_counts()`` order are used, which is how the
        notebook-level ``sp500_companies_sectors`` is derived from ``sp500_df``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Sector' and 'Ticker'. Rows are grouped by sector in the order
        of ``sectors``; within a sector, tickers keep their order in ``df``.
        The two columns are present even when there are no rows, so callers
        can filter on 'Sector' without a KeyError.
    """
    if df is None:
        df = sp500_df
    if sectors is None:
        sectors = df['GICS Sector'].value_counts().index

    # One dictionary per output row; the symbol filter is the same one
    # company_sector_list applies (rows whose 'GICS Sector' equals the sector).
    records = [
        {'Sector': sector, 'Ticker': ticker}
        for sector in sectors
        for ticker in df.loc[df['GICS Sector'] == sector, 'Symbol'].tolist()
    ]

    # Explicit columns keep the schema stable for an empty record list
    return pd.DataFrame(records, columns=['Sector', 'Ticker'])


# NOTE: sector_subsector_dict was already built from sp500_df earlier in this cell
# and sp500_df has not been modified since, so it is not rebuilt here.

# Long-format table: one row per (Sector, Ticker) pair across all sectors
sector_ticker_df = create_sector_dataframe()

# Tickers of the Consumer Staples sector, in table order
is_consumer_staples = sector_ticker_df['Sector'] == 'Consumer Staples'
sector_ticker_list = sector_ticker_df.loc[is_consumer_staples, 'Ticker'].tolist()

In [39]:
# Collapse the long (Sector, Ticker) table into one list of tickers per sector,
# then transpose so each sector becomes a column and the single row is 'Ticker'.
pivot_sector_ticker_df = (
    sector_ticker_df
    .groupby('Sector')['Ticker']
    .apply(list)
    .reset_index()
    .set_index('Sector')
    .T
)

pivot_sector_ticker_df

Sector,Communication Services,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology,Materials,Real Estate,Utilities
Ticker,"[GOOGL, GOOG, T, CHTR, CMCSA, EA, FOXA, FOX, I...","[ABNB, AMZN, APTV, AZO, BBWI, BBY, BKNG, BWA, ...","[MO, ADM, BF.B, BG, CPB, CHD, CLX, KO, CL, CAG...","[APA, BKR, CVX, COP, CTRA, DVN, FANG, EOG, EQT...","[AFL, ALL, AXP, AIG, AMP, AON, ACGL, AJG, AIZ,...","[ABT, ABBV, A, ALGN, AMGN, BAX, BDX, BIO, TECH...","[MMM, AOS, ALLE, AAL, AME, ADP, AXON, BA, BR, ...","[ACN, ADBE, AMD, AKAM, APH, ADI, ANSS, AAPL, A...","[APD, ALB, AMCR, AVY, BALL, CE, CF, CTVA, DOW,...","[ARE, AMT, AVB, BXP, CPT, CBRE, CSGP, CCI, DLR...","[AES, LNT, AEE, AEP, AWK, ATO, CNP, CMS, ED, C..."


In [15]:
# Ticker list stored in the first row of the 'Financials' column
pivot_sector_ticker_df.iloc[0]['Financials']

['AFL',
 'ALL',
 'AXP',
 'AIG',
 'AMP',
 'AON',
 'ACGL',
 'AJG',
 'AIZ',
 'BAC',
 'BK',
 'BRK.B',
 'BLK',
 'BX',
 'BRO',
 'COF',
 'CBOE',
 'SCHW',
 'CB',
 'CINF',
 'C',
 'CFG',
 'CME',
 'CPAY',
 'DFS',
 'EG',
 'FDS',
 'FIS',
 'FITB',
 'FI',
 'BEN',
 'GPN',
 'GL',
 'GS',
 'HIG',
 'HBAN',
 'ICE',
 'IVZ',
 'JKHY',
 'JPM',
 'KEY',
 'KKR',
 'L',
 'MTB',
 'MKTX',
 'MMC',
 'MA',
 'MET',
 'MCO',
 'MS',
 'MSCI',
 'NDAQ',
 'NTRS',
 'PYPL',
 'PNC',
 'PFG',
 'PGR',
 'PRU',
 'RJF',
 'RF',
 'SPGI',
 'STT',
 'SYF',
 'TROW',
 'TRV',
 'TFC',
 'USB',
 'V',
 'WRB',
 'WFC',
 'WTW']

In [16]:
# Number of tickers in the Financials list
len(pivot_sector_ticker_df.iloc[0]['Financials'])

71

## Appendix

In [128]:
# Consumer Staples watchlist
#
# Legend:
#   "!!"          author's highlight flag (meaning is not defined in this notebook;
#                 presumably marks tickers of special interest — TODO confirm)
#   "DATA ISSUE"  ticker is commented out and therefore excluded from the list

ticker_symbols = [
    'MO',
    'ADM',
    # 'BF.B',  # DATA ISSUE
    'BG',
    'CPB',
    'CHD',
    'CLX',
    'KO',
    'CL',
    'CAG',
    'STZ',   # !!
    'COST',
    'DG',
    'DLTR',  # !!
    'EL',
    'GIS',
    'HSY',
    'HRL',
    'K',
    'KVUE',
    'KDP',
    'KMB',
    'KHC',
    # 'KR',    # DATA ISSUE
    'LW',
    # 'MKC',   # DATA ISSUE
    'TAP',
    'MDLZ',
    'MNST',
    'PEP',
    'PM',
    'PG',
    'SJM',
    'SYY',
    'TGT',   # !!
    'TSN',   # !!
    # 'WBA',   # DATA ISSUE
    'WMT',
]

In [None]:
# Energy watchlist
#
# The "!" marks and trailing notes are the author's own screening remarks
# (more marks presumably mean stronger interest — TODO confirm).

ticker_symbols = [
    'APA',
    'BKR',   # !! High P/E, low profitability, high forecast but cannot beat forecast. Summary: similar to TRGP, sounder fundamentals, high earnings expectations, still to be verified
    'CVX',
    'COP',
    'CTRA',
    'DVN',
    'FANG',
    'EOG',
    'EQT',
    'XOM',
    'HAL',   # !!!! Similar to SLB
    'HES',   # ! High P/E, undervalued; market hype with high expectations, strong 2024 performance, already overheated
    'KMI',
    'MRO',
    'MPC',
    'OXY',
    'OKE',   # !!
    'PSX',
    'SLB',   # !!! Slightly high P/E, good ROE, medium profitability, steady growth forecast and can beat forecast. Summary: good fundamentals, similar type to HAL, more market hype than HAL, worth researching
    'TRGP',  # !! High P/E, high ROE, relatively more debt than equity, low profitability, high forecast but cannot beat forecast. Summary: mediocre fundamentals but very high earnings expectations, still to be verified
    'VLO',
    'WMB',
]