In [24]:
import os
import pandas as pd
from decimal import Decimal
import numpy as np
from datetime import datetime, timedelta
from dotenv import load_dotenv
import yfinance as yf


import matplotlib.pyplot as plt
import plotly.express as px
import pytz  # Make sure to import pytz for timezone handling
import seaborn as sns


import requests
import csv
import json

import warnings



from sklearn.linear_model import LinearRegression

In [25]:
# Load variables from a local .env file into the process environment, then
# read the Alpha Vantage key from it (None when the variable is not set).
load_dotenv()
API_KEY = os.environ.get("alpha_vantage_api_key")

In [26]:
# Additional session settings
# Display every row and every column when a DataFrame is rendered
pd.options.display.max_rows = None
pd.options.display.max_columns = None
# Suppress all Python warnings for the rest of the session
warnings.filterwarnings("ignore")
# Enable pandas copy-on-write mode
pd.set_option("mode.copy_on_write", True)

In [31]:
# Parameters section

# Key sent as the `apikey` query parameter on every Alpha Vantage request in this notebook.
alpha_vantage_api_key = API_KEY # FREE TIER API rate limit is 25 requests per day
# Alpha Vantage `function` query values, grouped by category.
# NOTE(review): the 'fundmental' key is misspelled ("fundamental"); it is kept
# unchanged because code outside this cell may look it up under that name.
alpha_vantage_function = {
    'core': [
        'TIME_SERIES_INTRADAY'  # intraday time series (was misspelled 'TIME_SERIES_INTRADA')
        ,'TIME_SERIES_DAILY'  # daily time series quote
        ,'TIME_SERIES_DAILY_ADJUSTED'  # daily time series, split/dividend-adjusted
        ,'GLOBAL_QUOTE'  # latest quote for a single symbol
    ]
    ,'fundmental': [
        'INCOME_STATEMENT'  # income statements
        ,'BALANCE_SHEET'  # balance sheets
        ,'CASH_FLOW'  # cash-flow statements
        ,'EARNINGS'  # reported earnings (EPS)
        ,'EARNINGS_CALENDAR'  # upcoming earnings with EPS estimates
    ]
}

# Customized watch list: the tickers processed by the screener.
# Previously tried and currently disabled: ACN, CE, APTV, NVDA, CEPU, DHI, TSM, ULTA.
ticker_symbols = ['GOOG', 'MSFT']


# # Consumer Staples
# ticker_symbols = [
#     'MO',
#     'ADM',
#     # 'BF.B', # DATA ISSUE
#     'BG',
#     'CPB',
#     'CHD',
#     'CLX',
#     'KO',
#     'CL',
#     'CAG',
#     'STZ',
#     'COST',
#     'DG',
#     'DLTR',
#     'EL',
#     'GIS',
#     'HSY',
#     'HRL',
#     'K',
#     'KVUE',
#     'KDP',
#     'KMB',
#     'KHC',
#     'KR', # DATA ISSUE
#     'LW',
#     'MKC', # DATA ISSUE
#     'TAP',
#     'MDLZ',
#     'MNST',
#     'PEP',
#     'PM',
#     'PG',
#     'SJM',
#     'SYY',
#     'TGT',
#     'TSN',
#     'WBA',
#     'WMT'
# ]

# # Energy
# ticker_symbols = [
# 'APA',
#  'BKR', 
#  'CVX',
#  'COP',
#  'CTRA',
#  'DVN',
#  'FANG',
#  'EOG',
#  'EQT',
#  'XOM',
#  'HAL', 
#  'HES', 
#  'KMI',
#  'MRO',
#  'MPC',
#  'OXY',
#  'OKE', 
#  'PSX',
#  'SLB', 
#  'TRGP',
#  'VLO',
#  'WMB'
#  ]


# Consumer Discretionary
# ticker_symbols = [
#  'ABNB',
#  'AMZN',
#  'APTV',
#  'AZO',
#  'BBWI',
#  'BBY',
#  'BKNG',
#  'BWA',
#  'CZR',
#  'KMX',
#  'CCL',
#  'CMG',
#  'DRI',
#  'DECK',
#  'DPZ',
#  'DHI',
#  'EBAY',
#  'ETSY',
#  'EXPE',
#  'F',
#  'GRMN',
#  'GM',
#  'GPC',
#  'HAS',
#  'HLT',
#  'HD',
#  'LVS',
#  'LEN',
#  'LKQ',
#  'LOW',
#  'LULU',
#  'MAR',
#  'MCD',
#  'MGM',
#  'MHK',
#  'NKE',
#  'NCLH',
#  'NVR',
#  'ORLY',
#  'POOL',
#  'PHM',
#  'RL',
#  'ROST',
#  'RCL',
#  'SBUX',
#  'TPR',
#  'TSLA',
#  'TJX',
#  'TSCO',
#  'ULTA',
#  'WYNN',
#  'YUM'
#  ]


# # # Basic Materials
# ticker_symbols = [
#     'LIN'
#     ,'SHW'
#     ,'APD'
#     ,'FCX'
#     ,'ECL'
#     ,'CTVA'
#     ,'NEM'
#     ,'VMC'
#     ,'DD'
#     ,'MLM'
#     ,'NUE'
#     ,'DOW'
#     ,'PPG'
#     # ,'SW'
#     ,'IFF'
#     ,'LYB'
#     ,'PKG'
#     ,'IP'
#     ,'STLD'
#     ,'BALL'
#     ,'AVY'
#     ,'CF'
#     ,'AMCR'
#     ,'EMN'
#     ,'ALB'
#     ,'CE'
#     ,'MOS'
#     ,'FMC'
# ]


# Time-intelligence parameters
window_days = 90  # look-back window used to build start_date (and as a row count further down)
end_date = datetime.today()
start_date = end_date + timedelta(days=-window_days)

# Earnings-calendar horizons in months:
#   3  -> next 1 quarter of forecast earnings (the calendar currently only shows the next quarter)
#   6  -> next 2 quarters of forecast earnings
#   12 -> next 4 quarters of forecast earnings
earning_calendar = [3, 6, 12]

# Number of calendar years scanned for the historical PE range; yields an (x - 1)-year PE range
PE_yr_range = 6

# Per-ticker results, keyed by ticker symbol (dict form and DataFrame form)
ticker_dict_json, ticker_dict_pd = {}, {}

In [32]:
def convert_to_numeric_or_zero(value):
    """Convert ``value`` to a float rounded to 2 decimals, or return 0.

    Parameters
    ----------
    value : Any
        Usually a string taken from an API response (for example ``"24.15"``,
        ``"None"`` or ``"-"``); numbers and ``None`` are accepted as well.

    Returns
    -------
    float or int
        ``round(float(value), 2)`` when the conversion succeeds and the result
        is not NaN; otherwise the integer ``0``.
    """
    import math  # local import keeps this helper self-contained

    try:
        num = round(float(value), 2)
    except (TypeError, ValueError):
        # TypeError: None or another non-convertible object.
        # ValueError: a non-numeric string such as "None" or "-".
        return 0
    # float("nan") converts without raising, but is not a usable number either.
    return 0 if math.isnan(num) else num

# PE TTM Valuation
This section builds a stock screener; the valuation metrics are computed separately for each ticker.

In [33]:
# Per-ticker data collection and valuation
for j, symbol in enumerate(ticker_symbols):

    print(j, symbol)
    # STOCK SPLIT FACTOR section
    url = f'https://www.alphavantage.co/query?function=SPLITS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Without a 'data' entry there is nothing to build the split table from.
    # Stop here rather than silently reusing the previous ticker's table
    # (or hitting a NameError on the first ticker).
    if 'data' not in data:
        raise RuntimeError(f"No split records returned for {symbol}: {data}")

    if len(data['data']) > 0:
        stock_split_record_df = pd.DataFrame(data['data'])
        stock_split_record_df['split_factor'] = pd.to_numeric(stock_split_record_df['split_factor'], errors='coerce') # change split_factor series to numeric data
        stock_split_record_df['effective_date'] = pd.to_datetime(stock_split_record_df['effective_date'])
    else:
        # No splits on record: an empty table whose columns are explicitly typed,
        # so the .dt accessors used further down keep working.
        stock_split_record_df = pd.DataFrame({
            'split_factor': pd.Series(dtype='float64')
            ,'effective_date': pd.Series(dtype='datetime64[ns]')
        })


    # Daily quote section: full history of daily closing prices
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&apikey={alpha_vantage_api_key}&outputsize=full'
    r = requests.get(url)
    data = r.json()

    # A payload without the time series (presumably a rate-limit notice or an
    # error message) must not leave the previous ticker's prices in place.
    if 'Time Series (Daily)' not in data:
        raise RuntimeError(f"No daily time series returned for {symbol}: {data}")

    selected_cols = [
        '4. close'
    ]

    # The payload is keyed by date, so transpose to get one row per date,
    # keep only the close and give it a readable name.
    Daily_stock_df = (
        pd.DataFrame(data['Time Series (Daily)'])
        .transpose()[selected_cols]
        .rename(columns={'4. close': 'stock_price'})
    )

    # Prices arrive as strings
    Daily_stock_df["stock_price"] = Daily_stock_df["stock_price"].astype(float).round(2)
    Daily_stock_df.index = pd.to_datetime(Daily_stock_df.index)


    # Back-adjust the daily closes for stock splits: every price quoted before
    # a split's effective date is divided by that split's own factor.
    # The date arrays are computed once instead of once per (quote, split) pair.
    daily_dates = Daily_stock_df.index.date
    quoted_dates = set(daily_dates)
    for split in stock_split_record_df.itertuples(index=False):
        split_date = split.effective_date.date()
        # As before, only splits whose effective date is a quoted date are applied.
        if split_date not in quoted_dates:
            continue
        Daily_stock_df.loc[daily_dates < split_date, 'stock_price'] /= split.split_factor


    # MA200 calculation: 200-row rolling mean of the close, computed in
    # ascending date order and aligned back onto the frame by index.
    Daily_stock_df['MA200'] = Daily_stock_df.sort_index(ascending=True)['stock_price'].rolling(window=200).mean()

    def calculate_slope(series):
        """Return the least-squares slope of ``series`` against 0, 1, ..., n-1.

        Accepts a pandas Series or a NumPy array. Uses a degree-1 polynomial
        fit, i.e. the same ordinary-least-squares slope a linear regression
        on the positional index would give, without building a model object
        for every window.
        """
        y = np.asarray(series, dtype=float)
        x = np.arange(len(y))
        return np.polyfit(x, y, 1)[0]

    # Slope of the MA200 over each 20-row window. raw=True passes plain arrays
    # to calculate_slope, which avoids building a Series per window.
    Daily_stock_df['MA200_slope'] = Daily_stock_df.sort_index(ascending=True)['MA200'].rolling(window=20).apply(calculate_slope, raw=True)



    # Monthly quote section: year-end (December) closes for the PE look-back window
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Fail loudly instead of reusing the previous ticker's monthly frame.
    if 'Monthly Time Series' not in data:
        raise RuntimeError(f"No monthly time series returned for {symbol}: {data}")

    Monthly_stock_df = pd.DataFrame(data['Monthly Time Series']).transpose()
    Monthly_stock_df.index = pd.to_datetime(Monthly_stock_df.index)

    current_year = datetime.today().year
    # the PE_yr_range calendar years before the current one (current year excluded)
    filter_1 = (Monthly_stock_df.index.year.isin(range((current_year - PE_yr_range), current_year)))
    filter_2 = (Monthly_stock_df.index.month == 12) # month = 12 to get the year end closing price

    selected_cols = [
        '4. close'
    ]

    Monthly_stock_df = (
        Monthly_stock_df[filter_1 & filter_2][selected_cols]
        .rename(columns={'4. close': 'stock_price'})
    )

    # Prices arrive as strings
    Monthly_stock_df["stock_price"] = Monthly_stock_df["stock_price"].astype(float).round(2)

    # Back-adjust the year-end closes for stock splits: each split divides the
    # closes of all years before its effective year by that split's own factor.
    # Iterating over the split rows (rather than requiring the split year to be
    # one of the frame's years) also covers splits in years that are not in the
    # frame, such as the current year, and uses the right factor when two
    # splits fall in the same year.
    for split in stock_split_record_df.itertuples(index=False):
        split_year = split.effective_date.year
        Monthly_stock_df.loc[Monthly_stock_df.index.year < split_year, 'stock_price'] /= split.split_factor



    # Earning section
    # Past earnings from the Alpha Vantage API
    url = f'https://www.alphavantage.co/query?function=EARNINGS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Fail loudly instead of reusing the previous ticker's earnings frames.
    if 'annualEarnings' not in data or 'quarterlyEarnings' not in data:
        raise RuntimeError(f"No earnings returned for {symbol}: {data}")

    # ---- Annual EPS and year-end PE ----
    current_year = datetime.today().year
    annualEPS_df = pd.DataFrame(data['annualEarnings'])
    annualEPS_df['fiscalDateEnding'] = pd.to_datetime(annualEPS_df['fiscalDateEnding']).dt.year

    # Same year window as the year-end prices (driven by PE_yr_range, not a literal 6)
    annualEPS_df = annualEPS_df[
        annualEPS_df['fiscalDateEnding'].isin(range((current_year - PE_yr_range), current_year))
    ]

    # EPS arrives as strings; anything non-numeric (e.g. "None") becomes NaN
    annualEPS_df['reportedEPS'] = pd.to_numeric(annualEPS_df['reportedEPS'], errors='coerce')

    # clean annualEPS_df: one row per fiscal year (the highest EPS wins), newest year first
    annualEPS_df = annualEPS_df.sort_values('reportedEPS', ascending=False).drop_duplicates('fiscalDateEnding')
    annualEPS_df = annualEPS_df.sort_values('fiscalDateEnding', ascending=False).reset_index(drop=True)

    # Pair every fiscal year with the December close of the same calendar year.
    # Matching on the year (not on row position) keeps price and EPS in step
    # when either side is missing a year, and works whichever side is longer.
    year_end_price = pd.Series(Monthly_stock_df['stock_price'].values, index=Monthly_stock_df.index.year)
    year_end_price = year_end_price[~year_end_price.index.duplicated()]
    annualEPS_df = annualEPS_df[annualEPS_df['fiscalDateEnding'].isin(year_end_price.index)].reset_index(drop=True)

    # calculate metrics
    annualEPS_df["PE"] = annualEPS_df['fiscalDateEnding'].map(year_end_price) / annualEPS_df['reportedEPS']
    annualEPS_df[f"PE_{PE_yr_range-1}yr_avg"] = annualEPS_df["PE"].mean().round(2)
    annualEPS_df[f"PE_{PE_yr_range-1}yr_std"] = np.std(annualEPS_df["PE"]).round(2)
    annualEPS_df[f"PE_{PE_yr_range-1}yr_volatility_+"] = (annualEPS_df[f"PE_{PE_yr_range-1}yr_avg"] + annualEPS_df[f"PE_{PE_yr_range-1}yr_std"]).round(2) # upper bound of the PE fluctuation range
    annualEPS_df[f"PE_{PE_yr_range-1}yr_volatility_-"] = (annualEPS_df[f"PE_{PE_yr_range-1}yr_avg"] - annualEPS_df[f"PE_{PE_yr_range-1}yr_std"]).round(2) # lower bound of the PE fluctuation range

    # ---- Quarterly EPS ----
    selected_cols = [
        'reportedDate'
        ,'reportedEPS'
    ]

    qtrEPS_df = pd.DataFrame(data['quarterlyEarnings'])[selected_cols]
    qtrEPS_df['reportedDate'] = pd.to_datetime(qtrEPS_df['reportedDate'])

    # Missing or non-numeric EPS (e.g. "None") counts as 0
    qtrEPS_df['reportedEPS'] = pd.to_numeric(qtrEPS_df['reportedEPS'], errors='coerce').fillna(0.0)


    # Forecast of the next quarter's earnings from the Alpha Vantage earnings
    # calendar (CSV response). The horizon is fixed at 12 months; the values in
    # earning_calendar could be used here to parameterise it in the future.
    CSV_URL = f'https://www.alphavantage.co/query?function=EARNINGS_CALENDAR&symbol={symbol}&horizon=12month&apikey={alpha_vantage_api_key}'
    with requests.Session() as s:
        download = s.get(CSV_URL)

    decoded_content = download.content.decode('utf-8')
    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    my_list = list(cr)

    # The first row must be the CSV header. Anything else is not a calendar
    # (presumably a rate-limit notice or an error message).
    if not my_list or 'estimate' not in my_list[0]:
        raise RuntimeError(f"Unexpected earnings calendar response for {symbol}: {decoded_content[:200]}")

    forecast_earanings_df = pd.DataFrame(
        columns=my_list[0]
        ,data=my_list[1:]
        )

    # EPS estimate of the first listed event; 0 when there is no event or the
    # estimate is blank. The scalar is read with .iloc[0] instead of converting
    # a NumPy array with float().
    latest_projected_EPS = 0
    if not forecast_earanings_df.empty:
        first_estimate = forecast_earanings_df['estimate'].iloc[0]
        if first_estimate != '':
            latest_projected_EPS = float(first_estimate)


    # Forward-looking figures: forward EPS from yfinance; PEG, forward PE and
    # market capitalization from the Alpha Vantage company OVERVIEW.
    # yf data
    yf_data = yf.Ticker(symbol).info

    # COMPANY OVERVIEW (requested once per ticker and reused for all three
    # fields; every request counts against the daily API quota)
    url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Fail loudly instead of reusing the previous ticker's overview values.
    if 'MarketCapitalization' not in data:
        raise RuntimeError(f"No company overview returned for {symbol}: {data}")

    # Optional ratios fall back to 0, the same fallback convert_to_numeric_or_zero uses
    PEG_12month_projected = convert_to_numeric_or_zero(value=data['PEGRatio']) if 'PEGRatio' in data else 0
    PE_12month_projected = convert_to_numeric_or_zero(value=data['ForwardPE']) if 'ForwardPE' in data else 0
    # A non-numeric market cap (e.g. "None") becomes NaN instead of raising
    stock_mkt_cap = float(pd.to_numeric(data['MarketCapitalization'], errors='coerce'))

    forecast_earnings_keys = [
        'forwardEps'
    ]

    if not all(key in yf_data.keys() for key in forecast_earnings_keys):
        # Handle the case where one or more keys are missing
        EPS_12month_projected = 0
    else:
        # EPS through the end of the next calendar year (next year's forecast EPS);
        # a None value is treated like a missing key.
        EPS_12month_projected = yf_data['forwardEps'] or 0

    # US Treasury section
    # The 10-year yield does not depend on the ticker, so it is fetched only
    # for the first ticker and reused for the others.
    if j == 0:
        url = f'https://www.alphavantage.co/query?function=TREASURY_YIELD&interval=daily&maturity=10year&apikey={alpha_vantage_api_key}'
        r = requests.get(url)
        data = r.json()

        if 'data' not in data:
            raise RuntimeError(f"No treasury yield returned: {data}")

        US_T_10yrs_df = pd.DataFrame(data['data'])
        US_T_10yrs_df['value'] = pd.to_numeric(US_T_10yrs_df['value'], errors='coerce') # change dataframe value to numeric data
        # First numeric value in the series (row 0 unless it failed to parse)
        valid_yields = US_T_10yrs_df['value'].dropna()
        US_T_10yrs_YTM = valid_yields.iloc[0] if not valid_yields.empty else float('nan')



    # INCOME STATEMENT
    url = f'https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Fail loudly instead of reusing the previous ticker's income frames.
    if 'annualReports' not in data or 'quarterlyReports' not in data:
        raise RuntimeError(f"No income statement returned for {symbol}: {data}")

    # ---- Annual reports, oldest first so pct_change() compares each year with the previous one ----
    annual_income_df = pd.DataFrame(data['annualReports']).drop(['reportedCurrency'], axis=1)
    annual_income_df = annual_income_df.sort_values('fiscalDateEnding', ascending=True)

    # annual measurement cols: everything after the first column
    # (assumes the first column is 'fiscalDateEnding'; TODO confirm)
    annual_income_metric_cols = annual_income_df.columns[1:]

    # Convert to numbers and calculate the year-over-year change percentage
    for column in annual_income_metric_cols:
        annual_income_df[column] = pd.to_numeric(annual_income_df[column], errors='coerce')
        annual_income_df[f'{column}_YoY'] = annual_income_df[column].pct_change() * 100

    annual_income_YoY_metric_cols = [col for col in annual_income_df.columns if 'YoY' in col]

    # ratios of income statement calculation
    annual_income_df['gross_margin_%'] = (annual_income_df['grossProfit'] / annual_income_df['totalRevenue']) * 100
    annual_income_df['operating_margin_%'] = (annual_income_df['operatingIncome'] / annual_income_df['totalRevenue']) * 100
    annual_income_df['net_margin_%'] = (annual_income_df['netIncome'] / annual_income_df['totalRevenue']) * 100

    # ---- Quarterly reports, oldest first ----
    qtr_income_df = pd.DataFrame(data['quarterlyReports']).drop(['reportedCurrency'], axis=1)
    qtr_income_df = qtr_income_df.sort_values('fiscalDateEnding', ascending=True)

    # qtr measurement cols: everything after the first column
    qtr_income_metric_cols = qtr_income_df.columns[1:]

    # Convert to numbers and calculate the quarter-over-quarter change percentage
    for column in qtr_income_metric_cols:
        qtr_income_df[column] = pd.to_numeric(qtr_income_df[column], errors='coerce')
        qtr_income_df[f'{column}_QoQ'] = qtr_income_df[column].pct_change() * 100

    qtr_income_QoQ_metric_cols = [col for col in qtr_income_df.columns if 'QoQ' in col]

    # ratios of income statement calculation
    qtr_income_df['gross_margin_%'] = (qtr_income_df['grossProfit'] / qtr_income_df['totalRevenue']) * 100
    qtr_income_df['operating_margin_%'] = (qtr_income_df['operatingIncome'] / qtr_income_df['totalRevenue']) * 100
    qtr_income_df['net_margin_%'] = (qtr_income_df['netIncome'] / qtr_income_df['totalRevenue']) * 100

    # Income-statement columns carried into the consolidated ratio frames
    income_ratio_cols = [
        'gross_margin_%'
        ,'operating_margin_%'
        ,'net_margin_%'
        ,'netIncome'
        ]
    
    

    # BALANCE SHEET
    url = f'https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Fail loudly instead of reusing the previous ticker's balance-sheet frames.
    if 'annualReports' not in data or 'quarterlyReports' not in data:
        raise RuntimeError(f"No balance sheet returned for {symbol}: {data}")

    # ---- Annual reports, oldest first ----
    annual_balancesheet_df = pd.DataFrame(data['annualReports']).drop(['reportedCurrency'], axis=1)
    annual_balancesheet_df = annual_balancesheet_df.sort_values('fiscalDateEnding', ascending=True)

    # Convert everything after the first column to numbers
    # (assumes the first column is 'fiscalDateEnding'; TODO confirm)
    for column in annual_balancesheet_df.columns[1:]:
        annual_balancesheet_df[column] = pd.to_numeric(annual_balancesheet_df[column], errors='coerce')

    annual_balancesheet_df['current_ratio'] = (annual_balancesheet_df['totalCurrentAssets'] / annual_balancesheet_df['totalCurrentLiabilities'])
    annual_balancesheet_df['working_capital'] = annual_balancesheet_df['totalCurrentAssets'] - annual_balancesheet_df['totalCurrentLiabilities']
    annual_balancesheet_df['longTermDebt_to_workingCp_ratio'] = (annual_balancesheet_df['longTermDebt'] / annual_balancesheet_df['working_capital'])
    annual_balancesheet_df['debtEquity_ratio'] = (annual_balancesheet_df['totalLiabilities'] / annual_balancesheet_df['totalShareholderEquity'])
    annual_balancesheet_df['quick_ratio'] = ((annual_balancesheet_df['totalCurrentAssets'] - annual_balancesheet_df['inventory']) / annual_balancesheet_df['totalCurrentLiabilities'])

    # ---- Quarterly reports, oldest first ----
    qtr_balancesheet_df = pd.DataFrame(data['quarterlyReports']).drop(['reportedCurrency'], axis=1)
    qtr_balancesheet_df = qtr_balancesheet_df.sort_values('fiscalDateEnding', ascending=True)

    for column in qtr_balancesheet_df.columns[1:]:
        qtr_balancesheet_df[column] = pd.to_numeric(qtr_balancesheet_df[column], errors='coerce')

    qtr_balancesheet_df['current_ratio'] = (qtr_balancesheet_df['totalCurrentAssets'] / qtr_balancesheet_df['totalCurrentLiabilities'])
    qtr_balancesheet_df['working_capital'] = qtr_balancesheet_df['totalCurrentAssets'] - qtr_balancesheet_df['totalCurrentLiabilities']
    qtr_balancesheet_df['longTermDebt_to_workingCp_ratio'] = (qtr_balancesheet_df['longTermDebt'] / qtr_balancesheet_df['working_capital'])
    qtr_balancesheet_df['debtEquity_ratio'] = (qtr_balancesheet_df['totalLiabilities'] / qtr_balancesheet_df['totalShareholderEquity'])
    qtr_balancesheet_df['quick_ratio'] = ((qtr_balancesheet_df['totalCurrentAssets'] - qtr_balancesheet_df['inventory']) / qtr_balancesheet_df['totalCurrentLiabilities'])

    # Book value per share of the latest quarter (last row after the ascending
    # sort), stored as one scalar on every row. Previously a one-row Series was
    # assigned, which filled only the last row and left NaN elsewhere.
    qtr_balancesheet_df['BVPS_latest'] = round(
        qtr_balancesheet_df['totalShareholderEquity'].iloc[-1] / qtr_balancesheet_df['commonStockSharesOutstanding'].iloc[-1]
        ,2
        )

    # Balance-sheet columns carried into the consolidated ratio frames
    balancesheet_ratio_cols = [
        'current_ratio'
        ,'working_capital'
        ,'longTermDebt_to_workingCp_ratio'
        ,'debtEquity_ratio'
        ,'quick_ratio'
        ,'totalShareholderEquity'
    ]


    # CASHFLOW STATEMENT
    url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Fail loudly instead of reusing the previous ticker's cash-flow frames.
    if 'annualReports' not in data or 'quarterlyReports' not in data:
        raise RuntimeError(f"No cash-flow statement returned for {symbol}: {data}")

    # ---- Annual reports, oldest first ----
    annual_cashflow_df = pd.DataFrame(data['annualReports']).drop(['reportedCurrency'], axis=1)
    annual_cashflow_df = annual_cashflow_df.sort_values('fiscalDateEnding', ascending=True)

    # Convert everything after the first column to numbers and add the
    # year-over-year change percentage (assumes the first column is
    # 'fiscalDateEnding'; TODO confirm)
    for column in annual_cashflow_df.columns[1:]:
        annual_cashflow_df[column] = pd.to_numeric(annual_cashflow_df[column], errors='coerce')
        annual_cashflow_df[f'{column}_YoY'] = annual_cashflow_df[column].pct_change() * 100

    # ---- Quarterly reports, oldest first ----
    qtr_cashflow_df = pd.DataFrame(data['quarterlyReports']).drop(['reportedCurrency'], axis=1)
    qtr_cashflow_df = qtr_cashflow_df.sort_values('fiscalDateEnding', ascending=True)

    # NOTE(review): these columns are suffixed '_YoY', but pct_change() on
    # consecutive quarterly rows is a quarter-over-quarter change. The suffix is
    # kept because other cells may read these column names.
    for column in qtr_cashflow_df.columns[1:]:
        qtr_cashflow_df[column] = pd.to_numeric(qtr_cashflow_df[column], errors='coerce')
        qtr_cashflow_df[f'{column}_YoY'] = qtr_cashflow_df[column].pct_change() * 100


    # Consolidated ratio frames (one annual, one quarterly) combining income-statement
    # and balance-sheet columns. Every assignment below relies on pandas aligning
    # the right-hand side by index label; none of the source frames had its index
    # reset after sorting, so the labels are the ones the frames were created with.
    stock_ratios_annual_consolidate_df = pd.DataFrame()
    stock_ratios_qtr_consolidate_df = pd.DataFrame()

    # Copy the selected income and balance-sheet columns, annual first
    stock_ratios_annual_consolidate_df[income_ratio_cols] = annual_income_df[income_ratio_cols]
    stock_ratios_annual_consolidate_df[balancesheet_ratio_cols] = annual_balancesheet_df[balancesheet_ratio_cols]

    # ... then quarterly
    stock_ratios_qtr_consolidate_df[income_ratio_cols] = qtr_income_df[income_ratio_cols]
    stock_ratios_qtr_consolidate_df[balancesheet_ratio_cols] = qtr_balancesheet_df[balancesheet_ratio_cols]

    # calculating new consolidated metrics
    # annual df
    # ROE_%: net income as a percentage of total shareholder equity
    stock_ratios_annual_consolidate_df['ROE_%'] = (annual_income_df['netIncome'] / annual_balancesheet_df['totalShareholderEquity']) * 100
    # liquidation_value: total assets minus intangible assets minus total liabilities
    stock_ratios_annual_consolidate_df['liquidation_value'] = ((annual_balancesheet_df['totalAssets'] - annual_balancesheet_df['intangibleAssets']) - annual_balancesheet_df['totalLiabilities'])
    # liquidation value as a percentage of the market capitalization (stock_mkt_cap)
    stock_ratios_annual_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_annual_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_annual_consolidate_df['fiscalDateEnding'] = annual_balancesheet_df['fiscalDateEnding']

    # qtr df
    # Same three metrics on the quarterly frames; netIncome here is the
    # quarterly figure, used as-is.
    stock_ratios_qtr_consolidate_df['ROE_%'] = (qtr_income_df['netIncome'] / qtr_balancesheet_df['totalShareholderEquity']) * 100
    stock_ratios_qtr_consolidate_df['liquidation_value'] = ((qtr_balancesheet_df['totalAssets'] - qtr_balancesheet_df['intangibleAssets']) - qtr_balancesheet_df['totalLiabilities'])
    stock_ratios_qtr_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_qtr_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_qtr_consolidate_df['fiscalDateEnding'] = qtr_balancesheet_df['fiscalDateEnding']





    # Consolidated section: the first window_days rows of the daily frame,
    # enriched with trailing EPS figures for every quote date.
    stock_consolidate_df = Daily_stock_df.head(window_days)
    latest_quote_date = stock_consolidate_df.index.max()

    for quote_date in stock_consolidate_df.index:

        # EPS of the quarters reported strictly before this quote date,
        # in the order qtrEPS_df lists them
        reported_before = qtrEPS_df.loc[qtrEPS_df['reportedDate'] < quote_date, 'reportedEPS']

        # Trailing twelve months = first four of those quarters; current quarter = the first one
        stock_consolidate_df.loc[quote_date, "EPS_TTM"] = reported_before.head(4).values.sum()
        stock_consolidate_df.loc[quote_date, "EPS_currentQtr"] = reported_before.head(1).values.sum()

        if quote_date == latest_quote_date:
            # Past 3 reported quarters + the 1 projected quarter, taken at the
            # latest quote date and applied to every row
            EPS_nextQtr_projected = latest_projected_EPS + reported_before.head(3).values.sum()
            stock_consolidate_df["EPS_nextQtr"] = latest_projected_EPS
            stock_consolidate_df["EPS_nextQtr_TTM"] = EPS_nextQtr_projected


    

    # stock's stats
    # Scalars below are read from the first row with .iloc[0]. The frame has a
    # date index, so series[0] would be a label lookup that only works through
    # pandas' deprecated positional fallback.
    stock_consolidate_df["Ticker"] = symbol
    stock_consolidate_df["PE_1yr_forward"] = round(PE_12month_projected, 2)
    stock_consolidate_df["PE_TTM"] = (stock_consolidate_df["stock_price"] / stock_consolidate_df["EPS_TTM"]).round(2)
    stock_consolidate_df["PE_TTM_avg"] = stock_consolidate_df["PE_TTM"].mean().round(2)
    stock_consolidate_df["PE_TTM_std"] = np.std(stock_consolidate_df["PE_TTM"]).round(2)
    stock_consolidate_df["PE_TTM_volatility_+"] = (stock_consolidate_df["PE_TTM_avg"] + stock_consolidate_df["PE_TTM_std"]).round(2) # upper bound of the PE fluctuation range
    stock_consolidate_df["PE_TTM_volatility_-"] = (stock_consolidate_df["PE_TTM_avg"] - stock_consolidate_df["PE_TTM_std"]).round(2) # lower bound of the PE fluctuation range

    stock_consolidate_df["relative_valuation_TTM_+"] = (stock_consolidate_df["PE_TTM_volatility_+"] * stock_consolidate_df["EPS_TTM"]).round(2) # upper price bound of the relative valuation
    stock_consolidate_df["relative_valuation_TTM_-"] = (stock_consolidate_df["PE_TTM_volatility_-"] * stock_consolidate_df["EPS_TTM"]).round(2) # lower price bound of the relative valuation
    stock_consolidate_df["relative_valuation_TTM_median"] = (np.median([stock_consolidate_df["relative_valuation_TTM_+"].iloc[0], stock_consolidate_df["relative_valuation_TTM_-"].iloc[0]])).round(2) # median price of the valuation based on the latest TTM PE

    stock_consolidate_df["relative_valuation_nextQuater_projected_+"] = (stock_consolidate_df["PE_TTM_volatility_+"] * stock_consolidate_df["EPS_nextQtr_TTM"]).round(2) # upper price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextQuater_projected_-"] = (stock_consolidate_df["PE_TTM_volatility_-"] * stock_consolidate_df["EPS_nextQtr_TTM"]).round(2) # lower price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextQuater_projected_median"] = (np.median([stock_consolidate_df["relative_valuation_nextQuater_projected_+"].iloc[0], stock_consolidate_df["relative_valuation_nextQuater_projected_-"].iloc[0]])).round(2) # median price of the PE valuation based on the past 3 quarters' EPS + 1 projected quarter

    stock_consolidate_df[f"{window_days}_price_min"] = stock_consolidate_df["stock_price"].min().round(2)
    stock_consolidate_df[f"{window_days}_price_max"] = stock_consolidate_df["stock_price"].max().round(2)
    stock_consolidate_df[f"{window_days}_price_avg"] = stock_consolidate_df["stock_price"].mean().round(2)
    stock_consolidate_df[f"{window_days}_price_std"] = np.std(stock_consolidate_df["stock_price"]).round(2)

    # Multi-year PE statistics are constant per ticker; take them from the first annual row
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_avg"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_avg"].values[0]
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_std"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_std"].values[0]
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_volatility_+"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_volatility_+"].values[0]
    stock_consolidate_df[f"PE_{PE_yr_range-1}yr_volatility_-"] = annualEPS_df[f"PE_{PE_yr_range-1}yr_volatility_-"].values[0]

    stock_consolidate_df["relative_valuation_nextYear_projected_+"] = (stock_consolidate_df["PE_TTM_volatility_+"] * EPS_12month_projected).round(2) # upper price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextYear_projected_-"] = (stock_consolidate_df["PE_TTM_volatility_-"] * EPS_12month_projected).round(2) # lower price bound of the relative valuation
    stock_consolidate_df["relative_valuation_nextYear_projected_median"] = (np.median([stock_consolidate_df["relative_valuation_nextYear_projected_+"].iloc[0], stock_consolidate_df["relative_valuation_nextYear_projected_-"].iloc[0]])).round(2) # median price of the PE valuation based on next year's projected EPS

    stock_consolidate_df["PEG_next12months"] = PEG_12month_projected
    stock_consolidate_df["EPS_next12months"] = EPS_12month_projected
    stock_consolidate_df["PEG_TTM"] = (stock_consolidate_df["PE_TTM"] / (((EPS_12month_projected - stock_consolidate_df["EPS_TTM"]) / stock_consolidate_df["EPS_TTM"]) * 100)).round(2) # PEG ratio from the EPS growth rate through next year; < 1 is a sign of undervaluation

    stock_consolidate_df["EPS_nextYr_growthRate"] = (((EPS_12month_projected - stock_consolidate_df["EPS_TTM"]) / stock_consolidate_df["EPS_TTM"]) * 100).round(2)
    stock_consolidate_df["EPS_nextQtr_growthRate"] = (((stock_consolidate_df["EPS_nextQtr_TTM"] - stock_consolidate_df["EPS_TTM"]) / stock_consolidate_df["EPS_TTM"]) * 100).round(2)

    stock_consolidate_df["EarningYield_TTM"] = ((stock_consolidate_df["EPS_TTM"] / stock_consolidate_df["stock_price"]) * 100).round(2)
    stock_consolidate_df["ERP_TTM"] = stock_consolidate_df["EarningYield_TTM"] - US_T_10yrs_YTM # it means a comparison between equity return and 10 years risk free, usually ERP >= 3 for short term invest at least, ERP >= 5 for long term invest

    # Latest-quarter figures: last row of the quarterly frames
    stock_consolidate_df['latest_qtr_liquidation_mktcap_ratio_%'] = stock_ratios_qtr_consolidate_df['liquidation_mktcap_ratio_%'].values[-1]
    stock_consolidate_df['BVPS_latest'] = qtr_balancesheet_df['BVPS_latest'].values[-1]

    stock_consolidate_df['MA200_slope'] = Daily_stock_df['MA200_slope']

    # Free cash flow of the last four quarters per share outstanding, priced
    # against the first row's close
    first_row_price = stock_consolidate_df['stock_price'].iloc[0]
    stock_consolidate_df['FCF_per_share_TTM'] = round((qtr_cashflow_df['operatingCashflow'] - qtr_cashflow_df['capitalExpenditures']).tail(4).sum() / qtr_balancesheet_df['commonStockSharesOutstanding'].values[-1], 2)
    stock_consolidate_df['PFCF_TTM'] = round(first_row_price / stock_consolidate_df['FCF_per_share_TTM'], 2)
    stock_consolidate_df['FCF_yield_TTM'] = round((stock_consolidate_df['FCF_per_share_TTM'] / first_row_price) * 100, 2) # % return on cash of every single dollar you spent on stock price, the higher the better


    # filter conditions: where the price sits relative to the TTM relative-valuation band
    conditions = [
    (stock_consolidate_df["stock_price"] < stock_consolidate_df["relative_valuation_TTM_-"]),
    (stock_consolidate_df["stock_price"] > stock_consolidate_df["relative_valuation_TTM_+"]),
    ((stock_consolidate_df["stock_price"] >= stock_consolidate_df["relative_valuation_TTM_-"]) & (stock_consolidate_df["stock_price"] <= stock_consolidate_df["relative_valuation_TTM_+"])),
    ]

    # Same order as `conditions`
    categories = [
        'undervalued'
        ,'overvalued'
        ,'fair'
        ]

    # This KPI assesses whether the stock price is under/over/fair relative to
    # the current relative valuation. The column filled by the loop is the one
    # initialised here (previously an unrelated, never-filled 'curr_assessment'
    # column was created instead), so it exists even when no row matches.
    stock_consolidate_df["price_valuation_assessment"] = None

    for condition, category in zip(conditions, categories):
        stock_consolidate_df.loc[condition, "price_valuation_assessment"] = category





    # Append key-value pairs to the dictionary
    selected_cols = [
    "Ticker"
    ,"stock_price"
    ,"EPS_currentQtr"
    ,"EPS_nextQtr"
    ,"EPS_TTM"
    ,"EPS_nextQtr_TTM"
    ,"EPS_next12months"
    ,"PE_1yr_forward"
    ,"PE_TTM"
    ,"PE_TTM_avg"
    ,"PE_TTM_volatility_+"
    ,"PE_TTM_volatility_-"
    ,f"PE_{PE_yr_range-1}yr_avg"
    ,f"PE_{PE_yr_range-1}yr_volatility_+"
    ,f"PE_{PE_yr_range-1}yr_volatility_-"
    ,"relative_valuation_TTM_+"
    ,"relative_valuation_TTM_-"
    ,"relative_valuation_TTM_median"
    ,"relative_valuation_nextQuater_projected_+"
    ,"relative_valuation_nextQuater_projected_-"
    ,"relative_valuation_nextQuater_projected_median"
    ,"relative_valuation_nextYear_projected_+"
    ,"relative_valuation_nextYear_projected_-"
    ,"relative_valuation_nextYear_projected_median"
    ,"price_valuation_assessment"
    ,"EPS_nextQtr_growthRate"
    ,"EPS_nextYr_growthRate"
    ,"PEG_next12months"
    ,"PEG_TTM"
    ,"EarningYield_TTM"
    ,"ERP_TTM"
    ,"latest_qtr_liquidation_mktcap_ratio_%"
    ,"BVPS_latest"
    ,"PFCF_TTM"
    ,"FCF_yield_TTM" 
    ,"MA200_slope"
    ]


    # store each stock info as pd into dictionary
    ticker_dict_pd[symbol] = stock_consolidate_df[selected_cols]
    # transfer pandas dataframe to json format, and each stock info into dictionary
    ticker_dict_json[symbol] = stock_consolidate_df[selected_cols].to_dict()

    # Screener table: one row per ticker, taken from the first row of that ticker's
    # consolidated frame. The empty table is created on the first iteration only;
    # every iteration (including the first) then writes its row at position j.
    if j == 0:
        ticker_screen_df = pd.DataFrame(columns=selected_cols)

    stock_consolidate_df_values = stock_consolidate_df[selected_cols].values[0]
    ticker_screen_df.loc[j] = stock_consolidate_df_values


# ticker screen df consolidated metrics
ticker_screen_df['Industry_PE_TTM_avg'] = round(ticker_screen_df['PE_TTM'].mean(), 2)

0 GOOG
1 MSFT


In [34]:
# Show the consolidated metrics frame stored for MSFT in ticker_dict_pd.
ticker_dict_pd['MSFT']

Unnamed: 0,Ticker,stock_price,EPS_currentQtr,EPS_nextQtr,EPS_TTM,EPS_nextQtr_TTM,EPS_next12months,PE_1yr_forward,PE_TTM,PE_TTM_avg,PE_TTM_volatility_+,PE_TTM_volatility_-,PE_5yr_avg,PE_5yr_volatility_+,PE_5yr_volatility_-,relative_valuation_TTM_+,relative_valuation_TTM_-,relative_valuation_TTM_median,relative_valuation_nextQuater_projected_+,relative_valuation_nextQuater_projected_-,relative_valuation_nextQuater_projected_median,relative_valuation_nextYear_projected_+,relative_valuation_nextYear_projected_-,relative_valuation_nextYear_projected_median,price_valuation_assessment,EPS_nextQtr_growthRate,EPS_nextYr_growthRate,PEG_next12months,PEG_TTM,EarningYield_TTM,ERP_TTM,latest_qtr_liquidation_mktcap_ratio_%,BVPS_latest,PFCF_TTM,FCF_yield_TTM,MA200_slope
2025-01-21,MSFT,428.5,3.3,0,12.12,9.19,14.95,32.57,35.35,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,fair,-24.17,23.35,2.25,1.51,2.83,-1.78,4.44,38.69,45.54,2.2,0.02065
2025-01-17,MSFT,429.03,3.3,0,12.12,9.19,14.95,32.57,35.4,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,fair,-24.17,23.35,2.25,1.52,2.82,-1.79,4.44,38.69,45.54,2.2,0.029117
2025-01-16,MSFT,424.58,3.3,0,12.12,9.19,14.95,32.57,35.03,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,fair,-24.17,23.35,2.25,1.5,2.85,-1.76,4.44,38.69,45.54,2.2,0.039417
2025-01-15,MSFT,426.31,3.3,0,12.12,9.19,14.95,32.57,35.17,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,fair,-24.17,23.35,2.25,1.51,2.84,-1.77,4.44,38.69,45.54,2.2,0.051653
2025-01-14,MSFT,415.67,3.3,0,12.12,9.19,14.95,32.57,34.3,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,undervalued,-24.17,23.35,2.25,1.47,2.92,-1.69,4.44,38.69,45.54,2.2,0.064743
2025-01-13,MSFT,417.19,3.3,0,12.12,9.19,14.95,32.57,34.42,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,undervalued,-24.17,23.35,2.25,1.47,2.91,-1.7,4.44,38.69,45.54,2.2,0.078678
2025-01-10,MSFT,418.95,3.3,0,12.12,9.19,14.95,32.57,34.57,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,undervalued,-24.17,23.35,2.25,1.48,2.89,-1.72,4.44,38.69,45.54,2.2,0.092939
2025-01-08,MSFT,424.56,3.3,0,12.12,9.19,14.95,32.57,35.03,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,fair,-24.17,23.35,2.25,1.5,2.85,-1.76,4.44,38.69,45.54,2.2,0.107247
2025-01-07,MSFT,422.37,3.3,0,12.12,9.19,14.95,32.57,34.85,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,fair,-24.17,23.35,2.25,1.49,2.87,-1.74,4.44,38.69,45.54,2.2,0.120891
2025-01-06,MSFT,427.85,3.3,0,12.12,9.19,14.95,32.57,35.3,35.55,36.46,34.64,35.69,40.81,30.57,441.9,419.84,430.87,335.07,318.34,326.7,545.08,517.87,531.48,fair,-24.17,23.35,2.25,1.51,2.83,-1.78,4.44,38.69,45.54,2.2,0.133543


In [15]:
# Rank the screener rows from the highest to the lowest trailing free-cash-flow yield.
ticker_screen_df = ticker_screen_df.sort_values(by=['FCF_yield_TTM'], ascending=False)

# Columns shown in the condensed screener view (commented entries are switched off).
selected_cols = [
    'Ticker',
    'EPS_TTM',
    'PE_TTM',
    'PE_TTM_avg',
    'price_valuation_assessment',
    # "EPS_nextQtr_growthRate",
    "EPS_nextYr_growthRate",
    "PEG_next12months",
    # "PEG_TTM",
    'ERP_TTM',
    'latest_qtr_liquidation_mktcap_ratio_%',
    'PFCF_TTM',
    'FCF_yield_TTM',
    'Industry_PE_TTM_avg',
    'BVPS_latest',
    'stock_price',
]

ticker_screen_df.loc[:, selected_cols]

Unnamed: 0,Ticker,EPS_TTM,PE_TTM,PE_TTM_avg,price_valuation_assessment,EPS_nextYr_growthRate,PEG_next12months,ERP_TTM,latest_qtr_liquidation_mktcap_ratio_%,PFCF_TTM,FCF_yield_TTM,Industry_PE_TTM_avg,BVPS_latest,stock_price
1,CE,9.14,8.1,10.63,fair,5.25,4.42,7.74,-45.12,9.59,10.43,17.29,66.57,74.02
0,GOOG,7.54,26.48,24.12,overvalued,18.7,1.29,-0.83,11.72,43.87,2.28,17.29,25.61,199.63


In [None]:
# Display the full screener table with every metric column.
ticker_screen_df

# To list only the undervalued tickers, append this boolean filter to the expression above:
# [ticker_screen_df['price_valuation_assessment'] == 'undervalued']


Unnamed: 0,Ticker,stock_price,EPS_currentQtr,EPS_nextQtr,EPS_TTM,EPS_nextQtr_TTM,EPS_next12months,PE_1yr_forward,PE_TTM,PE_TTM_avg,PE_TTM_volatility_+,PE_TTM_volatility_-,PE_5yr_avg,PE_5yr_volatility_+,PE_5yr_volatility_-,relative_valuation_TTM_+,relative_valuation_TTM_-,relative_valuation_TTM_median,relative_valuation_nextQuater_projected_+,relative_valuation_nextQuater_projected_-,relative_valuation_nextQuater_projected_median,relative_valuation_nextYear_projected_+,relative_valuation_nextYear_projected_-,relative_valuation_nextYear_projected_median,price_valuation_assessment,EPS_nextQtr_growthRate,EPS_nextYr_growthRate,PEG_next12months,PEG_TTM,EarningYield_TTM,ERP_TTM,latest_qtr_liquidation_mktcap_ratio_%,BVPS_latest,PFCF_TTM,FCF_yield_TTM,MA200_slope,Industry_PE_TTM_avg
2,AXP,269.11,3.49,3.29,12.74,12.73,14.94,18.59,21.12,19.69,20.57,18.81,18.08,24.49,11.67,262.06,239.64,250.85,261.86,239.45,250.66,307.32,281.02,294.17,overvalued,-0.08,17.27,2.09,1.22,4.73,0.7,,41.49,8.22,12.17,0.479385,28.82
0,V,274.96,2.42,2.58,9.67,9.92,11.07,24.51,28.43,28.37,29.28,27.46,33.45,38.96,27.94,283.14,265.54,274.34,290.46,272.4,281.43,324.13,303.98,314.06,fair,2.59,14.48,1.78,1.96,3.52,-0.51,-1.0,21.65,26.62,3.76,0.114991,28.82
1,MA,497.06,3.59,3.73,13.47,13.81,16.63,29.59,36.9,35.24,36.22,34.26,38.85,47.45,30.25,487.88,461.48,474.68,500.2,473.13,486.66,602.34,569.74,586.04,overvalued,2.52,23.46,1.78,1.57,2.71,-1.32,-0.93,7.99,41.98,2.38,0.395689,28.82


# Price EPS chart

In [16]:
# Parameters for the price / EPS chart: the API key and the ticker(s) to fetch.
alpha_vantage_api_key = API_KEY

ticker_symbols = ['CE']



In [17]:
# Daily quote section
for j, symbol in enumerate(ticker_symbols):

    print(j, symbol)
    # STOCK SPLIT FACTOR section
    # Fetch the split history; records are read from the response's 'data' entry.
    url = f'https://www.alphavantage.co/query?function=SPLITS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'data' in data:
        split_records = data['data']
        if split_records:
            # Parse the factor as a number and the effective date as a timestamp
            stock_split_record_df = pd.DataFrame(split_records).assign(
                split_factor=lambda frame: pd.to_numeric(frame['split_factor'], errors='coerce'),
                effective_date=lambda frame: pd.to_datetime(frame['effective_date']),
            )
        else:
            # No splits on record: an empty frame that still carries both columns
            stock_split_record_df = pd.DataFrame().assign(
                split_factor=1,
                effective_date=datetime.today(),
            )


    # Daily quote section
    # Full daily history; only the closing price is kept, as column 'stock_price'.
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&apikey={alpha_vantage_api_key}&outputsize=full'
    r = requests.get(url)
    data = r.json()

    if 'Time Series (Daily)' in data:
        selected_cols = ['4. close']

        # The payload is keyed by date, so transpose to get one row per date
        Daily_stock_df = (
            pd.DataFrame(data['Time Series (Daily)'])
            .transpose()[selected_cols]
            .rename(columns={'4. close': 'stock_price'})
        )

        # Prices arrive as text: convert to float, keep two decimals
        Daily_stock_df["stock_price"] = Daily_stock_df["stock_price"].astype(str).map(float).round(2)
        Daily_stock_df.index = pd.to_datetime(Daily_stock_df.index)


    # Back-adjust prices for stock splits: for every split whose effective date is one of
    # the quoted dates, divide all earlier prices by that split's factor.
    # The quoted dates are extracted once and looked up through a set, instead of being
    # recomputed and compared pairwise for every (quote date, split date) combination.
    quote_dates = Daily_stock_df.index.date
    quote_date_set = set(quote_dates)
    split_dates = stock_split_record_df['effective_date'].dt.date

    for date_j, split_factor in zip(split_dates, stock_split_record_df['split_factor']):
        if date_j in quote_date_set:
            # stock price to divided the split factor
            Daily_stock_df.loc[quote_dates < date_j, 'stock_price'] /= split_factor




    # Earning section
    # Reported annual and quarterly EPS from the Alpha Vantage EARNINGS endpoint
    url = f'https://www.alphavantage.co/query?function=EARNINGS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualEarnings' in data:
        selected_cols = ['fiscalDateEnding', 'reportedEPS']

        annualEPS_df = pd.DataFrame(data['annualEarnings'])

        # Keep only the year of the fiscal period end
        annualEPS_df['fiscalDateEnding'] = pd.to_datetime(annualEPS_df['fiscalDateEnding']).dt.year

        # EPS arrives as text: convert to float
        annualEPS_df['reportedEPS'] = annualEPS_df['reportedEPS'].astype(str).map(float)

        # One row per year (the highest EPS wins on duplicates), newest year first
        annualEPS_df = annualEPS_df.sort_values('reportedEPS', ascending=False).drop_duplicates('fiscalDateEnding')
        annualEPS_df = annualEPS_df.sort_values('fiscalDateEnding', ascending=False).reset_index(drop=True)

    if 'quarterlyEarnings' in data:
        selected_cols = ['reportedDate', 'reportedEPS']

        qtrEPS_df = pd.DataFrame(data['quarterlyEarnings'])[selected_cols]
        qtrEPS_df['reportedDate'] = pd.to_datetime(qtrEPS_df['reportedDate'])

        # EPS arrives as text: convert to float, treating missing markers as 0
        missing_markers = ('None', 'nan', 'NaN')
        qtrEPS_df['reportedEPS'] = qtrEPS_df['reportedEPS'].astype(str).map(
            lambda raw: float(0) if raw in missing_markers else float(raw)
        )



    # Consolidated section
    # For every quote date, derive trailing EPS figures from the quarterly reports whose
    # reportedDate is strictly before that date.
    stock_consolidate_df = Daily_stock_df
    stock_consolidate_df_date = stock_consolidate_df.index
    for i in stock_consolidate_df_date:

        reported_before = qtrEPS_df['reportedDate'] < i
        filtered_qtrEPS_df = qtrEPS_df.loc[reported_before]

        # First four / three / one rows of the filtered reports
        # (which quarters these are depends on the row order of qtrEPS_df)
        past_4_qtrs_EPS = filtered_qtrEPS_df.iloc[:4]
        past_3_qtrs_EPS = filtered_qtrEPS_df.iloc[:3]
        past_1_qtr_EPS = filtered_qtrEPS_df.iloc[:1]

        # Trailing-twelve-month EPS and the single most recent quarter's EPS
        EPS_TTM = past_4_qtrs_EPS['reportedEPS'].to_numpy().sum()
        EPS_curr_qtr = past_1_qtr_EPS['reportedEPS'].to_numpy().sum()

        # Write both figures onto this quote date's row
        stock_consolidate_df.loc[i, "EPS_TTM"] = EPS_TTM
        stock_consolidate_df.loc[i, "EPS_currentQtr"] = EPS_curr_qtr


    # stock's stats
    stock_consolidate_df["Ticker"] = symbol
    stock_consolidate_df["PE_TTM"] = (stock_consolidate_df["stock_price"] / stock_consolidate_df["EPS_TTM"]).round(2)
    # Mean and standard deviation of PE_TTM over all rows, broadcast to every row
    stock_consolidate_df["PE_TTM_avg"] = stock_consolidate_df["PE_TTM"].mean().round(2)
    stock_consolidate_df["PE_TTM_std"] = np.std(stock_consolidate_df["PE_TTM"]).round(2)
    stock_consolidate_df["PE_TTM_volatility_+"] = (stock_consolidate_df["PE_TTM_avg"] + stock_consolidate_df["PE_TTM_std"]).round(2) # upper bound of the PE volatility range
    stock_consolidate_df["PE_TTM_volatility_-"] = (stock_consolidate_df["PE_TTM_avg"] - stock_consolidate_df["PE_TTM_std"]).round(2) # lower bound of the PE volatility range

    stock_consolidate_df["relative_valuation_TTM_+"] = (stock_consolidate_df["PE_TTM_volatility_+"] * stock_consolidate_df["EPS_TTM"]).round(2) # upper price bound of the relative valuation
    stock_consolidate_df["relative_valuation_TTM_-"] = (stock_consolidate_df["PE_TTM_volatility_-"] * stock_consolidate_df["EPS_TTM"]).round(2) # lower price bound of the relative valuation

    # Median price based on the first row's TTM PE valuation bounds.
    # .iloc[0] selects the first row by position; a bare [0] on this date-indexed Series
    # relies on the deprecated "integer key as position" fallback.
    first_row_upper = stock_consolidate_df["relative_valuation_TTM_+"].iloc[0]
    first_row_lower = stock_consolidate_df["relative_valuation_TTM_-"].iloc[0]
    stock_consolidate_df["relative_valuation_TTM_median"] = (np.median([first_row_upper, first_row_lower])).round(2)


0 CE


In [18]:
import plotly.graph_objects as go


# Dual-axis chart: stock price on the left axis, EPS_TTM and PE_TTM on the right axis.
fig = go.Figure()

# Add stock_price on primary y-axis (left)
fig.add_trace(go.Scatter(
    x=stock_consolidate_df.index,
    y=stock_consolidate_df["stock_price"],
    mode='lines',
    line=dict(color='black'),
    name='Stock Price',
    yaxis="y1"
))

# Add EPS_TTM on secondary y-axis (right)
fig.add_trace(go.Scatter(
    x=stock_consolidate_df.index,
    y=stock_consolidate_df["EPS_TTM"],
    mode='lines',
    fill='tonexty',  # Shadow effect
    line=dict(color='green'),
    name='EPS_TTM',
    yaxis="y2"
))

# Add PE_TTM on secondary y-axis (right)
fig.add_trace(go.Scatter(
    x=stock_consolidate_df.index,
    y=stock_consolidate_df["PE_TTM"],
    mode='lines',
    line=dict(color='blue', dash='dot'),
    name='PE_TTM',
    yaxis="y2"
))

# Update layout to remove grid and configure dual y-axes.
# Axis title colours are set through title=dict(text=..., font=...): the flat
# `titlefont` axis attribute is deprecated and rejected by newer plotly releases.
fig.update_layout(
    title="Stock Metrics Over Time",
    xaxis=dict(title="Date", showgrid=False),
    yaxis=dict(
        title=dict(text="Stock Price", font=dict(color="black")),
        showgrid=False,
        tickfont=dict(color="black")
    ),
    yaxis2=dict(
        title=dict(text="EPS_TTM & PE_TTM", font=dict(color="blue")),
        overlaying="y",  # Overlay with y1
        side="right",
        showgrid=False,
        tickfont=dict(color="blue")
    ),
    legend_title="Metrics",
    template="plotly_white",
    plot_bgcolor='white'  # Background color
)

# Show the plot
fig.show()

# Financial Statement

In [20]:
# Per-ticker results: ticker -> {'annual': ..., 'qtr': ...}, each holding the
# dict form of that ticker's consolidated ratios frame.
stock_financial_dict = dict()

# Tickers to process (alternative: ticker_symbols = electric_utility_symbols)
ticker_symbols = ['CE']

# Number of past years covered by the annual PE calculation
PE_yr_range = 15


# storage process
for j, symbol in enumerate(ticker_symbols):
    print(j, symbol)
    # COMPANY OVERVIEW
    # The market capitalisation read here is the denominator of the
    # liquidation / market-cap ratios computed in the consolidated section.
    url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    # Parse defensively: when the field is absent or not numeric, fall back to NaN so the
    # dependent ratios become NaN instead of crashing or silently reusing the value left
    # over from a previously processed ticker.
    try:
        stock_mkt_cap = float(data.get('MarketCapitalization'))
    except (TypeError, ValueError):
        stock_mkt_cap = float('nan')



    # STOCK SPLIT FACTOR section
    url = f'https://www.alphavantage.co/query?function=SPLITS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    split_records = data.get('data')
    if split_records:
        stock_split_record_df = pd.DataFrame(split_records)
        stock_split_record_df['split_factor'] = pd.to_numeric(stock_split_record_df['split_factor'], errors='coerce') # change split_factor series to numeric data
        stock_split_record_df['effective_date'] = pd.to_datetime(stock_split_record_df['effective_date'])
    else:
        # No split records (empty list, or no 'data' entry in the response): always start
        # from a fresh empty frame. Writing into the existing variable would fail on a
        # fresh kernel and would otherwise overwrite / reuse another ticker's split table.
        stock_split_record_df = pd.DataFrame()
        stock_split_record_df['split_factor'] = 1
        stock_split_record_df['effective_date'] = datetime.today()



    # Monthly quote section
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'Monthly Time Series' in data:
        Monthly_stock_df = pd.DataFrame(data['Monthly Time Series'])

    # The payload is keyed by date, so transpose to get one row per month
    Monthly_stock_df = Monthly_stock_df.transpose()
    Monthly_stock_df.index = pd.to_datetime(Monthly_stock_df.index)

    # Keep the December rows (year-end close) of the PE_yr_range calendar years
    # preceding the current year
    current_year = datetime.today().year
    filter_1 = Monthly_stock_df.index.year.isin(range(current_year - PE_yr_range, current_year))
    filter_2 = Monthly_stock_df.index.month == 12

    selected_cols = ['4. close']

    Monthly_stock_df = (
        Monthly_stock_df
        .loc[filter_1 & filter_2, selected_cols]
        .rename(columns={'4. close': 'stock_price'})
    )

    # Prices arrive as text: convert to float, keep two decimals
    Monthly_stock_df["stock_price"] = Monthly_stock_df["stock_price"].astype(str).map(float).round(2)

    # Split adjustment at year granularity: when a split year matches one of the price
    # years, prices of all earlier years are divided by the factor of the first split
    # record of that year.
    for year_i in Monthly_stock_df.index.year:
        for year_j in stock_split_record_df['effective_date'].dt.year:
            if year_i != year_j:
                continue

            splits_in_year = stock_split_record_df['effective_date'].dt.year == year_j
            split_factor = stock_split_record_df['split_factor'][splits_in_year].values[0]
            Monthly_stock_df.loc[Monthly_stock_df.index.year < year_j, 'stock_price'] /= split_factor
    


    # Earning section
    # Annual reported EPS from the Alpha Vantage EARNINGS endpoint, turned into a
    # per-year PE using the year-end prices in Monthly_stock_df.
    url = f'https://www.alphavantage.co/query?function=EARNINGS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualEarnings' in data:
        selected_cols = [
            'fiscalDateEnding'
            ,'reportedEPS'
        ]

        annualEPS_df = pd.DataFrame(data['annualEarnings'])

        # Keep only the year of the fiscal period end
        annualEPS_df['fiscalDateEnding'] = pd.to_datetime(annualEPS_df['fiscalDateEnding']).dt.year

        # Same year window as the year-end prices: the PE_yr_range years before this year
        current_year = datetime.today().year
        annualEPS_df = annualEPS_df[
            annualEPS_df['fiscalDateEnding'].isin(range(current_year - PE_yr_range, current_year))
        ]

        # EPS arrives as text: convert to float
        annualEPS_df['reportedEPS'] = annualEPS_df['reportedEPS'].astype(str).apply(float)

        # clean annualEPS_df: one row per year (highest EPS wins), newest year first
        annualEPS_df = annualEPS_df.sort_values('reportedEPS', ascending=False).drop_duplicates('fiscalDateEnding')
        annualEPS_df = annualEPS_df.sort_values('fiscalDateEnding', ascending=False).reset_index(drop=True)

        # calculate PE, matching price and EPS by year. Dividing the two raw arrays
        # position by position raises when their lengths differ and silently pairs the
        # wrong years when one side is missing a year; a year with no price yields NaN here.
        year_end_price = Monthly_stock_df["stock_price"].groupby(Monthly_stock_df.index.year).first()
        annualEPS_df["PE"] = annualEPS_df['fiscalDateEnding'].map(year_end_price) / annualEPS_df['reportedEPS']




    # INCOME STATEMENT
    # Annual and quarterly income statements: numeric conversion, period-over-period
    # percentage change per metric, and margin ratios.
    url = f'https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualReports' in data:
        annual_income_df = (
            pd.DataFrame(data['annualReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Every column after the first one (the first is skipped as 'fiscalDateEnding')
        annual_income_metric_cols = annual_income_df.columns[1:]

        # Year-over-year change in percent for each metric
        for column in annual_income_metric_cols:
            annual_income_df[column] = pd.to_numeric(annual_income_df[column], errors='coerce')
            annual_income_df[f'{column}_YoY'] = annual_income_df[column].pct_change() * 100

        annual_income_YoY_metric_cols = [col for col in annual_income_df.columns if 'YoY' in col]

        # Margins as a percentage of total revenue
        annual_revenue = annual_income_df['totalRevenue']
        annual_income_df['gross_margin_%'] = (annual_income_df['grossProfit'] / annual_revenue) * 100
        annual_income_df['operating_margin_%'] = (annual_income_df['operatingIncome'] / annual_revenue) * 100
        annual_income_df['net_margin_%'] = (annual_income_df['netIncome'] / annual_revenue) * 100

    if 'quarterlyReports' in data:
        qtr_income_df = (
            pd.DataFrame(data['quarterlyReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Every column after the first one (the first is skipped as 'fiscalDateEnding')
        qtr_income_metric_cols = qtr_income_df.columns[1:]

        # Quarter-over-quarter change in percent for each metric
        for column in qtr_income_df.columns[1:]:
            qtr_income_df[column] = pd.to_numeric(qtr_income_df[column], errors='coerce')
            qtr_income_df[f'{column}_QoQ'] = qtr_income_df[column].pct_change() * 100

        qtr_income_QoQ_metric_cols = [col for col in qtr_income_df.columns if 'QoQ' in col]

        # Margins as a percentage of total revenue
        qtr_revenue = qtr_income_df['totalRevenue']
        qtr_income_df['gross_margin_%'] = (qtr_income_df['grossProfit'] / qtr_revenue) * 100
        qtr_income_df['operating_margin_%'] = (qtr_income_df['operatingIncome'] / qtr_revenue) * 100
        qtr_income_df['net_margin_%'] = (qtr_income_df['netIncome'] / qtr_revenue) * 100

    # Income-statement columns carried into the consolidated ratio frames
    income_ratio_cols = ['gross_margin_%', 'operating_margin_%', 'net_margin_%', 'netIncome']
    
    
    # BALANCESHEET
    # Annual and quarterly balance sheets: numeric conversion, period-over-period
    # percentage change per metric, and liquidity / leverage ratios.
    url = f'https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualReports' in data:
        annual_balancesheet_df = (
            pd.DataFrame(data['annualReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Skip the first column ('fiscalDateEnding'); convert and add the % change
        for column in annual_balancesheet_df.columns[1:]:
            annual_balancesheet_df[column] = pd.to_numeric(annual_balancesheet_df[column], errors='coerce')
            annual_balancesheet_df[f'{column}_YoY'] = annual_balancesheet_df[column].pct_change() * 100

        annual_current_assets = annual_balancesheet_df['totalCurrentAssets']
        annual_current_liabilities = annual_balancesheet_df['totalCurrentLiabilities']
        annual_balancesheet_df['current_ratio'] = annual_current_assets / annual_current_liabilities
        annual_balancesheet_df['working_capital'] = annual_current_assets - annual_current_liabilities
        annual_balancesheet_df['longTermDebt_to_workingCp_ratio'] = annual_balancesheet_df['longTermDebt'] / annual_balancesheet_df['working_capital']
        annual_balancesheet_df['debtEquity_ratio'] = annual_balancesheet_df['totalLiabilities'] / annual_balancesheet_df['totalShareholderEquity']
        annual_balancesheet_df['quick_ratio'] = (annual_current_assets - annual_balancesheet_df['inventory']) / annual_current_liabilities

    if 'quarterlyReports' in data:
        qtr_balancesheet_df = (
            pd.DataFrame(data['quarterlyReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Skip the first column ('fiscalDateEnding'); convert and add the % change.
        # NOTE: the change is between consecutive quarterly rows even though the
        # generated column suffix is '_YoY'.
        for column in qtr_balancesheet_df.columns[1:]:
            qtr_balancesheet_df[column] = pd.to_numeric(qtr_balancesheet_df[column], errors='coerce')
            qtr_balancesheet_df[f'{column}_YoY'] = qtr_balancesheet_df[column].pct_change() * 100

        qtr_current_assets = qtr_balancesheet_df['totalCurrentAssets']
        qtr_current_liabilities = qtr_balancesheet_df['totalCurrentLiabilities']
        qtr_balancesheet_df['current_ratio'] = qtr_current_assets / qtr_current_liabilities
        qtr_balancesheet_df['working_capital'] = qtr_current_assets - qtr_current_liabilities
        qtr_balancesheet_df['longTermDebt_to_workingCp_ratio'] = qtr_balancesheet_df['longTermDebt'] / qtr_balancesheet_df['working_capital']
        qtr_balancesheet_df['debtEquity_ratio'] = qtr_balancesheet_df['totalLiabilities'] / qtr_balancesheet_df['totalShareholderEquity']
        qtr_balancesheet_df['quick_ratio'] = (qtr_current_assets - qtr_balancesheet_df['inventory']) / qtr_current_liabilities

    # Balance-sheet columns carried into the consolidated ratio frames
    balancesheet_ratio_cols = [
        'current_ratio',
        'working_capital',
        'longTermDebt_to_workingCp_ratio',
        'debtEquity_ratio',
        'quick_ratio',
        'totalShareholderEquity',
    ]


    # CASHFLOW STATEMENT
    # Annual and quarterly cash-flow statements: numeric conversion plus the
    # period-over-period percentage change of every metric.
    url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    if 'annualReports' in data:
        annual_cashflow_df = (
            pd.DataFrame(data['annualReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Skip the first column ('fiscalDateEnding'); convert and add the % change
        for column in annual_cashflow_df.columns[1:]:
            annual_cashflow_df[column] = pd.to_numeric(annual_cashflow_df[column], errors='coerce')
            annual_cashflow_df[f'{column}_YoY'] = annual_cashflow_df[column].pct_change() * 100

    if 'quarterlyReports' in data:
        qtr_cashflow_df = (
            pd.DataFrame(data['quarterlyReports'])
            .drop(columns=['reportedCurrency'])
            .sort_values('fiscalDateEnding', ascending=True)
        )

        # Skip the first column ('fiscalDateEnding'); convert and add the % change.
        # NOTE: the change is between consecutive quarterly rows even though the
        # generated column suffix is '_YoY'.
        for column in qtr_cashflow_df.columns[1:]:
            qtr_cashflow_df[column] = pd.to_numeric(qtr_cashflow_df[column], errors='coerce')
            qtr_cashflow_df[f'{column}_YoY'] = qtr_cashflow_df[column].pct_change() * 100

    # Cash-flow columns carried into the consolidated ratio frames
    cashflow_ratio_cols = [
        'operatingCashflow',
        'capitalExpenditures',
        'cashflowFromInvestment',
        'cashflowFromFinancing',
        'netIncome',
    ]


    ####################
    ### Consolidated ###
    ####################
    # Build the per-ticker annual / quarterly ratio frames from the three statements,
    # attach PE and EPS, and store both frames (as dicts) in stock_financial_dict.
    stock_ratios_annual_consolidate_df = pd.DataFrame()
    stock_ratios_qtr_consolidate_df = pd.DataFrame()
    # The cross-ticker PE table is created for the first ticker only; re-creating it on
    # every iteration would discard the columns of all previously processed tickers.
    if j == 0:
        stock_PE_annual_consolidate_df = pd.DataFrame()

    stock_ratios_annual_consolidate_df[income_ratio_cols] = annual_income_df[income_ratio_cols]
    stock_ratios_annual_consolidate_df[balancesheet_ratio_cols] = annual_balancesheet_df[balancesheet_ratio_cols]
    stock_ratios_annual_consolidate_df[cashflow_ratio_cols] = annual_cashflow_df[cashflow_ratio_cols]

    stock_ratios_qtr_consolidate_df[income_ratio_cols] = qtr_income_df[income_ratio_cols]
    stock_ratios_qtr_consolidate_df[balancesheet_ratio_cols] = qtr_balancesheet_df[balancesheet_ratio_cols]
    stock_ratios_qtr_consolidate_df[cashflow_ratio_cols] = qtr_cashflow_df[cashflow_ratio_cols]

    # calculating new consolidated metrics
    # annual df
    stock_ratios_annual_consolidate_df['ROE_%'] = (annual_income_df['netIncome'] / annual_balancesheet_df['totalShareholderEquity']) * 100
    stock_ratios_annual_consolidate_df['liquidation_value'] = ((annual_balancesheet_df['totalAssets'] - annual_balancesheet_df['intangibleAssets']) - annual_balancesheet_df['totalLiabilities'])
    stock_ratios_annual_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_annual_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_annual_consolidate_df['fiscalDateEnding'] = annual_balancesheet_df['fiscalDateEnding']

    # Attach PE and EPS by fiscal year. annualEPS_df is indexed 0..n by recency within its
    # own year window, so assigning its columns directly aligns on row labels and shifts
    # the values by a year whenever the newest EPS year differs from the newest report year.
    annual_fiscal_year = pd.to_datetime(stock_ratios_annual_consolidate_df['fiscalDateEnding'], errors='coerce').dt.year
    eps_by_fiscal_year = annualEPS_df.set_index('fiscalDateEnding')
    stock_ratios_annual_consolidate_df['PE'] = annual_fiscal_year.map(eps_by_fiscal_year["PE"])
    stock_ratios_annual_consolidate_df['reportedEPS'] = annual_fiscal_year.map(eps_by_fiscal_year["reportedEPS"])

    # YoY %
    # NOTE: the three margin columns below hold year-over-year changes (annual rows)
    # despite their 'QoQ_' prefix; the names are kept so existing consumers keep working.
    stock_ratios_annual_consolidate_df['QoQ_gross_margin_%_%'] = stock_ratios_annual_consolidate_df["gross_margin_%"].pct_change() * 100
    stock_ratios_annual_consolidate_df['QoQ_operating_margin_%_%'] = stock_ratios_annual_consolidate_df["operating_margin_%"].pct_change() * 100
    stock_ratios_annual_consolidate_df['QoQ_net_margin_%_%'] = stock_ratios_annual_consolidate_df["net_margin_%"].pct_change() * 100
    stock_ratios_annual_consolidate_df['YoY_reportedEPS_%'] = stock_ratios_annual_consolidate_df["reportedEPS"].pct_change() * 100 # growth rate of yearly EPS

    # qtr df
    stock_ratios_qtr_consolidate_df['ROE_%'] = (qtr_income_df['netIncome'] / qtr_balancesheet_df['totalShareholderEquity']) * 100
    stock_ratios_qtr_consolidate_df['liquidation_value'] = ((qtr_balancesheet_df['totalAssets'] - qtr_balancesheet_df['intangibleAssets']) - qtr_balancesheet_df['totalLiabilities'])
    stock_ratios_qtr_consolidate_df['liquidation_mktcap_ratio_%'] = round((stock_ratios_qtr_consolidate_df['liquidation_value'] / stock_mkt_cap) * 100, 2)
    stock_ratios_qtr_consolidate_df['fiscalDateEnding'] = qtr_balancesheet_df['fiscalDateEnding']

    # QoQ %
    stock_ratios_qtr_consolidate_df['QoQ_gross_margin_%_%'] = stock_ratios_qtr_consolidate_df["gross_margin_%"].pct_change() * 100
    stock_ratios_qtr_consolidate_df['QoQ_operating_margin_%_%'] = stock_ratios_qtr_consolidate_df["operating_margin_%"].pct_change() * 100
    stock_ratios_qtr_consolidate_df['QoQ_net_margin_%_%'] = stock_ratios_qtr_consolidate_df["net_margin_%"].pct_change() * 100

    # annual PE df: the first ticker defines the list of fiscal years; every ticker then
    # contributes one '<symbol>_PE' column matched to those years.
    if stock_PE_annual_consolidate_df.empty:
        stock_PE_annual_consolidate_df['fiscalDateEnding'] = annualEPS_df['fiscalDateEnding']

    stock_PE_annual_consolidate_df[f'{symbol}_PE'] = stock_PE_annual_consolidate_df['fiscalDateEnding'].map(eps_by_fiscal_year["PE"].round(2))

    # reformat the columns order: 'fiscalDateEnding' first, everything else unchanged
    stock_ratios_annual_consolidate_df = stock_ratios_annual_consolidate_df[['fiscalDateEnding'] + [col for col in stock_ratios_annual_consolidate_df.columns if col != 'fiscalDateEnding']]
    stock_ratios_qtr_consolidate_df = stock_ratios_qtr_consolidate_df[['fiscalDateEnding'] + [col for col in stock_ratios_qtr_consolidate_df.columns if col != 'fiscalDateEnding']]

    # store the stock, dataframe value pair to the dictionary
    # (each frame is converted to a plain nested dict first)
    stock_ratios_annual_consolidate_json = stock_ratios_annual_consolidate_df.to_dict()
    stock_ratios_qtr_consolidate_json = stock_ratios_qtr_consolidate_df.to_dict()

    stock_financial_dict[symbol] = {
        'annual': stock_ratios_annual_consolidate_json
        ,'qtr': stock_ratios_qtr_consolidate_json
    }

0 CE


In [21]:
# Inspect the quarterly balance-sheet frame that the ratio loop above reads from.
# NOTE(review): the loop printed only "0 CE", so this is presumably CE's data — confirm.
qtr_balancesheet_df

Unnamed: 0,fiscalDateEnding,totalAssets,totalCurrentAssets,cashAndCashEquivalentsAtCarryingValue,cashAndShortTermInvestments,inventory,currentNetReceivables,totalNonCurrentAssets,propertyPlantEquipment,accumulatedDepreciationAmortizationPPE,intangibleAssets,intangibleAssetsExcludingGoodwill,goodwill,investments,longTermInvestments,shortTermInvestments,otherCurrentAssets,otherNonCurrentAssets,totalLiabilities,totalCurrentLiabilities,currentAccountsPayable,deferredRevenue,currentDebt,shortTermDebt,totalNonCurrentLiabilities,capitalLeaseObligations,longTermDebt,currentLongTermDebt,longTermDebtNoncurrent,shortLongTermDebtTotal,otherCurrentLiabilities,otherNonCurrentLiabilities,totalShareholderEquity,treasuryStock,retainedEarnings,commonStock,commonStockSharesOutstanding,totalAssets_YoY,totalCurrentAssets_YoY,cashAndCashEquivalentsAtCarryingValue_YoY,cashAndShortTermInvestments_YoY,inventory_YoY,currentNetReceivables_YoY,totalNonCurrentAssets_YoY,propertyPlantEquipment_YoY,accumulatedDepreciationAmortizationPPE_YoY,intangibleAssets_YoY,intangibleAssetsExcludingGoodwill_YoY,goodwill_YoY,investments_YoY,longTermInvestments_YoY,shortTermInvestments_YoY,otherCurrentAssets_YoY,otherNonCurrentAssets_YoY,totalLiabilities_YoY,totalCurrentLiabilities_YoY,currentAccountsPayable_YoY,deferredRevenue_YoY,currentDebt_YoY,shortTermDebt_YoY,totalNonCurrentLiabilities_YoY,capitalLeaseObligations_YoY,longTermDebt_YoY,currentLongTermDebt_YoY,longTermDebtNoncurrent_YoY,shortLongTermDebtTotal_YoY,otherCurrentLiabilities_YoY,otherNonCurrentLiabilities_YoY,totalShareholderEquity_YoY,treasuryStock_YoY,retainedEarnings_YoY,commonStock_YoY,commonStockSharesOutstanding_YoY,current_ratio,working_capital,longTermDebt_to_workingCp_ratio,debtEquity_ratio,quick_ratio
61,2009-06-30,7623000000,2778000000,1145000000,1151000000,473000000,702000000,4769000000,2533000000,1012000000,1116000000,328000000,788000000,,767000000,6000000.0,63000000,327000000,7377000000,1342000000,,,224000000,224000000,6033000000,,3268000000,,3268000000,3492000000,529000000,1254000000,246000000,781000000,1114000000,,143528126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.070045,1436000000,2.275766,29.987805,1.717586
60,2009-09-30,8250000000,2862000000,1293000000,1297000000,467000000,728000000,5305000000,2687000000,1084000000,1121000000,315000000,806000000,,811000000,4000000.0,85000000,328000000,7566000000,1473000000,558000000.0,,265000000,265000000,6091000000,,3312000000,,3312000000,3577000000,606000000,1270000000,684000000,781000000,1505000000,,143591231,8.225108,3.023758,12.925764,12.684622,-1.268499,3.703704,11.239254,6.079747,7.114625,0.448029,-3.963415,2.284264,,5.736636,-33.333333,34.920635,0.30581,2.562017,9.76155,,,18.303571,18.303571,0.961379,,1.346389,,1.346389,2.434135,14.555766,1.275917,178.04878,0.0,35.098743,,0.043967,1.942974,1389000000,2.384449,11.061404,1.625933
59,2009-12-31,8412000000,2856000000,1254000000,1257000000,522000000,721000000,5474000000,2797000000,1130000000,1092000000,294000000,798000000,,792000000,3000000.0,50000000,311000000,7826000000,1607000000,649000000.0,,242000000,242000000,6219000000,,3259000000,,3259000000,3501000000,611000000,1306000000,586000000,781000000,1505000000,,144127781,1.963636,-0.209644,-3.016241,-3.08404,11.777302,-0.961538,3.185674,4.093785,4.243542,-2.586976,-6.666667,-0.992556,,-2.342787,-25.0,-41.176471,-5.182927,3.436426,9.097081,16.308244,,-8.679245,-8.679245,2.101461,,-1.600242,,-1.600242,-2.124685,0.825083,2.834646,-14.327485,0.0,0.0,,0.373665,1.777225,1249000000,2.609287,13.354949,1.452396
58,2010-03-31,8202000000,2851000000,1139000000,1143000000,545000000,801000000,5272000000,2723000000,1122000000,1031000000,266000000,765000000,,764000000,4000000.0,42000000,266000000,7628000000,1540000000,626000000.0,,258000000,258000000,6088000000,,3233000000,,3233000000,3491000000,552000000,1224000000,574000000,781000000,1511000000,,150272227,-2.496434,-0.17507,-9.170654,-9.069212,4.40613,11.0957,-3.690172,-2.645692,-0.707965,-5.586081,-9.52381,-4.135338,,-3.535354,33.333333,-16.0,-14.469453,-2.530028,-4.169259,-3.543914,,6.61157,6.61157,-2.106448,,-0.797791,,-0.797791,-0.285633,-9.656301,-6.278714,-2.047782,0.0,0.398671,,4.263193,1.851299,1311000000,2.466056,13.289199,1.497403
57,2010-06-30,8105000000,2822000000,1081000000,1083000000,522000000,862000000,5208000000,2676000000,1111000000,1005000000,269000000,736000000,,769000000,2000000.0,70000000,273000000,7416000000,1510000000,607000000.0,,265000000,265000000,6881000000,,3162000000,,3162000000,3427000000,532000000,1139000000,689000000,801000000,1664000000,,156326226,-1.182638,-1.017187,-5.092186,-5.249344,-4.220183,7.615481,-1.213961,-1.726037,-0.980392,-2.521823,1.12782,-3.79085,,0.65445,-50.0,66.666667,2.631579,-2.779234,-1.948052,-3.035144,,2.713178,2.713178,13.025624,,-2.196103,,-2.196103,-1.833286,-3.623188,-6.944444,20.034843,2.560819,10.125745,,4.028688,1.868874,1312000000,2.410061,10.763425,1.523179
56,2010-09-30,8394000000,2767000000,884000000,886000000,578000000,897000000,5548000000,2884000000,1159000000,1056000000,271000000,785000000,,817000000,2000000.0,91000000,292000000,7477000000,1637000000,640000000.0,,261000000,261000000,6895000000,,3010000000,,3010000000,3271000000,589000000,1175000000,917000000,822000000,1801000000,,155859508,3.5657,-1.948972,-18.223867,-18.190212,10.727969,4.060325,6.528418,7.772795,4.320432,5.074627,0.743494,6.657609,,6.241873,0.0,30.0,6.959707,0.822546,8.410596,5.436573,,-1.509434,-1.509434,0.203459,,-4.807084,,-4.807084,-4.552086,10.714286,3.160667,33.091437,2.621723,8.233173,,-0.298554,1.690287,1130000000,2.663717,8.153762,1.337202
55,2010-12-31,8281000000,2668000000,740000000,818000000,610000000,827000000,5613000000,3017000000,1131000000,1026000000,252000000,774000000,916000000.0,838000000,78000000.0,59000000,289000000,7355000000,1542000000,673000000.0,,228000000,154000000,5813000000,,3064000000,74000000.0,2990000000,2990000000,596000000,1075000000,926000000,829000000,1851000000,,155737184,-1.3462,-3.577882,-16.289593,-7.674944,5.536332,-7.80379,1.171593,4.61165,-2.415876,-2.840909,-7.01107,-1.401274,,2.570379,3800.0,-35.164835,-1.027397,-1.63167,-5.803299,5.15625,,-12.643678,-40.996169,-15.692531,,1.79402,,-0.664452,-8.590645,1.188455,-8.510638,0.981461,0.851582,2.776235,,-0.078484,1.73022,1126000000,2.721137,7.942765,1.33463
54,2011-03-31,8622000000,2851000000,722000000,796000000,688000000,950000000,5771000000,3153000000,1203000000,1056000000,252000000,804000000,896000000.0,822000000,74000000.0,45000000,302000000,7486000000,1610000000,740000000.0,,219000000,219000000,6885000000,,3003000000,,3003000000,3003000000,554000000,1114000000,1136000000,832000000,1985000000,,155966259,4.11786,6.85907,-2.432432,-2.689487,12.786885,14.873035,2.814894,4.507789,6.366048,2.923977,0.0,3.875969,-2.183406,-1.909308,-5.128205,-23.728814,4.49827,1.781101,4.409857,9.955423,,-3.947368,42.207792,18.441424,,-1.990862,0.0,0.434783,0.434783,-7.04698,3.627907,22.678186,0.361882,7.23933,,0.147091,1.770807,1241000000,2.419823,6.589789,1.343478
53,2011-06-30,8919000000,3014000000,741000000,811000000,779000000,1027000000,5905000000,3273000000,1261000000,1051000000,238000000,813000000,908000000.0,838000000,70000000.0,63000000,309000000,7546000000,1641000000,786000000.0,,155000000,97000000,7078000000,,2951000000,58000000.0,2893000000,3048000000,575000000,1277000000,1373000000,842000000,2180000000,,156280721,3.444676,5.717292,2.631579,1.884422,13.226744,8.105263,2.321955,3.805899,4.82128,-0.473485,-5.555556,1.119403,1.339286,1.946472,-5.405405,40.0,2.317881,0.801496,1.925466,6.216216,,-29.223744,-55.707763,2.803195,,-1.731602,-21.621622,-3.663004,1.498501,3.790614,14.631957,20.862676,1.201923,9.823678,,0.201622,1.836685,1373000000,2.149308,5.495994,1.361974
52,2011-09-30,8711000000,2898000000,704000000,768000000,777000000,978000000,5813000000,3233000000,1284000000,993000000,213000000,780000000,905000000.0,841000000,64000000.0,71000000,334000000,7255000000,1613000000,713000000.0,,161000000,106000000,6763000000,,2948000000,55000000.0,2893000000,3054000000,583000000,1214000000,1456000000,857000000,2338000000,,156194459,-2.3321,-3.848706,-4.993252,-5.302096,-0.256739,-4.771178,-1.558002,-1.22212,1.823949,-5.518554,-10.504202,-4.059041,-0.330396,0.357995,-8.571429,12.698413,8.090615,-3.856348,-1.706277,-9.287532,,3.870968,9.278351,-4.45041,,-0.10166,-5.172414,0.0,0.19685,1.391304,-4.933438,6.045157,1.781473,7.247706,,-0.055197,1.796652,1285000000,2.294163,4.98283,1.314941


In [22]:
# Inspect the quarterly income-statement frame that the ratio loop above reads from.
# NOTE(review): the loop printed only "0 CE", so this is presumably CE's data — confirm.
qtr_income_df

Unnamed: 0,fiscalDateEnding,grossProfit,totalRevenue,costOfRevenue,costofGoodsAndServicesSold,operatingIncome,sellingGeneralAndAdministrative,researchAndDevelopment,operatingExpenses,investmentIncomeNet,netInterestIncome,interestIncome,interestExpense,nonInterestIncome,otherNonOperatingIncome,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome,grossProfit_QoQ,totalRevenue_QoQ,costOfRevenue_QoQ,costofGoodsAndServicesSold_QoQ,operatingIncome_QoQ,sellingGeneralAndAdministrative_QoQ,researchAndDevelopment_QoQ,operatingExpenses_QoQ,investmentIncomeNet_QoQ,netInterestIncome_QoQ,interestIncome_QoQ,interestExpense_QoQ,nonInterestIncome_QoQ,otherNonOperatingIncome_QoQ,depreciation_QoQ,depreciationAndAmortization_QoQ,incomeBeforeTax_QoQ,incomeTaxExpense_QoQ,interestAndDebtExpense_QoQ,netIncomeFromContinuingOperations_QoQ,comprehensiveIncomeNetOfTax_QoQ,ebit_QoQ,ebitda_QoQ,netIncome_QoQ,gross_margin_%,operating_margin_%,net_margin_%
65,2008-06-30,396000000,1868000000,1472000000,1472000000,207000000,138000000,18000000,189000000,,,,63000000,,1000000,,92000000,179000000,45000000,,203000000,,242000000,236000000,134000000,,,,,,,,,,,,,,,,,,,,,,,,,21.199143,11.08137,7.173448
64,2008-09-30,333000000,1823000000,1490000000,1490000000,151000000,142000000,18000000,170000000,,,,65000000,,4000000,,94000000,146000000,-12000000,,164000000,,211000000,221000000,158000000,-15.909091,-2.408994,1.222826,1.222826,-27.05314,2.898551,0.0,-10.05291,,,,3.174603,,300.0,,2.173913,-18.435754,-126.666667,,-19.211823,,-12.809917,-6.355932,17.910448,18.266594,8.28305,8.667032
63,2008-12-31,109000000,1286000000,1177000000,1177000000,-152000000,129000000,16000000,296000000,,,,66000000,,-6000000,,88000000,-199000000,-43000000,,-141000000,,-133000000,-24000000,-156000000,-67.267267,-29.456939,-21.006711,-21.006711,-200.662252,-9.15493,-11.111111,74.117647,,,,1.538462,,-250.0,,-6.382979,-236.30137,258.333333,,-185.97561,,-163.033175,-110.859729,-198.734177,8.475894,-11.819596,-12.130638
62,2009-03-31,200000000,1146000000,946000000,946000000,27000000,114000000,20000000,151000000,,,,51000000,,1000000,,74000000,-15000000,5000000,,-21000000,,36000000,124000000,-20000000,83.486239,-10.88647,-19.626168,-19.626168,-117.763158,-11.627907,25.0,-48.986486,,,,-22.727273,,-116.666667,,-15.909091,-92.462312,-111.627907,,-85.106383,,-127.067669,-616.666667,-87.179487,17.452007,2.356021,-1.745201
61,2009-06-30,248000000,1191000000,1137000000,996000000,89000000,114000000,18000000,214000000,58000000.0,-54000000.0,,54000000,1000000.0,2000000,,21000000,126000000,17000000,54000000.0,110000000,104000000.0,180000000,201000000,109000000,24.0,3.926702,20.190275,5.285412,229.62963,0.0,-10.0,41.721854,,,,5.882353,,100.0,,-71.621622,-940.0,240.0,,-623.809524,,400.0,62.096774,-645.0,20.822838,7.472712,9.151973
60,2009-09-30,266000000,1251000000,1264000000,1038000000,65000000,110000000,18000000,250000000,21000000.0,-51000000.0,,51000000,-2000000.0,-5000000,,20000000,48000000,-350000000,51000000.0,398000000,444000000.0,99000000,119000000,398000000,7.258065,5.037783,11.169745,4.216867,-26.966292,-3.508772,0.0,16.82243,-63.793103,-5.555556,,-5.555556,-300.0,-350.0,,-4.761905,-61.904762,-2158.823529,-5.555556,261.818182,326.923077,-45.0,-40.79602,265.137615,21.26299,5.195843,31.814548
59,2009-12-31,289000000,1338000000,1262000000,1099000000,109000000,136000000,14000000,232000000,18000000.0,-51000000.0,,51000000,1000000.0,6000000,,19000000,91000000,85000000,51000000.0,2000000,-31000000.0,142000000,161000000,6000000,8.646617,6.954436,-0.158228,5.876686,67.692308,23.636364,-22.222222,-7.2,-14.285714,0.0,,0.0,-150.0,-220.0,,-5.0,89.583333,-124.285714,0.0,-99.497487,-106.981982,43.434343,35.294118,-98.492462,21.599402,8.146487,0.44843
58,2010-03-31,218000000,1341000000,1385000000,1170000000,-14000000,124000000,18000000,283000000,28000000.0,-49000000.0,,49000000,2000000.0,6000000,,15000000,-6000000,-20000000,49000000.0,13000000,-35000000.0,43000000,58000000,14000000,-24.567474,0.224215,9.746434,6.460419,-112.844037,-8.823529,28.571429,21.982759,55.555556,-3.921569,,-3.921569,100.0,0.0,,-21.052632,-106.593407,-123.529412,-3.921569,550.0,12.903226,-69.71831,-63.975155,133.333333,16.256525,-1.043997,1.043997
57,2010-06-30,303000000,1468000000,1358000000,1214000000,156000000,124000000,17000000,196000000,73000000.0,-49000000.0,,49000000,15000000.0,-1000000,,15000000,221000000,61000000,49000000.0,163000000,107000000.0,270000000,285000000,160000000,38.990826,9.470544,-1.949458,3.760684,-1214.285714,0.0,-5.555556,-30.742049,160.714286,0.0,,0.0,650.0,-116.666667,,0.0,-3783.333333,-405.0,0.0,1153.846154,-405.714286,527.906977,391.37931,1042.857143,20.640327,10.626703,10.899183
56,2010-09-30,346000000,1457000000,1262000000,1160000000,221000000,125000000,17000000,172000000,1000000.0,-48000000.0,,48000000,-1000000.0,-4000000,,15000000,189000000,44000000,48000000.0,147000000,145000000.0,237000000,252000000,145000000,14.191419,-0.749319,-7.069219,-4.448105,41.666667,0.806452,0.0,-12.244898,-98.630137,-2.040816,,-2.040816,-106.6667,300.0,,0.0,-14.479638,-27.868852,-2.040816,-9.815951,35.514019,-12.222222,-11.578947,-9.375,23.747426,15.168154,9.951956


In [23]:
# Round-trip check: rebuild the quarterly ratios frame from the dict stored for CE
# (the loop above saves DataFrame.to_dict() output under the 'qtr' key) and show its last 20 rows.
pd.DataFrame(stock_financial_dict['CE']['qtr']).tail(20)

Unnamed: 0,fiscalDateEnding,gross_margin_%,operating_margin_%,net_margin_%,netIncome,current_ratio,working_capital,longTermDebt_to_workingCp_ratio,debtEquity_ratio,quick_ratio,totalShareholderEquity,operatingCashflow,capitalExpenditures,cashflowFromInvestment,cashflowFromFinancing,ROE_%,liquidation_value,liquidation_mktcap_ratio_%,QoQ_gross_margin_%_%,QoQ_operating_margin_%_%,QoQ_net_margin_%_%
19,2019-12-31,20.968812,4.512276,2.853351,43000000.0,1.576397,1011000000.0,3.417409,2.779817,0.984607,2507000000.0,326000000.0,144000000.0,-168000000.0,-199000000.0,1.715197,1121000000.0,14.12,-21.240331,-73.013118,-83.129426
18,2020-03-31,24.318658,13.556953,15.234102,218000000.0,1.480809,927000000.0,3.667745,2.899101,0.943465,2448000000.0,259000000.0,119000000.0,-128000000.0,-16000000.0,8.905229,1090000000.0,13.73,15.975374,200.446006,433.902134
17,2020-06-30,20.754717,7.118353,9.176672,107000000.0,1.197613,447000000.0,7.680089,2.822276,0.741821,2487000000.0,379000000.0,88000000.0,-181000000.0,-232000000.0,4.302372,1072000000.0,13.51,-14.655172,-47.492971,-39.762302
16,2020-09-30,23.678494,13.323678,14.989138,207000000.0,1.15027,362000000.0,9.900552,2.895136,0.734745,2508000000.0,431000000.0,72000000.0,-78000000.0,-290000000.0,8.253589,1063000000.0,13.39,14.087289,87.173604,63.339582
15,2020-12-31,23.470662,12.67166,90.699126,1453000000.0,1.907248,1790000000.0,2.050838,2.093874,1.411556,3526000000.0,274000000.0,85000000.0,979000000.0,-933000000.0,41.208168,2041000000.0,25.71,-0.877725,-4.893679,505.099001
14,2021-03-31,27.385658,18.407679,18.181818,322000000.0,1.917313,1775000000.0,2.015775,2.036702,1.387597,3542000000.0,116000000.0,92000000.0,98000000.0,-371000000.0,9.090909,2100000000.0,26.46,16.680383,45.266513,-79.953701
13,2021-06-30,35.052971,26.116997,24.781207,538000000.0,1.907468,1932000000.0,1.654762,1.954186,1.382339,3798000000.0,427000000.0,110000000.0,177000000.0,-344000000.0,14.16535,2351000000.0,29.62,27.99755,41.880986,36.296637
12,2021-09-30,31.791908,23.832815,22.498888,506000000.0,2.403422,2543000000.0,1.483287,1.955601,1.763797,3919000000.0,630000000.0,102000000.0,-108000000.0,-228000000.0,12.911457,2485000000.0,31.31,-9.303244,-8.74596,-9.209876
11,2021-12-31,30.99742,22.226999,22.527945,524000000.0,1.51976,1302000000.0,2.858679,1.858677,0.911377,4189000000.0,584000000.0,163000000.0,-1286000000.0,-99000000.0,12.508952,2042000000.0,25.73,-2.499023,-6.737834,0.129147
10,2022-03-31,29.811925,21.248499,20.088035,502000000.0,1.644479,1646000000.0,2.232685,1.6972,1.03798,4607000000.0,316000000.0,137000000.0,-149000000.0,-95000000.0,10.896462,2496000000.0,31.45,-3.824498,-4.402303,-10.830592


In [88]:
# Build the EPS screener: one row per fiscal year, and for every ticker stored in
# stock_financial_dict a '<ticker>_reportedEPS' and '<ticker>_YoY_reportedEPS_%' column.
YoY_EPS_screener = pd.DataFrame()

for symbol in stock_financial_dict:
    # Rebuild the annual frame for this ticker, keep only the EPS columns and
    # prefix them with the ticker so the merged table stays unambiguous
    annaul_df = (
        pd.DataFrame(stock_financial_dict[symbol]['annual'])
        [['fiscalDateEnding', 'YoY_reportedEPS_%', 'reportedEPS']]
        .rename(
            columns={
                'YoY_reportedEPS_%': f'{symbol}_YoY_reportedEPS_%'
                ,'reportedEPS': f'{symbol}_reportedEPS'
            }
        )
    )
    # Tickers are joined on the fiscal year only, not on the exact closing date
    annaul_df['fiscalDateEnding'] = pd.to_datetime(annaul_df['fiscalDateEnding']).dt.year

    # The first ticker seeds the table; every later one is outer-joined on the year
    YoY_EPS_screener = (
        annaul_df
        if YoY_EPS_screener.empty
        else pd.merge(
            YoY_EPS_screener
            ,annaul_df
            ,on='fiscalDateEnding'
            ,how='outer'
        )
    )


In [89]:
# Show the merged per-year EPS / YoY-EPS-growth table built in the previous cell
YoY_EPS_screener

Unnamed: 0,fiscalDateEnding,KR_YoY_reportedEPS_%,KR_reportedEPS
14,2010,,0.0457
13,2011,1799.781182,0.8682
12,2012,-45.081778,0.4768
11,2013,192.093121,1.3927
10,2014,4.200474,1.4512
9,2015,54.575524,2.2432
8,2016,-2.1398,2.1952
7,2017,-7.34785,2.0339
6,2018,0.299916,2.04
5,2019,2.941176,2.1


In [95]:
# Show the consolidated annual P/E table (one '<ticker>_PE' column per processed ticker)
stock_PE_annual_consolidate_df

Unnamed: 0,fiscalDateEnding,KR_PE
0,2024,12.85
1,2023,10.83
2,2022,12.11
3,2021,13.04
4,2020,14.44
5,2019,13.8
6,2018,13.48
7,2017,13.5
8,2016,15.72
9,2015,18.65


# S&P 500 sectors

In [43]:
# Function to get the list of S&P 500 companies and their sectors
def get_sp500_companies():
    """Download the S&P 500 constituents table from Wikipedia.

    Returns
    -------
    pandas.DataFrame
        The 'Symbol', 'GICS Sector' and 'GICS Sub-Industry' columns of the
        first HTML table found on the page.
    """
    wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

    # The constituents list is the first table on the page
    constituents = pd.read_html(wiki_url)[0]

    return constituents[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]


# Function to create a dictionary of sectors and sub-sectors
def create_sector_subsector_dict(df):
    """Map every GICS sector to the list of its distinct sub-industries.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per company and the columns 'GICS Sector' and
        'GICS Sub-Industry'.

    Returns
    -------
    dict
        ``{sector: [sub_industry, ...]}``. Sectors and sub-industries appear in
        order of first occurrence in ``df``; each sub-industry is listed once
        per sector, no matter how many companies belong to it.
    """
    sector_subsector_dict = {}
    # zip over the two columns avoids the per-row Series that iterrows() builds
    for sector, subsector in zip(df['GICS Sector'], df['GICS Sub-Industry']):
        subsectors = sector_subsector_dict.setdefault(sector, [])
        # df has one row per company, so the same sub-industry shows up many times
        if subsector not in subsectors:
            subsectors.append(subsector)
    return sector_subsector_dict

# Function to filter the S&P 500 companies by sector
def company_sector_list(df, sector):
    """Return the 'Symbol' values of all rows whose 'GICS Sector' equals `sector`, as a list."""
    in_sector = df['GICS Sector'] == sector
    return df.loc[in_sector, 'Symbol'].tolist()

def company_sub_sector_list(df, sub_sector):
    """Return the 'Symbol' values of all rows whose 'GICS Sub-Industry' equals `sub_sector`, as a list."""
    in_sub_sector = df['GICS Sub-Industry'] == sub_sector
    return df.loc[in_sub_sector, 'Symbol'].tolist()


# Get the list of S&P 500 companies and their sectors
sp500_df = get_sp500_companies()

# sector -> sub-industries lookup built from the constituents table
sector_subsector_dict = create_sector_subsector_dict(sp500_df)

# Distinct sectors / sub-industries, ordered by how many companies each one has
# (value_counts sorts by descending count)
sp500_companies_sectors = sp500_df['GICS Sector'].value_counts().index
sp500_companies_sub_sectors = sp500_df['GICS Sub-Industry'].value_counts().index


# Function to create a DataFrame from the sector_subsector_dict
def create_sector_dataframe(df=None, sectors=None):
    """Build a long-format table with one (Sector, Ticker) row per company.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Constituents table with 'Symbol' and 'GICS Sector' columns.
        Defaults to the notebook-level ``sp500_df``.
    sectors : iterable of str, optional
        Sector names, in the order their tickers should appear in the result.
        When omitted together with ``df`` the notebook-level
        ``sp500_companies_sectors`` is used; when only ``df`` is given, the
        sectors of ``df`` ordered by descending company count are used.

    Returns
    -------
    pandas.DataFrame
        Columns 'Sector' and 'Ticker'. Both columns exist even when no row
        matches, so downstream filters on 'Sector' never raise KeyError.
    """
    if df is None:
        # Backward-compatible default: fall back to the notebook globals
        df = sp500_df
        if sectors is None:
            sectors = sp500_companies_sectors
    elif sectors is None:
        sectors = df['GICS Sector'].value_counts().index

    # One dictionary per row of the result; same filter as company_sector_list(),
    # inlined so this function only depends on its arguments
    data = [
        {'Sector': sector, 'Ticker': ticker}
        for sector in sectors
        for ticker in df.loc[df['GICS Sector'] == sector, 'Symbol']
    ]

    # Explicit columns keep the schema stable for an empty `data` list
    return pd.DataFrame(data, columns=['Sector', 'Ticker'])


# sector_subsector_dict was already built from sp500_df earlier in this cell,
# so it is not recomputed here.

# Long-format (Sector, Ticker) table, then the tickers of a single sector
sector_ticker_df = create_sector_dataframe()
is_consumer_staples = sector_ticker_df['Sector'] == 'Consumer Staples'
sector_ticker_list = sector_ticker_df.loc[is_consumer_staples, 'Ticker'].tolist()

In [44]:
# Wide view of sector_ticker_df: a single 'Ticker' row whose cells hold the list
# of tickers for each sector (sectors are the columns)
pivot_sector_ticker_df = (
    sector_ticker_df
    .groupby('Sector')['Ticker']
    .apply(list)            # one list of tickers per sector
    .reset_index()
    .set_index('Sector')
    .T                      # sectors become the columns
)

pivot_sector_ticker_df

Sector,Communication Services,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology,Materials,Real Estate,Utilities
Ticker,"[GOOGL, GOOG, T, CHTR, CMCSA, EA, FOXA, FOX, I...","[ABNB, AMZN, APTV, AZO, BBY, BKNG, BWA, CZR, K...","[MO, ADM, BF.B, BG, CPB, CHD, CLX, KO, CL, CAG...","[APA, BKR, CVX, COP, CTRA, DVN, FANG, EOG, EQT...","[AFL, ALL, AXP, AIG, AMP, AON, APO, ACGL, AJG,...","[ABT, ABBV, A, ALGN, AMGN, BAX, BDX, TECH, BII...","[MMM, AOS, ALLE, AME, ADP, AXON, BA, BR, BLDR,...","[ACN, ADBE, AMD, AKAM, APH, ADI, ANSS, AAPL, A...","[APD, ALB, AMCR, AVY, BALL, CE, CF, CTVA, DOW,...","[ARE, AMT, AVB, BXP, CPT, CBRE, CSGP, CCI, DLR...","[AES, LNT, AEE, AEP, AWK, ATO, CNP, CMS, ED, C..."


In [45]:
# The pivot has a single row, so .iloc[0] unwraps the list of Consumer Staples tickers
pivot_sector_ticker_df['Consumer Staples'].iloc[0]

['MO',
 'ADM',
 'BF.B',
 'BG',
 'CPB',
 'CHD',
 'CLX',
 'KO',
 'CL',
 'CAG',
 'STZ',
 'COST',
 'DG',
 'DLTR',
 'EL',
 'GIS',
 'HSY',
 'HRL',
 'K',
 'KVUE',
 'KDP',
 'KMB',
 'KHC',
 'KR',
 'LW',
 'MKC',
 'TAP',
 'MDLZ',
 'MNST',
 'PEP',
 'PM',
 'PG',
 'SJM',
 'SYY',
 'TGT',
 'TSN',
 'WBA',
 'WMT']

In [46]:
# Number of tickers listed under the Financials sector
len(pivot_sector_ticker_df['Financials'].iloc[0])

73

# Stock P/E by year and ticker

In [59]:
# Reset the containers filled by the P/E loop in the next cell.
# stock_financial_dict: ticker -> {'annual': ..., 'qtr': ...}, where each value is the
# DataFrame.to_dict() output of that ticker's ratio table (filled by the ratio loop earlier in the notebook).
stock_financial_dict = {}
# One row per year, one '<ticker>_PE' column per ticker
stock_PE_annual_consolidate_df = pd.DataFrame()

# Tickers to process (Consumer Staples). Commented-out entries are excluded from this run.
# NOTE(review): the meaning of the '!!' markers is not stated anywhere visible — presumably
# tickers the author wants to look at more closely; confirm.
ticker_symbols = [
    'MO',
    'ADM',
    # 'BF.B', # DATA ISSUE
    'BG',
    'CPB',
    'CHD',
    'CLX',
    'KO',
    'CL',
    'CAG',
    'STZ', # !!
    'COST',
    'DG',
    'DLTR', # !!
    'EL',
    'GIS',
    'HSY',
    'HRL',
    'K',
    # 'KVUE',
    'KDP',
    'KMB',
    # 'KHC',
    'KR', # DATA ISSUE
    # 'LW',
    'MKC', # DATA ISSUE
    'TAP',
    'MDLZ',
    'MNST',
    'PEP',
    'PM',
    'PG',
    'SJM',
    'SYY',
    'TGT', # !!
    'TSN', # !!
    'WBA', # DATA ISSUE
    'WMT'
]
# Alternative universe; electric_utility_symbols is only defined in the Appendix at the end
# of the notebook, so that cell must be run first before enabling this line.
# ticker_symbols = electric_utility_symbols
# Number of calendar years before the current one to include in the P/E table
PE_yr_range = 15 # test for 'X' years PE 

In [60]:

# storage process
# Fill stock_PE_annual_consolidate_df: one row per calendar year, one '<ticker>_PE'
# column per ticker (December close / annual reported EPS), plus the average of all
# ticker columns in 'industry_avg_PE'.
#
# Every per-ticker frame is rebuilt from scratch on each iteration; a ticker whose
# API response lacks the expected payload is reported and skipped, so data from the
# previous ticker can never leak into the current one.

ALPHA_VANTAGE_URL = 'https://www.alphavantage.co/query'
REQUEST_TIMEOUT_SECONDS = 30  # a stalled connection must not hang the whole loop


def _alpha_vantage_query(function, symbol):
    """Call one Alpha Vantage endpoint for `symbol` and return the decoded JSON object.

    Returns an empty dict when the request fails or the body is not a JSON
    object, after printing which call failed. Only the exception type is
    printed: exception messages and API notices can contain the request URL
    or the API key, which must not end up in saved notebook output.
    """
    try:
        r = requests.get(
            ALPHA_VANTAGE_URL
            ,params={'function': function, 'symbol': symbol, 'apikey': alpha_vantage_api_key}
            ,timeout=REQUEST_TIMEOUT_SECONDS
        )
        r.raise_for_status()
        payload = r.json()
    except (requests.RequestException, ValueError) as exc:
        print(f'{symbol}: {function} request failed ({type(exc).__name__})')
        return {}
    return payload if isinstance(payload, dict) else {}


current_year = datetime.today().year
# Completed calendar years only: the current year has no December close yet
pe_years = range(current_year - PE_yr_range, current_year)

for j, symbol in enumerate(ticker_symbols):
    print(j, symbol)

    # COMPANY OVERVIEW
    # NOTE(review): stock_mkt_cap is not used by the P/E calculation in this cell; the
    # request is kept because other cells may read the variable — confirm, it costs one API call per ticker.
    data = _alpha_vantage_query('OVERVIEW', symbol)
    try:
        stock_mkt_cap = float(data.get('MarketCapitalization'))
    except (TypeError, ValueError):
        stock_mkt_cap = float('nan')  # missing or non-numeric: never keep the previous ticker's value

    # STOCK SPLIT FACTOR section
    data = _alpha_vantage_query('SPLITS', symbol)
    if 'data' not in data:
        print(f'{symbol}: skipped, no split history in the API response')
        continue

    if len(data['data']) > 0:
        stock_split_record_df = pd.DataFrame(data['data'])
        stock_split_record_df['split_factor'] = pd.to_numeric(stock_split_record_df['split_factor'], errors='coerce') # change split_factor series to numeric data
        stock_split_record_df['effective_date'] = pd.to_datetime(stock_split_record_df['effective_date'])
    else:
        # No split on record: a fresh neutral record (factor 1, dated today) instead of
        # overwriting whatever frame the previous ticker left behind
        stock_split_record_df = pd.DataFrame({
            'split_factor': [1]
            ,'effective_date': [datetime.today()]
        })

    # Monthly quote section
    data = _alpha_vantage_query('TIME_SERIES_MONTHLY', symbol)
    if 'Monthly Time Series' not in data:
        print(f'{symbol}: skipped, no monthly prices in the API response')
        continue

    Monthly_stock_df = pd.DataFrame(data['Monthly Time Series']).transpose()
    Monthly_stock_df.index = pd.to_datetime(Monthly_stock_df.index)

    # month = 12 to get the year end closing price
    year_end_mask = Monthly_stock_df.index.year.isin(pe_years) & (Monthly_stock_df.index.month == 12)
    Monthly_stock_df = Monthly_stock_df.loc[year_end_mask, ['4. close']].rename(
        columns={
            '4. close': 'stock_price'
            }
    )
    Monthly_stock_df['stock_price'] = Monthly_stock_df['stock_price'].astype(float).round(2)

    # modify stock price based on stock split
    # A split that took effect in one of the priced years rescales every earlier year-end
    # price. Each split row applies its own factor, so two splits in the same year compound.
    # NOTE(review): splits outside the priced years (e.g. in the current year) are not applied,
    # as before — confirm that matches the split basis of Alpha Vantage's reportedEPS.
    price_years = Monthly_stock_df.index.year
    priced_year_set = set(price_years)
    for split_date, split_factor in zip(stock_split_record_df['effective_date'], stock_split_record_df['split_factor']):
        split_year = split_date.year
        if split_year in priced_year_set:
            Monthly_stock_df.loc[price_years < split_year, 'stock_price'] /= split_factor

    # Earning section
    # past earnings from alpha vintage API
    data = _alpha_vantage_query('EARNINGS', symbol)
    if not data.get('annualEarnings'):
        print(f'{symbol}: skipped, no annual earnings in the API response')
        continue

    annualEPS_df = pd.DataFrame(data['annualEarnings'])
    annualEPS_df['fiscalDateEnding'] = pd.to_datetime(annualEPS_df['fiscalDateEnding']).dt.year
    annualEPS_df = annualEPS_df[annualEPS_df['fiscalDateEnding'].isin(pe_years)].copy()

    # Unparseable EPS values (e.g. the string 'None') become NaN instead of raising
    annualEPS_df['reportedEPS'] = pd.to_numeric(annualEPS_df['reportedEPS'], errors='coerce')

    # clean annualEPS_df: keep the highest EPS when a year appears twice, newest year first
    annualEPS_df = annualEPS_df.sort_values('reportedEPS', ascending=False).drop_duplicates('fiscalDateEnding')
    annualEPS_df = annualEPS_df.sort_values('fiscalDateEnding', ascending=False).reset_index(drop=True)

    # calculate PE
    # Price and EPS are matched on the year, not on row position, so a ticker with
    # missing prices or missing earnings for some years gets NaN there instead of a
    # shifted (wrong-year) ratio or a length-mismatch error.
    # NOTE(review): fiscal-year EPS is paired with the calendar-year December close,
    # as before — confirm this is intended for companies whose fiscal year ends early in the year.
    year_end_price = pd.Series(Monthly_stock_df['stock_price'].to_numpy(), index=price_years)
    year_end_price = year_end_price[~year_end_price.index.duplicated()]
    annualEPS_df['PE'] = annualEPS_df['fiscalDateEnding'].map(year_end_price) / annualEPS_df['reportedEPS']

    ####################
    ### Consolidated ###
    ####################

    # annual PE df: join this ticker on the year so every row holds one year for all tickers
    pe_column = f'{symbol}_PE'
    symbol_pe_df = pd.DataFrame({
        'fiscalDateEnding': annualEPS_df['fiscalDateEnding']
        ,pe_column: annualEPS_df['PE'].round(2)
    })

    if stock_PE_annual_consolidate_df.empty:
        stock_PE_annual_consolidate_df = symbol_pe_df
    else:
        stock_PE_annual_consolidate_df = pd.merge(
            # drop a column left by an earlier run so the ticker is replaced, not duplicated
            stock_PE_annual_consolidate_df.drop(columns=[pe_column], errors='ignore')
            ,symbol_pe_df
            ,on='fiscalDateEnding'
            ,how='outer'
        )


if not stock_PE_annual_consolidate_df.empty:
    # The outer merge sorts years ascending; restore newest-first order
    stock_PE_annual_consolidate_df = (
        stock_PE_annual_consolidate_df
        .sort_values('fiscalDateEnding', ascending=False)
        .reset_index(drop=True)
    )

    # Average over the ticker columns only; mean() skips missing values instead of
    # counting them as 0, and a previous 'industry_avg_PE' is never averaged into itself
    pe_columns = [
        col for col in stock_PE_annual_consolidate_df.columns
        if col not in ('fiscalDateEnding', 'industry_avg_PE')
    ]
    stock_PE_annual_consolidate_df['industry_avg_PE'] = stock_PE_annual_consolidate_df[pe_columns].mean(axis=1)


0 MO
1 ADM
2 BG
3 CPB
4 CHD
5 CLX
6 KO
7 CL
8 CAG
9 STZ
10 COST
11 DG
12 DLTR
13 EL
14 GIS
15 HSY
16 HRL
17 K
18 KDP
19 KMB
20 KR
21 MKC
22 TAP
23 MDLZ
24 MNST
25 PEP
26 PM
27 PG
28 SJM
29 SYY
30 TGT
31 TSN
32 WBA
33 WMT


In [61]:
# Show the consolidated P/E table produced by the loop above: years as rows,
# one '<ticker>_PE' column per ticker, and the cross-ticker 'industry_avg_PE'
stock_PE_annual_consolidate_df

Unnamed: 0,fiscalDateEnding,MO_PE,ADM_PE,BG_PE,CPB_PE,CHD_PE,CLX_PE,KO_PE,CL_PE,CAG_PE,STZ_PE,COST_PE,DG_PE,DLTR_PE,EL_PE,GIS_PE,HSY_PE,HRL_PE,K_PE,KDP_PE,KMB_PE,KR_PE,MKC_PE,TAP_PE,MDLZ_PE,MNST_PE,PEP_PE,PM_PE,PG_PE,SJM_PE,SYY_PE,TGT_PE,TSN_PE,WBA_PE,WMT_PE,industry_avg_PE
0,2024,13.62,14.11,11.01,13.55,39.07,26.28,26.72,33.92,10.39,18.33,56.01,10.03,12.7,28.84,14.11,25.35,19.85,27.63,23.97,22.59,12.85,35.46,12.27,21.33,43.44,24.53,24.07,25.44,11.08,17.74,15.14,18.53,3.24,16.61,21.465
1,2023,8.15,10.36,7.38,14.41,30.5,28.01,21.91,24.68,10.38,22.72,60.61,12.73,19.7,42.39,15.15,19.44,19.82,13.44,18.61,18.49,10.83,25.43,11.27,21.88,36.46,22.29,15.65,24.84,14.17,18.24,23.7,40.41,6.54,8.37,20.557647
2,2022,9.44,11.84,7.17,19.98,47.98,34.14,25.65,26.53,16.33,22.72,44.11,24.24,24.39,34.36,21.28,27.18,24.89,15.81,21.23,24.11,12.11,32.76,12.57,22.37,22.76,26.61,16.98,26.09,17.82,23.45,11.0,7.13,7.43,7.33,21.464412
3,2021,10.28,13.02,7.21,14.58,33.83,22.3,25.41,26.67,12.94,25.17,63.43,22.23,24.83,57.4,17.78,26.95,28.21,14.51,23.04,23.09,13.04,31.68,11.17,23.27,18.68,27.79,15.65,28.9,14.93,54.55,24.54,10.53,10.37,8.71,23.137941
4,2020,9.4,14.04,8.14,16.33,30.82,27.47,28.12,28.04,15.9,23.99,56.57,31.2,22.65,64.92,16.29,24.22,27.91,14.61,22.86,17.42,14.44,33.66,11.53,22.32,17.25,26.87,15.98,27.18,13.2,37.32,27.58,11.15,8.41,9.73,22.868235
5,2019,11.86,17.56,15.77,18.86,28.37,24.22,26.23,24.33,17.03,20.45,35.68,25.87,17.26,38.68,16.63,25.39,25.93,16.48,23.73,19.99,13.8,31.67,11.87,22.3,15.35,24.76,16.36,27.57,12.56,24.1,23.7,16.67,9.83,8.05,20.850294
6,2018,12.38,11.71,20.24,11.49,28.97,24.94,22.66,20.04,10.08,18.46,28.62,23.55,18.55,28.85,12.52,20.0,23.98,12.33,8.9,17.24,13.48,27.96,11.14,16.47,13.67,19.55,13.09,21.78,11.6,20.82,14.0,8.67,11.35,7.04,17.239118
7,2017,21.06,16.43,31.79,15.93,25.86,27.54,24.02,26.29,19.32,33.86,40.03,20.71,28.39,36.56,19.25,23.85,23.03,15.8,22.11,19.37,13.5,24.04,15.17,19.91,22.77,23.2,22.43,23.44,16.11,24.89,13.02,15.27,14.24,7.6,21.964412
8,2016,22.32,21.04,15.6,20.64,24.97,24.15,21.71,23.29,16.76,28.23,39.05,18.66,44.03,23.9,21.15,23.45,21.23,18.41,20.61,18.93,15.72,24.43,30.99,23.21,17.05,29.98,20.42,22.91,21.34,26.75,15.37,14.05,18.03,5.02,22.158824
9,2015,20.86,14.22,14.17,21.45,26.04,27.94,21.48,23.71,19.34,32.08,39.39,20.48,25.25,31.01,20.16,21.67,29.51,19.17,23.18,22.1,18.65,24.45,24.98,25.48,25.08,21.91,19.89,19.61,22.88,22.16,20.63,16.93,21.89,4.03,22.405294


## Appendix

In [131]:
# Consumer Staples
# Reference list of Consumer Staples tickers. Running this cell overwrites ticker_symbols.
# NOTE(review): the meaning of the '!!' markers is not stated anywhere visible — presumably
# tickers the author wants to look at more closely; confirm.

ticker_symbols = [
    'MO',
    'ADM',
    # 'BF.B', # DATA ISSUE
    'BG',
    'CPB',
    'CHD',
    'CLX',
    'KO',
    'CL',
    'CAG',
    'STZ', # !!
    'COST',
    'DG',
    'DLTR', # !!
    'EL',
    'GIS',
    'HSY',
    'HRL',
    'K',
    'KVUE',
    'KDP',
    'KMB',
    'KHC',
    'KR', # DATA ISSUE
    'LW',
    'MKC', # DATA ISSUE
    'TAP',
    'MDLZ',
    'MNST',
    'PEP',
    'PM',
    'PG',
    'SJM',
    'SYY',
    'TGT', # !!
    'TSN', # !!
    'WBA', # DATA ISSUE
    'WMT'
]

In [132]:
# Energy
# Reference list of Energy tickers with the author's screening notes.
# Running this cell overwrites ticker_symbols.
# NOTE(review): the number of '!' presumably ranks the author's interest — confirm.

ticker_symbols = [
'APA',
 'BKR', # !! HIGH PE, LOW PROFITABILITY, HIGH FORECAST, BUT CANNOT BEAT FORECAST; in summary similar to TRGP, sounder fundamentals, high earnings expectations, still to be verified
 'CVX',
 'COP',
 'CTRA',
 'DVN',
 'FANG',
 'EOG',
 'EQT',
 'XOM',
 'HAL', # !!!! similar to SLB
 'HES', # ! HIGH PE, UNDERVALUE; the market is hyping high expectations, strong performance in 2024, already overheated
 'KMI',
 'MRO',
 'MPC',
 'OXY',
 'OKE', # !!
 'PSX',
 'SLB', # !!! A BIT HIGH PE, GOOD ROE, MED PROFITABILITY, STEADY GROWTH FORECAST, AND CAN BEAT FORECAST; in summary good fundamentals, similar in type to HAL, more market hype than HAL, worth researching
 'TRGP', # !! HIGH PE, HIGH ROE, RELATIVELY MORE DEBT THAN EQUITY, LOW PROFITABILITY, HIGH FORECAST, BUT CANNOT BEAT FORECAST; in summary average fundamentals but very high earnings expectations, still to be verified
 'VLO',
 'WMB'
 ]

In [135]:
# Electric-utility tickers; this is the list named by the commented-out
# "ticker_symbols = electric_utility_symbols" line in the P/E configuration cell.
# NOTE(review): the '#' / '##' markers are not explained anywhere visible — presumably
# a rating of the author's interest; confirm.
electric_utility_symbols = [
'CEPU'
,'OTTR'
,'PAM'
,'SO' ##
,'DUK'
,'NRG'
,'IDA'
,'DTE' #
,'CEG' ##
,'SRE'
,'ETR'
,'PEG'
,'MGEE'
,'LNT'
,'BKH'
]