In [242]:
import os
import warnings
from datetime import datetime, timedelta
from decimal import Decimal
from getpass import getpass

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import pytz  # Make sure to import pytz for timezone handling
import requests
import seaborn as sns
import yfinance as yf

In [260]:
# Fetch the S&P 500 constituents (symbol, sector, sub-industry) from Wikipedia
def get_sp500_companies():
    """Download the S&P 500 constituents table and keep three columns.

    Reads every HTML table on the Wikipedia "List of S&P 500 companies" page
    and takes the first one.

    Returns:
        pandas.DataFrame: the 'Symbol', 'GICS Sector' and 'GICS Sub-Industry'
        columns of that first table.
    """
    wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    wanted_columns = ['Symbol', 'GICS Sector', 'GICS Sub-Industry']

    # The first table on the page is taken to be the constituents list.
    constituents = pd.read_html(wiki_url)[0]
    return constituents[wanted_columns]


# Map every sector to the sub-industries of its rows
def create_sector_subsector_dict(df):
    """Group the 'GICS Sub-Industry' values of ``df`` by 'GICS Sector'.

    Args:
        df: DataFrame with 'GICS Sector' and 'GICS Sub-Industry' columns.

    Returns:
        dict: sector -> list of sub-industries in row order. One entry is
        appended per row, so a sub-industry shared by several rows appears
        several times.
    """
    grouped = {}
    for sector_name, sub_industry in zip(df['GICS Sector'], df['GICS Sub-Industry']):
        grouped.setdefault(sector_name, []).append(sub_industry)
    return grouped

# Symbols of the companies that belong to one GICS sector
def company_sector_list(df, sector):
    """Return the 'Symbol' values of the rows whose 'GICS Sector' equals ``sector``.

    Args:
        df: DataFrame with 'Symbol' and 'GICS Sector' columns.
        sector: sector name to match exactly.

    Returns:
        list: matching symbols in row order (empty when nothing matches).
    """
    in_sector = df['GICS Sector'] == sector
    return df.loc[in_sector, 'Symbol'].tolist()

def company_sub_sector_list(df, sub_sector):
    """Return the 'Symbol' values of the rows whose 'GICS Sub-Industry' equals ``sub_sector``.

    Args:
        df: DataFrame with 'Symbol' and 'GICS Sub-Industry' columns.
        sub_sector: sub-industry name to match exactly.

    Returns:
        list: matching symbols in row order (empty when nothing matches).
    """
    in_sub_sector = df['GICS Sub-Industry'] == sub_sector
    return df.loc[in_sub_sector, 'Symbol'].tolist()


# Get the list of S&P 500 companies and their sectors
# S&P 500 constituents: one row per company with its sector and sub-industry
sp500_df = get_sp500_companies()

# Distinct sector / sub-industry labels, ordered by how many companies carry them
# (value_counts sorts by descending count)
sp500_companies_sectors = sp500_df['GICS Sector'].value_counts().index
sp500_companies_sub_sectors = sp500_df['GICS Sub-Industry'].value_counts().index

# sector -> list of sub-industries (one entry per company row)
sector_subsector_dict = create_sector_subsector_dict(sp500_df)


# Function to create a long-format (Sector, Ticker) DataFrame
def create_sector_dataframe():
    """Build a frame with one row per (sector, ticker) pair.

    Reads the module-level ``sp500_companies_sectors`` (the sectors to walk)
    and ``sp500_df`` (the constituents table), and uses
    ``company_sector_list`` to list the tickers of each sector.

    Returns:
        pandas.DataFrame: columns 'Sector' and 'Ticker', sectors in the order
        of ``sp500_companies_sectors`` and tickers in table order. The two
        columns are present even when no rows are produced, so column lookups
        on the result never raise KeyError.
    """
    rows = [
        {'Sector': sector, 'Ticker': ticker}
        for sector in sp500_companies_sectors
        for ticker in company_sector_list(sp500_df, sector)
    ]

    # Naming the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=['Sector', 'Ticker'])


# NOTE: sector_subsector_dict is already built earlier in this cell from the same
# sp500_df, so it is not recomputed here.

# Long-format (Sector, Ticker) table and the Energy-sector ticker shortlist
sector_ticker_df = create_sector_dataframe()
energy_sector_ticker_list = sector_ticker_df.loc[
    sector_ticker_df['Sector'] == 'Energy', 'Ticker'
].tolist()

In [261]:
# # Ask the user to input stock tickers separated by a comma
# tickers_input = input("Enter stock tickers separated by commas:")

# # Split the input string into a list of tickers
# tickers = tickers_input.split(',')

In [262]:
# enable dynamic query

# input_security_dim = input()
# input_security_func = input()

In [263]:
# Bare expression: the notebook echoes the Energy-sector ticker list as the cell output
energy_sector_ticker_list

['APA',
 'BKR',
 'CVX',
 'COP',
 'CTRA',
 'DVN',
 'FANG',
 'EOG',
 'EQT',
 'XOM',
 'HAL',
 'HES',
 'KMI',
 'MRO',
 'MPC',
 'OXY',
 'OKE',
 'PSX',
 'SLB',
 'TRGP',
 'VLO',
 'WMB']

In [281]:
# Parameters section

# Display / runtime settings are applied first so they are already in effect
# the first time the loop below runs.
# Set display options to show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")
pd.options.mode.copy_on_write = True

# Credentials are never hard-coded: read the key from the environment, or ask
# for it without echoing it into the notebook.
# FREE TIER API rate limit is 25 requests per day
alpha_vantage_api_key = os.environ.get('ALPHA_VANTAGE_API_KEY') or getpass('Alpha Vantage API key: ')

alpha_vantage_function = {
    'core': [
        'TIME_SERIES_INTRADAY',
        'TIME_SERIES_DAILY',  # daily time series quote
        'TIME_SERIES_DAILY_ADJUSTED',  # daily time series adjusted for splits/dividends
        'GLOBAL_QUOTE',
    ],
    # NOTE: the key spelling 'fundmental' is kept unchanged so existing lookups keep working.
    'fundmental': [
        'INCOME_STATEMENT',
        'BALANCE_SHEET',
        'CASH_FLOW',
        'EARNINGS',
        'EARNINGS_CALENDAR',
    ],
}

# Define the ticker symbols as a list; eg. TSM,MSFT,AMZN
ticker_symbols = input("Enter stock tickers separated by commas:")
# Strip surrounding whitespace and drop empty entries so "TSM, MSFT," works as expected.
ticker_symbols = [ticker.strip() for ticker in ticker_symbols.split(',') if ticker.strip()]
# ticker_symbols = energy_sector_ticker_list

# Time intelligent parameters
# NOTE: window_days is applied below as a number of rows of the daily series
# (head(window_days)); start_date / end_date are not used by this cell.
window_days = 90
end_date = datetime.now()
start_date = end_date - timedelta(days=window_days)

# Create an empty DataFrame to store the earning data
earnings_data = {}
list_1 = []

# Valuation frame of every successfully processed symbol, keyed by ticker.
df_stock_consolidate_by_symbol = {}


def _fetch_alpha_vantage(function, symbol, api_key, timeout=30):
    """Query the Alpha Vantage endpoint and return the decoded JSON payload.

    Raises requests.RequestException on connection/HTTP errors and ValueError
    when the body is not valid JSON.
    """
    response = requests.get(
        'https://www.alphavantage.co/query',
        params={'function': function, 'symbol': symbol, 'apikey': api_key},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


def _daily_close_frame(payload, symbol):
    """Return a one-column frame of closes (column named after ``symbol``), or None.

    None is returned when the payload has no 'Time Series (Daily)' entry.
    Rows are sorted newest first; closes are floats rounded to 2 decimals and
    unparsable values become NaN.
    """
    series = payload.get('Time Series (Daily)') if isinstance(payload, dict) else None
    if not series:
        return None

    closes = (
        pd.DataFrame(series)
        .transpose()[['4. close']]
        .rename(columns={'4. close': f'{symbol}'})
    )
    closes[f'{symbol}'] = pd.to_numeric(closes[f'{symbol}'], errors='coerce').round(2)
    # The index holds date strings (e.g. 2024-05-13); descending order puts the newest row first.
    return closes.sort_index(ascending=False)


def _quarterly_eps_frame(payload):
    """Return the 'reportedEPS' column of 'quarterlyEarnings' as floats, or None.

    None is returned when the payload has no 'quarterlyEarnings' entry;
    unparsable EPS values (e.g. the string 'None') become NaN.
    """
    quarterly = payload.get('quarterlyEarnings') if isinstance(payload, dict) else None
    if not quarterly:
        return None

    eps = pd.DataFrame(quarterly)[['reportedEPS']].copy()
    eps['reportedEPS'] = pd.to_numeric(eps['reportedEPS'], errors='coerce')
    return eps


def _build_valuation_frame(df_close, eps_ttm, symbol, window):
    """Derive the PE-based relative-valuation columns for one symbol.

    Works on a copy of the first ``window`` rows of ``df_close`` so the price
    frame itself is never modified.
    """
    frame = df_close.head(window).copy()

    frame[f"{symbol}_EPS_TTM"] = eps_ttm

    # stock's stats
    frame[f"{symbol}_PE_TTM"] = (frame[symbol] / frame[f"{symbol}_EPS_TTM"]).round(2)
    frame[f"{symbol}_PE_TTM_avg"] = frame[f"{symbol}_PE_TTM"].mean().round(2)
    frame[f"{symbol}_PE_TTM_std"] = np.std(frame[f"{symbol}_PE_TTM"]).round(2)
    # Upper bound of the PE fluctuation range
    frame[f"{symbol}_PE_TTM_volatility_+"] = (frame[f"{symbol}_PE_TTM_avg"] + frame[f"{symbol}_PE_TTM_std"]).round(2)
    # Lower bound of the PE fluctuation range
    frame[f"{symbol}_PE_TTM_volatility_-"] = (frame[f"{symbol}_PE_TTM_avg"] - frame[f"{symbol}_PE_TTM_std"]).round(2)

    # Upper price bound of the relative valuation
    frame[f"{symbol}_relative_valuation_+"] = (frame[f"{symbol}_PE_TTM_volatility_+"] * frame[f"{symbol}_EPS_TTM"]).round(2)
    # Lower price bound of the relative valuation
    frame[f"{symbol}_relative_valuation_-"] = (frame[f"{symbol}_PE_TTM_volatility_-"] * frame[f"{symbol}_EPS_TTM"]).round(2)
    # Median price of the valuation band based on the latest TTM PE (first row of each bound)
    frame[f"{symbol}_relative_valuation_median"] = np.median([
        frame[f"{symbol}_relative_valuation_+"].iloc[0],
        frame[f"{symbol}_relative_valuation_-"].iloc[0],
    ]).round(2)

    frame[f"{symbol}_{window}_price_min"] = frame[symbol].min().round(2)
    frame[f"{symbol}_{window}_price_max"] = frame[symbol].max().round(2)
    frame[f"{symbol}_{window}_price_avg"] = frame[symbol].mean().round(2)
    frame[f"{symbol}_{window}_price_std"] = np.std(frame[symbol]).round(2)

    # This KPI assesses whether each close is under/over/fair relative to the valuation band.
    price = frame[f"{symbol}"]
    lower_bound = frame[f"{symbol}_relative_valuation_-"]
    upper_bound = frame[f"{symbol}_relative_valuation_+"]
    assessment_col = f"{symbol}_price_valuation_assessment"

    # Initialise the same column that is filled below; rows matching no condition stay None.
    frame[assessment_col] = None
    for condition, category in (
        (price < lower_bound, 'undervalued'),
        (price > upper_bound, 'overvalued'),
        ((price >= lower_bound) & (price <= upper_bound), 'fair'),
    ):
        frame.loc[condition, assessment_col] = category

    return frame


for symbol in ticker_symbols:

    # Daily quote section
    try:
        data = _fetch_alpha_vantage('TIME_SERIES_DAILY', symbol, alpha_vantage_api_key)
    except (requests.RequestException, ValueError) as exc:
        # Only the exception type is printed: the message can contain the request URL and thus the key.
        print(f"{symbol}: daily quote request failed ({type(exc).__name__}); skipping")
        continue

    df_stock_base = _daily_close_frame(data, symbol)
    if df_stock_base is None:
        # Without this guard the previous symbol's prices would be silently reused.
        response_keys = list(data) if isinstance(data, dict) else type(data).__name__
        print(f"{symbol}: no 'Time Series (Daily)' in response (got: {response_keys}); skipping")
        continue

    # Earning section
    # past earnings from the Alpha Vantage API
    try:
        data = _fetch_alpha_vantage('EARNINGS', symbol, alpha_vantage_api_key)
    except (requests.RequestException, ValueError) as exc:
        print(f"{symbol}: earnings request failed ({type(exc).__name__}); skipping")
        continue

    annual_earnings = data.get('annualEarnings') if isinstance(data, dict) else None
    if annual_earnings:
        df_stock_annualEPS = pd.DataFrame(annual_earnings)

    df_stock_qtrEPS = _quarterly_eps_frame(data)
    if df_stock_qtrEPS is None:
        response_keys = list(data) if isinstance(data, dict) else type(data).__name__
        print(f"{symbol}: no 'quarterlyEarnings' in response (got: {response_keys}); skipping")
        continue

    # # forecast 1 qtr earnings from yahoofin API
    # # symbol's earning related
    # stock = yf.Ticker(symbol)
    # earnings_dates = stock.get_earnings_dates(limit=4)  # Fetch earnings dates for the past 4 quarters and forecast for the next 4 quarters
    # earnings_data[symbol] = earnings_dates

    # forecast 1 year earnings from nasdaq webscrapping

    # Consolidated section
    # Trailing-twelve-month EPS: sum of the first four quarterly rows
    past_4_qtrs_EPS = df_stock_qtrEPS.head(4)
    EPS_TTM = past_4_qtrs_EPS['reportedEPS'].values.sum()

    if not np.isfinite(EPS_TTM) or EPS_TTM == 0:
        # A missing or zero EPS makes every PE undefined (NaN / inf), so skip the symbol.
        print(f"{symbol}: TTM EPS is {EPS_TTM}; PE-based valuation is undefined; skipping")
        continue

    df_stock_consolidate = _build_valuation_frame(df_stock_base, EPS_TTM, symbol, window_days)
    df_stock_consolidate_by_symbol[symbol] = df_stock_consolidate

    selected_cols = [
        f"{symbol}"
        ,f"{symbol}_PE_TTM"
        ,f"{symbol}_PE_TTM_avg"
        ,f"{symbol}_relative_valuation_+"
        ,f"{symbol}_relative_valuation_-"
        ,f"{symbol}_relative_valuation_median"
        # ,f"{symbol}_relative_valuation_projected_+"
        # ,f"{symbol}_relative_valuation_projected_-"
        # ,f"{symbol}_relative_valuation_projected_median"
        ,f"{symbol}_price_valuation_assessment"
    ]

    print(df_stock_consolidate[selected_cols])

# selected_cols = [
#     f"{symbol}"
#     ,f"{symbol}_PE_TTM"
#     ,f"{symbol}_PE_TTM_avg"
#     ,f"{symbol}_relative_valuation_+"
#     ,f"{symbol}_relative_valuation_-"
#     ,f"{symbol}_relative_valuation_median"
#     # ,f"{symbol}_relative_valuation_projected_+"
#     # ,f"{symbol}_relative_valuation_projected_-"
#     # ,f"{symbol}_relative_valuation_projected_median"
#     ,f"{symbol}_price_valuation_assessment"
# ]

# # df_stock_consolidate.columns
# df_stock_consolidate[selected_cols]

                L  L_PE_TTM  L_PE_TTM_avg  L_relative_valuation_+   
2024-05-13  77.34     11.47         11.08                   76.78  \
2024-05-10  77.98     11.57         11.08                   76.78   
2024-05-09  77.70     11.53         11.08                   76.78   
2024-05-08  77.31     11.47         11.08                   76.78   
2024-05-07  77.81     11.54         11.08                   76.78   
2024-05-06  77.74     11.53         11.08                   76.78   
2024-05-03  76.40     11.33         11.08                   76.78   
2024-05-02  76.53     11.35         11.08                   76.78   
2024-05-01  76.28     11.32         11.08                   76.78   
2024-04-30  75.15     11.15         11.08                   76.78   
2024-04-29  75.82     11.25         11.08                   76.78   
2024-04-26  75.46     11.19         11.08                   76.78   
2024-04-25  76.41     11.33         11.08                   76.78   
2024-04-24  76.81     11.39       