In [215]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import yfinance as yf

import matplotlib.pyplot as plt
import plotly.express as px
import pytz  # Make sure to import pytz for timezone handling
import seaborn as sns

In [209]:
# Function to get the list of S&P 500 companies and their sectors
def get_sp500_companies():
    # Fetch the S&P 500 company symbols and sectors from a reliable source (e.g., Wikipedia)
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(url)
    
    # Extract the relevant table containing the company symbols and sectors
    sp500_df = tables[0]
    
    # Return the DataFrame containing S&P 500 companies and sectors
    return sp500_df[['Symbol', 'GICS Sector']]

# Function to filter the S&P 500 companies by sector
def filter_energy_sector(df):
    return df[df['GICS Sector'] == 'Energy']['Symbol'].tolist()

# Get the list of S&P 500 companies and their sectors
sp500_companies = get_sp500_companies()

# Filter the S&P 500 companies to include only the ones in the energy sector
energy_stocks = filter_energy_sector(sp500_companies)

# Print the list of S&P 500 energy sector stocks
print(energy_stocks)


['APA', 'BKR', 'CVX', 'COP', 'CTRA', 'DVN', 'FANG', 'EOG', 'EQT', 'XOM', 'HAL', 'HES', 'KMI', 'MRO', 'MPC', 'OXY', 'OKE', 'PSX', 'PXD', 'SLB', 'TRGP', 'VLO', 'WMB']


In [253]:
# Define the ticker symbols as a list
ticker_symbols = ['TSM']
window_days = 180
# Define the date range
end_date = datetime.now()
start_date = end_date - timedelta(days=window_days)

# Create an empty DataFrame to store the adjusted data
adjusted_stock_data = pd.DataFrame()
earnings_data = {}  # Dictionary to store earnings dates for each ticker
metric_list = []

# Fetch historical data for each ticker symbol and adjust the format
for symbol in ticker_symbols:

    # symbol's historical price related
    data = yf.download(symbol, start=start_date, end=end_date)
    data = data.sort_index(ascending=False)
    data.index = [date.date() for date in data.index.sort_values(ascending=False)] # convert original timestamp index to date index

    # symbol's earning related
    stock = yf.Ticker(symbol)
    earnings_dates = stock.get_earnings_dates(limit=12)  # Fetch earnings dates for the past 4 quarters and forecast for the next 4 quarters
    earnings_data[symbol] = earnings_dates


    # Extract close prices and add them to the adjusted DataFrame with ticker symbol as column name
    adjusted_stock_data[symbol] = data['Close'].round(2)
    adjusted_stock_data[symbol + "_EPS_TTM"] = None


    # EPS TTM calculation
    EPS_TTM_start_date = data.index
    for i in EPS_TTM_start_date:
        for key, value in earnings_data.items():
            if key == symbol:
                symbol_hist_EPS = value['Reported EPS'][value['Reported EPS'].notnull()]
                symbol_hist_EPS.index = [date.date() for date in symbol_hist_EPS.index.sort_values(ascending=False)] # convert timestamp index to a date index(sorted, descending)

                symbol_projected_EPS = value['EPS Estimate'][value['EPS Estimate'].notnull()]
                symbol_projected_EPS.index = [date.date() for date in symbol_projected_EPS.index.sort_values(ascending=False)] # convert timestamp index to a date index(sorted, descending)

                # Filter the DataFrame to include only dates(index) less than or equal to the target date
                filtered_symbol_hist_EPS = symbol_hist_EPS[symbol_hist_EPS.index <= i]
                
                # Select the first four rows from the past_4_qtrs_EPS
                past_4_qtrs_EPS = filtered_symbol_hist_EPS.head(4)      

                # Select the first rows of estimate to be the latest qtrly projected EPS
                latest_projected_EPS = symbol_projected_EPS.head(1) 
                
                # # Calculate the sum of the numeric values in the selected rows
                EPS_TTM = past_4_qtrs_EPS.sum() 
                EPS_latest_projected = latest_projected_EPS.sum() + filtered_symbol_hist_EPS.head(3).sum()  # This metrics is the past 3 qtrs post EPS + 1 projected EPS


                adjusted_stock_data.loc[i, f"{symbol}_EPS_TTM"] = EPS_TTM
                adjusted_stock_data[f"{symbol}_EPS_latest_projected"] = EPS_latest_projected

            else:
                continue
            

    # stock's stats

    adjusted_stock_data[f"{symbol}_PE_TTM"] = adjusted_stock_data[symbol] / adjusted_stock_data[f"{symbol}_EPS_TTM"]
    adjusted_stock_data[f"{symbol}_PE_TTM_avg"] = adjusted_stock_data[f"{symbol}_PE_TTM"].mean().round(2)
    adjusted_stock_data[f"{symbol}_PE_TTM_std"] = np.std(adjusted_stock_data[f"{symbol}_PE_TTM"]).round(2)
    adjusted_stock_data[f"{symbol}_PE_TTM_volatility_+"] = adjusted_stock_data[f"{symbol}_PE_TTM_avg"] + adjusted_stock_data[f"{symbol}_PE_TTM_std"] # 这个是PE的波动范围上限
    adjusted_stock_data[f"{symbol}_PE_TTM_volatility_-"] = adjusted_stock_data[f"{symbol}_PE_TTM_avg"] - adjusted_stock_data[f"{symbol}_PE_TTM_std"] # 这个是PE的波动范围下限

    adjusted_stock_data[f"{symbol}_relative_valuation_+"] = adjusted_stock_data[f"{symbol}_PE_TTM_volatility_+"] * adjusted_stock_data[f"{symbol}_EPS_TTM"] # 这个是relative valuation的价格上限
    adjusted_stock_data[f"{symbol}_relative_valuation_-"] = adjusted_stock_data[f"{symbol}_PE_TTM_volatility_-"] * adjusted_stock_data[f"{symbol}_EPS_TTM"] # 这个是relative valuation的价格下限
    adjusted_stock_data[f"{symbol}_relative_valuation_median"] = np.median([adjusted_stock_data[f"{symbol}_relative_valuation_+"].head(1), adjusted_stock_data[f"{symbol}_relative_valuation_-"].head(1)]) #这个是根据最新TTM PE估值的价格中位数

    adjusted_stock_data[f"{symbol}_relative_valuation_projected_+"] = adjusted_stock_data[f"{symbol}_PE_TTM_volatility_+"] * adjusted_stock_data[f"{symbol}_EPS_latest_projected"] # 这个是relative valuation的价格上限
    adjusted_stock_data[f"{symbol}_relative_valuation_projected_-"] = adjusted_stock_data[f"{symbol}_PE_TTM_volatility_-"] * adjusted_stock_data[f"{symbol}_EPS_latest_projected"] # 这个是relative valuation的价格下限
    adjusted_stock_data[f"{symbol}_relative_valuation_projected_median"] = np.median([adjusted_stock_data[f"{symbol}_relative_valuation_projected_+"], adjusted_stock_data[f"{symbol}_relative_valuation_projected_-"]]) #这个是根据3 qtrs post EPS + 1 projected EPS 得出PE估值的价格中位数

    adjusted_stock_data[f"{symbol}_{window_days}_price_min"] = adjusted_stock_data[symbol].min().round(2)
    adjusted_stock_data[f"{symbol}_{window_days}_price_max"] = adjusted_stock_data[symbol].max().round(2)
    adjusted_stock_data[f"{symbol}_{window_days}_price_avg"] = adjusted_stock_data[symbol].mean().round(2)
    adjusted_stock_data[f"{symbol}_{window_days}_price_std"] = np.std(adjusted_stock_data[symbol]).round(2)



    conditions = [
    (adjusted_stock_data[f"{symbol}"] < adjusted_stock_data[f"{symbol}_relative_valuation_-"]),
    (adjusted_stock_data[f"{symbol}"] > adjusted_stock_data[f"{symbol}_relative_valuation_+"]),
    ((adjusted_stock_data[f"{symbol}"] >= adjusted_stock_data[f"{symbol}_relative_valuation_-"]) & (adjusted_stock_data[f"{symbol}"] <= adjusted_stock_data[f"{symbol}_relative_valuation_+"])),
    ]

    categories = [
        'undervalued'
        ,'overvalued'
        ,'fair'
        ]

    # This KPI assess if the current stock price is under/over/fair to the current relative valuation
    adjusted_stock_data[f"{symbol}_curr_assessment"] = None

    for condition, category in zip(conditions, categories):
        adjusted_stock_data.loc[condition, f"{symbol}_price_valuation_assessment"] = category




# # Reset index to make it cleaner
# adjusted_stock_data.reset_index(inplace=True)

# # Example usage: Display the first few rows of the adjusted DataFrame
# adjusted_stock_data.sort_values(by='Date', ascending=False)

# Set display options to show all rows and columns
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)
selected_cols = [
    f"{symbol}"
    ,f"{symbol}_PE_TTM"
    ,f"{symbol}_PE_TTM_avg"
    ,f"{symbol}_relative_valuation_+"
    ,f"{symbol}_relative_valuation_-"
    ,f"{symbol}_relative_valuation_median"
    ,f"{symbol}_relative_valuation_projected_+"
    ,f"{symbol}_relative_valuation_projected_-"
    ,f"{symbol}_relative_valuation_projected_median"
    ,f"{symbol}_price_valuation_assessment"
]

# adjusted_stock_data.columns
adjusted_stock_data[selected_cols]

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0,TSM,TSM_PE_TTM,TSM_PE_TTM_avg,TSM_relative_valuation_+,TSM_relative_valuation_-,TSM_relative_valuation_median,TSM_relative_valuation_projected_+,TSM_relative_valuation_projected_-,TSM_relative_valuation_projected_median,TSM_price_valuation_assessment
2024-04-19,127.7,24.60501,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,fair
2024-04-18,132.27,25.485549,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,fair
2024-04-17,139.03,26.788054,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued
2024-04-16,139.8,26.936416,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued
2024-04-15,140.14,27.001927,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued
2024-04-12,142.52,27.460501,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued
2024-04-11,147.2,28.362235,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued
2024-04-10,146.22,28.17341,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued
2024-04-09,145.4,28.015414,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued
2024-04-08,142.79,27.512524,21.6,134.5248,89.6832,112.104,132.4512,88.3008,110.376,overvalued


In [234]:
np.median([110.5021, 95.3401])

102.9211

In [225]:
def get_earnings_dates(ticker_symbols):
    earnings_data = {}  # Dictionary to store earnings dates for each ticker
    
    for ticker in ticker_symbols:
        stock = yf.Ticker(ticker)
        earnings_dates = stock.get_earnings_dates(limit=12)  # Fetch earnings dates for the past 4 quarters and forecast for the next 4 quarters
        earnings_data[ticker] = earnings_dates
        
    return earnings_data


earnings_data = get_earnings_dates(ticker_symbols)


for ticker, dates in earnings_data.items():
    if ticker in ['NKE']:
        print(f"Earnings Dates for {ticker}:")
        print(dates)
        print()
    else:
        continue

Earnings Dates for NKE:
                           EPS Estimate  Reported EPS  Surprise(%)
Earnings Date                                                     
2025-03-19 06:00:00-04:00           NaN           NaN          NaN
2024-12-19 16:00:00-05:00           NaN           NaN          NaN
2024-09-26 06:00:00-04:00           NaN           NaN          NaN
2024-06-27 06:00:00-04:00          0.85           NaN          NaN
2024-03-21 16:00:00-04:00          0.74          0.98       0.3315
2023-12-21 16:00:00-05:00          0.85          1.03       0.2175
2023-09-28 16:00:00-04:00          0.75          0.94       0.2615
2023-06-29 16:00:00-04:00          0.67          0.66      -0.0145
2023-03-21 16:00:00-04:00          0.55          0.79       0.4416
2022-12-20 16:00:00-05:00          0.64          0.85       0.3204
2022-09-29 16:00:00-04:00          0.92          0.93       0.0104
2022-06-27 16:00:00-04:00          0.81          0.90       0.1179



In [226]:
np.median([15.5, 28.6])

22.05

In [217]:
earnings_data = get_earnings_dates(ticker_symbols)


for ticker, dates in earnings_data.items():
    if ticker == 'TSM':
        print(f"Earnings Dates for {ticker}:")
        print(dates)
        print()