In [1]:
import os
import pandas as pd
from pandas.tseries.offsets import QuarterEnd
from decimal import Decimal
import numpy as np
from datetime import datetime, timedelta
from dotenv import load_dotenv
import yfinance as yf


import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import pytz  # Make sure to import pytz for timezone handling
import seaborn as sns


import requests
import csv
import json

import warnings

In [2]:
# # yahoo finance free version

# today_date = datetime.today()
# window_days = 365*10 # This is 10 years in days window

# # Define the list of stock symbols
# stocks_list = [
#     'SPY'
#     ,'CNI'
#     ,'PM'
#     # ,'COST'
#     # ,'WMT'

#     ]

# # Fetch the data
# data = yf.download(
#     stocks_list
#     ,start=today_date - timedelta(days=window_days)
#     ,end=today_date)



# # The data contains multi-level columns, we'll focus on 'Adj Close' for adjusted closing prices
# adj_close = data['Adj Close']

# # Perform standardization using StandardScaler
# scaler = StandardScaler()
# standardized_data = scaler.fit_transform(adj_close)

# # Convert the normalized data back to a DataFrame
# standardized_data = pd.DataFrame(standardized_data, index=adj_close.index, columns=adj_close.columns)


# # Plotting the adjusted closing prices of the stocks
# plt.figure(figsize=(14, 7))

# for stock in stocks_list:
#     plt.plot(standardized_data[stock], label=stock)

# plt.title('Stock Prices Over Time')
# plt.xlabel('Date')
# plt.ylabel('Adjusted Closing Price')
# plt.legend()
# plt.grid(True)
# plt.show()


In [3]:
load_dotenv()

API_KEY = os.getenv("alpha_vantage_api_key")

In [4]:
# Addtional setting session
# Set display options to show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")
pd.options.mode.copy_on_write = True

In [9]:
# Parameters section

alpha_vantage_api_key = API_KEY # FREE TIER API rate limit is 25 requests per day
# ticker_symbols = [
#     # 'XOM'
#     # ,'PM'
#     # ,'CNI'
#     # ,'VZ'
#     # ,'JNJ'
#     # ,'XOM'
#     # ,'WM'
#     # ,'CB'
#     # ,'TRV'
#     # ,'BRK.B'


#     # energy
#     'IVV'
#     # ,'FAST'
#     # ,'WM'
#     # ,'PG'
#     # ,'WMT'
#     # ,'CHD'
#     ,'COST'
#     ,'FANG'
#     # ,'IBN'

#  ]

# ticker_symbols = [
#     'MNST'
#     ,'COST'
#     ,'AZO'
#     ,'IVV'
# ]

# Basic Materials
ticker_symbols = [
    'SPY'
    ,'QQQ'

]

window_days = 365*25
start_date = datetime.today()
end_date = start_date - timedelta(days=window_days)

stock_return_consolidate_df = pd.DataFrame()
stock_daily_ts_consolidate_df = pd.DataFrame()

# Stock Price Return Comparison

In [10]:
for symbol in ticker_symbols:

    # STOCK SPLIT FACTOR section
    url = f'https://www.alphavantage.co/query?function=SPLITS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    for key, value in data.items():
        if key == 'data':
            if len(value) > 0:
                stock_split_record_df = pd.DataFrame(value)
                stock_split_record_df['split_factor'] = pd.to_numeric(stock_split_record_df['split_factor'], errors='coerce') # change split_factor series to numeric data
                stock_split_record_df['effective_date'] = pd.to_datetime(stock_split_record_df['effective_date'])
            else:
                stock_split_record_df = pd.DataFrame()
                stock_split_record_df['split_factor'] = 1
                stock_split_record_df['effective_date'] = datetime.today()


    # Daily quote section
    # replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&apikey={alpha_vantage_api_key}&outputsize=full'
    r = requests.get(url)
    data = r.json()

    for key, value in data.items():
        if key == 'Time Series (Daily)':


            selected_cols = [
                '4. close'
            ]

            Daily_stock_df = pd.DataFrame(value).transpose()[selected_cols] # tranpose the dataframe and sub select selected cols

            # Rename columns
            Daily_stock_df.rename(
                columns={
                    '4. close': 'stock_price'
                    }
                ,inplace=True
                )
            
            Daily_stock_df["stock_price"] = Daily_stock_df["stock_price"].astype(str).apply(lambda x: float(x))
            Daily_stock_df["stock_price"] = Daily_stock_df["stock_price"].round(2)
            Daily_stock_df.index = pd.to_datetime(Daily_stock_df.index)


    for date_i in Daily_stock_df.index.date:
        for date_j in stock_split_record_df['effective_date'].dt.date:
            if date_i == date_j:

                # stock price to divided the split factor
                Daily_stock_df.loc[Daily_stock_df.index.date < date_j, 'stock_price'] /= (stock_split_record_df['split_factor'][stock_split_record_df['effective_date'].dt.date == date_j].values[0])


    # Dividend section
    # replace the "demo" apikey below with your own key from https://www.alphavantage.co/support/#api-key
    url = f'https://www.alphavantage.co/query?function=DIVIDENDS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    for key, value in data.items():
        if key == 'data':
            stock_dividend_df = pd.DataFrame(value)
            if len(value) > 0:
                stock_dividend_df = stock_dividend_df[['ex_dividend_date', 'amount']]

                # Rename columns
                stock_dividend_df.rename(
                        columns={
                            'ex_dividend_date': f'{symbol}_div_date'
                            ,'amount': f'{symbol}_div_amount'
                            }
                        ,inplace=True
                        )
                
                stock_dividend_df[f'{symbol}_div_amount'] = stock_dividend_df[f'{symbol}_div_amount'].astype(str).apply(lambda x: float(x))
                stock_dividend_df[f'{symbol}_div_amount'] = stock_dividend_df[f'{symbol}_div_amount'].round(2)
            else:
                stock_dividend_df[f'{symbol}_div_date'] = 0
                stock_dividend_df[f'{symbol}_div_amount'] = 0


    # stock_dividend_df.index = stock_dividend_df[f'{symbol}_div_date']



    # WTI oil price intervel can be daily, weekly, monthly
    url = f'https://www.alphavantage.co/query?function=WTI&interval=daily&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    for key, value in data.items():
        if key == 'data':
            WTI_daily_price_df = pd.DataFrame(value)

            # error index are the indexes which doesnot have a valid price, it shows '.'
            error_index = WTI_daily_price_df[WTI_daily_price_df['value']=='.'].index

            # Calculate the average of the previous and next prices
            previous_WTI_price = WTI_daily_price_df.loc[error_index + 1, 'value']
            next_WTI_price = WTI_daily_price_df.loc[error_index - 1, 'value']
            average_WTI_price = (previous_WTI_price + next_WTI_price) / 2

            # Update the DataFrame with the calculated average price
            WTI_daily_price_df.loc[error_index, 'value'] = average_WTI_price

            # transfer the string value to decimal
            WTI_daily_price_df['value'] = WTI_daily_price_df['value'].astype(str).apply(lambda x: float(x))

            WTI_daily_price_df.rename(
                columns={
                    'value':'WTI'
                    ,'date':'Date'
                }
                ,inplace=True
            )

            WTI_daily_price_df['Date'] = pd.to_datetime(WTI_daily_price_df['Date'])





    # Time window filter applied
    Daily_stock_df_filter_timewindow = Daily_stock_df[(Daily_stock_df.index.date >= end_date.date())
                                                & (Daily_stock_df.index.date <= start_date.date())
                                                ]
    
    stock_dividend_df_filter_timewindow = stock_dividend_df[(pd.to_datetime(stock_dividend_df[f'{symbol}_div_date']) >= pd.to_datetime(end_date))
                                                    & (pd.to_datetime((stock_dividend_df[f'{symbol}_div_date'])) <= pd.to_datetime(start_date))
                                                    ]
    

#     # Calculate the return %
#     # return including dividend = (current_price - initial_price + total_dividend_during_period) / initial_price
    stock_return = round(
        (
            (
                Daily_stock_df_filter_timewindow[f'stock_price'][0] 
                - Daily_stock_df_filter_timewindow[f'stock_price'][-1] 
                + stock_dividend_df_filter_timewindow[f'{symbol}_div_amount'].sum()
            ) 
            / Daily_stock_df_filter_timewindow[f'stock_price'][-1]
        )
    ,4
    )

    # assign value to columns in consolidate dataframe
    stock_return_consolidate_df.loc[0,f'{symbol}_{window_days/365}_yrs_return'] = stock_return
    stock_daily_ts_consolidate_df[f'{symbol}'] = Daily_stock_df_filter_timewindow[f'stock_price']


####### WTI merge step ###########
# Step to merge the WTI crude oil price into the stock_daily_ts_consolidate_df
stock_daily_ts_consolidate_df = stock_daily_ts_consolidate_df.merge(
    WTI_daily_price_df
    ,left_on=stock_daily_ts_consolidate_df.index
    ,right_on='Date'
    ,how='left'
    )

# clean the merged WTI dataframe 
stock_daily_ts_consolidate_df.index = stock_daily_ts_consolidate_df['Date']
stock_daily_ts_consolidate_df = stock_daily_ts_consolidate_df.drop(
    columns='Date'
) \

####### WTI merge step ###########


stock_daily_ts_consolidate_normalized_df = stock_daily_ts_consolidate_df / stock_daily_ts_consolidate_df.iloc[-1]

In [11]:
stock_return_consolidate_df.transpose().sort_values(
    by=stock_return_consolidate_df.transpose().columns[0]
    ,ascending=False
    )

Unnamed: 0,0
QQQ_25.0_yrs_return,5.1414
SPY_25.0_yrs_return,3.8659


In [12]:
stock_daily_ts_consolidate_normalized_df = stock_daily_ts_consolidate_df / stock_daily_ts_consolidate_df.iloc[-1]


# Create a Plotly figure
fig = go.Figure()

# Add traces for each stock
for column in stock_daily_ts_consolidate_normalized_df.columns:
    fig.add_trace(go.Scatter(
        x=stock_daily_ts_consolidate_normalized_df.index,
        y=stock_daily_ts_consolidate_normalized_df[column],
        mode='lines',
        name=column
    ))

# Customize layout
fig.update_layout(
    title=f'Normalized Stock Prices in {window_days/365} years',
    xaxis_title='Date',
    yaxis_title='Normalized Price',
    legend_title='Stock',
    template='plotly_white'
)

# Show the plot
fig.show()

# EPS TTM Comparison

In [32]:
# Parameters section

alpha_vantage_api_key = API_KEY # FREE TIER API rate limit is 25 requests per day

# # Consumer Staples
# ticker_symbols = [
#     'MO',
#     'ADM',
#     # 'BF.B', # DATA ISSUE
#     'BG',
#     'CPB',
#     'CHD',
#     'CLX',
#     'KO',
#     'CL',
#     'CAG',
#     'STZ',
#     'COST',
#     'DG',
#     'DLTR',
#     'EL',
#     'GIS',
#     'HSY',
#     'HRL',
#     'K',
#     # 'KVUE',
#     'KDP',
#     'KMB',
#     # 'KHC',
#     'KR', 
#     # 'LW',
#     'MKC', 
#     'TAP',
#     'MDLZ',
#     'MNST',
#     # 'PEP',
#     'PM',
#     'PG',
#     'SJM',
#     'SYY',
#     'TGT',
#     'TSN',
#     'WBA',
#     'WMT'
# ]


# Energy
# ticker_symbols = [
# 'APA'
# ,'BKR'
# ,'CVX'
# ,'COP'
# ,'CTRA'
# ,'DVN'
# ,'FANG'
# ,'EOG'
# ,'EQT'
# ,'XOM'
# ,'HAL'
# ,'HES'
# ,'KMI'
# ,'MRO'
# ,'MPC'
# ,'OXY'
# ,'OKE'
# ,'PSX'
# ,'SLB'
# ,'TRGP'
# ,'VLO'
# ,'WMB'
# ]


# # Consumer Dis
# ticker_symbols = [
# # 'ABNB'
# 'AMZN'
# # ,'APTV'
# ,'AZO'
# ,'BBY'
# ,'BKNG'
# ,'BWA'
# ,'CZR'
# ,'KMX'
# ,'CCL'
# ,'CMG'
# ,'DRI'
# ,'DECK'
# ,'DPZ'
# ,'DHI'
# ,'EBAY'
# ,'EXPE'
# ,'F'
# ,'GRMN'
# # ,'GM'
# ,'GPC'
# ,'HAS'
# # ,'HLT'
# ,'HD'
# ,'LVS'
# ,'LEN'
# ,'LKQ'
# ,'LOW'
# # ,'LULU'
# ,'MAR'
# ,'MCD'
# ,'MGM'
# ,'MHK'
# ,'NKE'
# # ,'NCLH'
# ,'NVR'
# ,'ORLY'
# ,'POOL'
# ,'PHM'
# ,'RL'
# ,'ROST'
# ,'RCL'
# ,'SBUX'
# ,'TPR'
# # ,'TSLA'
# ,'TJX'
# ,'TSCO'
# # ,'ULTA'
# ,'WYNN'
# ,'YUM'    
# ]

# # Consumer Dis / Restuarants
# ticker_symbols = [
# 'MCD'
# ,'SBUX'
# ,'YUM'
# ,'QSR'
# ,'DRI'
# ,'YUMC'
# ,'CAVA'
# ,'DPZ'
# ,'WING'
# ,'TXRH'
# ,'ARMK'
# ,'SHAK'
# ,'SG'
# ,'EAT'
# ,'WEN'
# ,'CAKE'
# ,'ARCO'
# ,'PZZA'
# ,'BLMN'
# ,'HDL'
# ,'CBRL'
# ,'KRUS'
# ,'PTLO'
# ,'JACK'
# ,'BJRI'
# ,'CHUY'
# # ,'BH'
# ,'DIN'
# ,'LOCO'
# ,'RICK'
# ,'NATH'
# ,'DENN'
# ,'GENK'
# ,'PBPB'
# ,'THCH'
# ,'STKS'
# ,'FATBB'
# ,'FAT'
# ,'RRGB'
# ,'NDLS'
# ,'BDL'
# ,'ARKR'
# ,'RAVE'
# ,'GTIM'
# ,'PNST'
# ,'SDOT'
# ,'BTBD'
# ,'REBN'
# ,'YOSH'
# ]

# # Consumer Non-Durables / Food: Specialty/Candy
# ticker_symbols = [
# 'LSF'
# ,'CHSN'
# ,'JVA'
# ,'FRPT'
# ,'STKL'
# ,'SOWG'
# ,'IFF'
# ,'K'
# ,'KLG'
# ,'INGR'
# ,'SXT'
# ,'BCPC'
# ,'POST'
# ,'UTZ'
# ,'BROS'
# ,'MKC'
# ,'PETZ'
# ,'KDP'
# ,'NOMD'
# ,'BRCC'
# ,'CPB'
# ,'SENEB'
# ,'ASH'
# ,'SENEA'
# ,'JJSF'
# ,'PEP'
# ,'LANC'
# ,'FLO'
# ,'SJM'
# ,'TR'
# ,'THS'
# ,'HSY'
# ,'JBSS'
# ,'BOF'
# ,'FARM'
# ,'BRID'
# ,'LW'
# ,'DAR'
# ,'WEST'
# ,'RMCF'
# ,'SNAX'
# ,'PLAG'
# ,'NUZE'

# ]


# Basic Materials
ticker_symbols = [
    'LIN'
    ,'SHW'
    ,'APD'
    ,'FCX'
    ,'ECL'
    ,'CTVA'
    ,'NEM'
    ,'VMC'
    ,'DD'
    ,'MLM'
    ,'NUE'
    ,'DOW'
    ,'PPG'
    # ,'SW'
    ,'IFF'
    ,'LYB'
    ,'PKG'
    ,'IP'
    ,'STLD'
    ,'BALL'
    ,'AVY'
    ,'CF'
    ,'AMCR'
    ,'EMN'
    ,'ALB'
    ,'CE'
    ,'MOS'
    ,'FMC'
]

# ticker_symbols = [
#     'MNST'
#     ,'IVV'
#     ,'AAPL'
# ]

window_period = 10 # years
current_year = datetime.today().year
start_year = current_year - window_period

current_date = pd.Timestamp(datetime.today()).normalize()  # Start with today's date
current_date_prevQtr = start_date + QuarterEnd(-2) 
qtr_range = pd.date_range(start=f'{start_year}-03-31', end=start_date, freq='Q')


stock_consolidate_annual_eps_df = pd.DataFrame()
stock_consolidate_ttm_eps_df = pd.DataFrame()

stock_consolidate_annual_eps_df['fiscalDateEnding'] = [i for i in range(start_year, current_year)]
stock_consolidate_ttm_eps_df['fiscalDateEnding'] = qtr_range
# stock_consolidate_ttm_eps_df.sort_index(ascending=False, inplace=True)

In [44]:
# Earning section
# past earnings from alpha vintage API
for j, symbol in enumerate(ticker_symbols):
    print(j, symbol)
    url = f'https://www.alphavantage.co/query?function=EARNINGS&symbol={symbol}&apikey={alpha_vantage_api_key}'
    r = requests.get(url)
    data = r.json()

    for key, value in data.items():
        if key == 'annualEarnings':

            selected_cols = [
                'fiscalDateEnding'
                ,'reportedEPS'
            ]

            annualEPS_df = pd.DataFrame(value) # tranpose the dataframe and sub select selected cols


            annualEPS_df['fiscalDateEnding'] = pd.to_datetime(annualEPS_df['fiscalDateEnding']).dt.year

            annualEPS_df = annualEPS_df[
                annualEPS_df['fiscalDateEnding'].isin(
                    range(
                        (datetime.today().year - window_period) 
                        ,datetime.today().year
                            )
                            )
                            ]

            # Convert the column to decimal type
            for col in selected_cols:
                if col in ['reportedEPS']:
                    annualEPS_df[f'{col}'] = annualEPS_df[f'{col}'].astype(str).apply(lambda x: float(x))

                else:
                    continue
            
            # clean annualEPS_df
            annualEPS_df = annualEPS_df.sort_values('reportedEPS', ascending=False).drop_duplicates('fiscalDateEnding')
            annualEPS_df = annualEPS_df.sort_values('fiscalDateEnding', ascending=True).reset_index(drop=True)
            annualEPS_df.rename(
                columns={
                    'reportedEPS': f'{symbol}_EPS'
                }
                ,inplace=True
            )

        if key == 'quarterlyEarnings':

            selected_cols = [
                'fiscalDateEnding'
                ,'reportedEPS'
            ]


            qtrEPS_df = pd.DataFrame(value)[selected_cols] # tranpose the dataframe and sub select selected cols
            qtrEPS_df['fiscalDateEnding'] = pd.to_datetime(qtrEPS_df['fiscalDateEnding'])

            qtrEPS_df = qtrEPS_df.sort_values(['fiscalDateEnding'], ascending=True)
            
            # fill na to 0 for convert decimal type purpose
            qtrEPS_df['reportedEPS'] = qtrEPS_df['reportedEPS'].replace('None', 0)
            qtrEPS_df['reportedEPS'] = qtrEPS_df['reportedEPS'].fillna(0)
            
            # Convert the column to decimal type
            for col in selected_cols:
                if col in ['reportedEPS']:
                    qtrEPS_df[f'{col}'] = qtrEPS_df[f'{col}'].astype(str).apply(lambda x: float(x))

                else:
                    continue


            # Calculate the EPS TTM by summing the current quarter and the previous 3 quarters
            qtrEPS_df[f'{symbol}_EPS_TTM'] = qtrEPS_df['reportedEPS'].rolling(window=4).sum().fillna(0)

            # filtered based on window_period years
            qtrEPS_df = qtrEPS_df[qtrEPS_df['fiscalDateEnding'] >= pd.Timestamp(f'{start_year}-03-31')]

            qtrEPS_df = qtrEPS_df.drop(columns=['reportedEPS'])



            # consolidation
            stock_consolidate_annual_eps_df = pd.merge(
                stock_consolidate_annual_eps_df
                ,annualEPS_df
                ,left_on='fiscalDateEnding'
                ,right_on='fiscalDateEnding'
                ,how='left'
            ).fillna(0)

            stock_consolidate_ttm_eps_df = pd.merge(
                stock_consolidate_ttm_eps_df
                ,qtrEPS_df
                ,left_on='fiscalDateEnding'
                ,right_on='fiscalDateEnding'
                ,how='left'
            ).fillna(0)

0 LIN
1 SHW
2 APD
3 FCX
4 ECL
5 CTVA
6 NEM
7 VMC
8 DD
9 MLM
10 NUE
11 DOW
12 PPG
13 IFF
14 LYB
15 PKG
16 IP
17 STLD
18 BALL
19 AVY
20 CF
21 AMCR
22 EMN
23 ALB
24 CE
25 MOS
26 FMC


In [45]:
stock_consolidate_annual_eps_df

Unnamed: 0,fiscalDateEnding,LIN_EPS_x,SHW_EPS_x,LIN_EPS_y,SHW_EPS_y,APD_EPS,FCX_EPS,ECL_EPS,CTVA_EPS,NEM_EPS,VMC_EPS,DD_EPS,MLM_EPS,NUE_EPS,DOW_EPS,PPG_EPS,IFF_EPS,LYB_EPS,PKG_EPS,IP_EPS,STLD_EPS,BALL_EPS,AVY_EPS,CF_EPS,AMCR_EPS,EMN_EPS,ALB_EPS,CE_EPS,MOS_EPS,FMC_EPS
0,2014,6.28,2.98,6.28,2.98,5.78,1.98,4.18,0.0,1.09,0.93,5.9936,3.43,2.28,3.1,4.88,5.08,8.85,4.66,3.04,1.3,1.94,3.12,3.6,0.0,7.06,4.19,5.69,2.72,4.03
1,2015,5.81,3.72,5.81,3.72,6.57,-0.09,4.38,0.0,0.99,2.19,3.91,4.52,1.74,3.52,5.7,5.25,10.39,4.53,3.65,0.73,1.81,3.44,3.81,0.0,7.28,3.94,6.05,2.9,2.51
2,2016,5.49,4.17,5.49,4.17,7.53,0.2,4.38,0.0,1.41,2.86,3.35,6.63,2.26,3.79,5.91,5.5,9.22,4.89,3.36,1.91,1.74,4.03,0.47,0.0,6.76,3.9,6.61,0.79,2.82
3,2017,5.85,4.9,5.85,4.9,6.31,1.17,4.69,0.0,1.46,3.02,5.1425,7.19,3.57,0.0,5.89,5.89,10.22,6.03,3.6,2.66,2.03,5.01,-0.26,0.52,7.62,4.6,7.51,1.1,2.71
4,2018,6.52,6.17,6.52,6.17,7.39,1.5,5.25,0.9532,1.34,4.06,5.84,7.76,7.64,6.307,5.89,6.21,11.24,8.03,5.34,5.49,2.19,6.07,1.21,0.62,8.18,5.5,11.03,2.12,6.29
5,2019,7.35,7.03,7.35,7.03,8.22,0.02,5.82,1.43,1.31,4.7,4.66,9.72,4.23,3.27,6.21,6.17,9.58,7.65,4.44,3.09,2.54,6.59,2.09,0.45,7.12,6.04,9.52,0.28,6.08
6,2020,8.24,8.19,8.24,8.19,8.55,0.54,4.16,1.5,2.65,4.7,3.37,10.68,3.3,1.64,5.7,5.7,5.68,5.78,2.8,2.83,2.96,7.11,1.51,0.63,6.14,4.12,7.63,0.85,6.2
7,2021,10.69,8.14,10.69,8.14,9.02,3.13,4.82,2.13,2.95,5.05,4.27,12.25,23.52,8.98,6.77,5.67,18.19,9.39,3.95,16.24,3.49,8.92,4.29,0.75,8.86,4.05,18.21,5.04,6.93
8,2022,12.29,8.74,12.29,8.74,10.41,2.43,4.49,2.65,1.86,5.23,3.41,12.1,28.73,6.22,6.06,5.56,12.44,11.13,3.88,22.58,2.78,9.15,16.95,0.8,7.83,21.95,15.91,11.01,7.41
9,2023,14.21,10.34,14.21,10.34,11.51,1.53,5.21,2.68,1.59,6.99,3.48,19.33,17.99,2.24,7.67,3.34,8.8,8.69,2.17,14.9,2.91,8.33,8.33,0.74,6.4,22.24,8.92,3.57,3.78


In [46]:

stock_consolidate_annual_eps_df = stock_consolidate_annual_eps_df[stock_consolidate_annual_eps_df['fiscalDateEnding'] >= 2005]
# Assuming stock_consolidate_annual_eps_df is your DataFrame
# Create a copy of the DataFrame for normalization
stock_consolidate_annual_eps_normalized_df = stock_consolidate_annual_eps_df.copy()

# List of columns to normalize
ticker_symbol_cols = stock_consolidate_annual_eps_normalized_df.columns.to_list()
ticker_symbol_cols.remove('fiscalDateEnding')

# Function to normalize based on the first non-zero value
def normalize_column(column):
    # Find the first non-zero value
    first_non_zero = column[column != 0].iloc[0]
    return column / first_non_zero

# Normalize each EPS column
for col in ticker_symbol_cols:
    stock_consolidate_annual_eps_normalized_df[col] = normalize_column(stock_consolidate_annual_eps_normalized_df[col])

In [47]:
stock_consolidate_annual_eps_normalized_df

Unnamed: 0,fiscalDateEnding,LIN_EPS_x,SHW_EPS_x,LIN_EPS_y,SHW_EPS_y,APD_EPS,FCX_EPS,ECL_EPS,CTVA_EPS,NEM_EPS,VMC_EPS,DD_EPS,MLM_EPS,NUE_EPS,DOW_EPS,PPG_EPS,IFF_EPS,LYB_EPS,PKG_EPS,IP_EPS,STLD_EPS,BALL_EPS,AVY_EPS,CF_EPS,AMCR_EPS,EMN_EPS,ALB_EPS,CE_EPS,MOS_EPS,FMC_EPS
0,2014,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
1,2015,0.925159,1.248322,0.925159,1.248322,1.136678,-0.045455,1.047847,0.0,0.908257,2.354839,0.652363,1.317784,0.763158,1.135484,1.168033,1.033465,1.174011,0.972103,1.200658,0.561538,0.93299,1.102564,1.058333,0.0,1.031161,0.940334,1.063269,1.066176,0.622829
2,2016,0.874204,1.399329,0.874204,1.399329,1.302768,0.10101,1.047847,0.0,1.293578,3.075269,0.55893,1.932945,0.991228,1.222581,1.211066,1.082677,1.041808,1.049356,1.105263,1.469231,0.896907,1.291667,0.130556,0.0,0.957507,0.930788,1.161687,0.290441,0.699752
3,2017,0.931529,1.644295,0.931529,1.644295,1.091696,0.590909,1.12201,0.0,1.33945,3.247312,0.857999,2.09621,1.565789,0.0,1.206967,1.159449,1.154802,1.293991,1.184211,2.046154,1.046392,1.605769,-0.072222,1.0,1.07932,1.097852,1.319859,0.404412,0.672457
4,2018,1.038217,2.07047,1.038217,2.07047,1.278547,0.757576,1.255981,1.0,1.229358,4.365591,0.974373,2.262391,3.350877,2.034516,1.206967,1.222441,1.270056,1.723176,1.756579,4.223077,1.128866,1.945513,0.336111,1.192308,1.15864,1.312649,1.938489,0.779412,1.560794
5,2019,1.170382,2.35906,1.170382,2.35906,1.422145,0.010101,1.392344,1.50021,1.201835,5.053763,0.777496,2.833819,1.855263,1.054839,1.272541,1.214567,1.082486,1.641631,1.460526,2.376923,1.309278,2.112179,0.580556,0.865385,1.008499,1.441527,1.673111,0.102941,1.508685
6,2020,1.312102,2.748322,1.312102,2.748322,1.479239,0.272727,0.995215,1.573647,2.431193,5.053763,0.562266,3.113703,1.447368,0.529032,1.168033,1.122047,0.641808,1.240343,0.921053,2.176923,1.525773,2.278846,0.419444,1.211538,0.869688,0.983294,1.340949,0.3125,1.538462
7,2021,1.702229,2.731544,1.702229,2.731544,1.560554,1.580808,1.15311,2.234578,2.706422,5.430108,0.712427,3.571429,10.315789,2.896774,1.387295,1.116142,2.055367,2.015021,1.299342,12.492308,1.798969,2.858974,1.191667,1.442308,1.254958,0.966587,3.200351,1.852941,1.719603
8,2022,1.957006,2.932886,1.957006,2.932886,1.801038,1.227273,1.074163,2.780109,1.706422,5.623656,0.56894,3.527697,12.600877,2.006452,1.241803,1.094488,1.40565,2.388412,1.276316,17.369231,1.43299,2.932692,4.708333,1.538462,1.109065,5.238663,2.796134,4.047794,1.83871
9,2023,2.262739,3.469799,2.262739,3.469799,1.991349,0.772727,1.246411,2.811582,1.458716,7.516129,0.580619,5.635569,7.890351,0.722581,1.571721,0.65748,0.99435,1.864807,0.713816,11.461538,1.5,2.669872,2.313889,1.423077,0.906516,5.307876,1.567663,1.3125,0.937965


In [60]:
fig = px.line(stock_consolidate_annual_eps_normalized_df, 
              x='fiscalDateEnding', 
              y=ticker_symbol_cols, 
              labels={'value':'Normalized EPS', 'fiscalDateEnding':'Year'}, 
              title='Normalized EPS Comparison Over Time')

# Add markers to the lines
fig.update_traces(mode='lines+markers')

# Set log scale for the y-axis
fig.update_layout(
    yaxis_type="log",  # This sets the y-axis to log scale
    xaxis_title='Fiscal Year',
    yaxis_title='EPS Value (Log Scale)',
    legend_title='Company',
    hovermode='x unified'
)

fig.show()

In [48]:
stock_consolidate_ttm_eps_df

Unnamed: 0,fiscalDateEnding,LIN_EPS_TTM_x,SHW_EPS_TTM_x,LIN_EPS_TTM_y,SHW_EPS_TTM_y,APD_EPS_TTM,FCX_EPS_TTM,ECL_EPS_TTM,CTVA_EPS_TTM,NEM_EPS_TTM,VMC_EPS_TTM,DD_EPS_TTM,MLM_EPS_TTM,NUE_EPS_TTM,DOW_EPS_TTM,PPG_EPS_TTM,IFF_EPS_TTM,LYB_EPS_TTM,PKG_EPS_TTM,IP_EPS_TTM,STLD_EPS_TTM,BALL_EPS_TTM,AVY_EPS_TTM,CF_EPS_TTM,AMCR_EPS_TTM,EMN_EPS_TTM,ALB_EPS_TTM,CE_EPS_TTM,MOS_EPS_TTM,FMC_EPS_TTM
0,2014-03-31,6.1,2.58,6.1,2.58,5.49,2.41,3.68,0.0,1.94,0.35,10.0453,2.86,1.63,0.0,4.35,4.6,6.85,3.74,3.13,0.78,1.77,2.74,4.4,0.0,6.44,4.1,4.69,2.47,3.76
1,2014-06-30,6.19,2.71,6.19,2.71,5.59,2.71,3.85,0.0,1.69,0.49,6.6433,3.32,1.82,2.68,4.54,4.83,7.47,4.19,3.44,0.96,1.9,2.83,3.91,0.0,6.56,4.23,5.04,2.03,3.83
2,2014-09-30,6.3,2.94,6.3,2.94,5.78,2.57,4.02,0.0,1.73,0.7,5.8736,3.22,2.09,2.9,4.73,4.93,8.48,4.54,3.34,1.18,1.95,2.91,3.65,0.0,6.77,4.28,5.45,2.08,3.96
3,2014-12-31,6.28,2.98,6.28,2.98,5.99,1.98,4.18,0.0,1.09,0.93,5.9936,3.43,2.28,3.1,4.88,5.08,8.85,4.66,3.04,1.3,1.94,3.12,3.6,0.0,7.06,4.19,5.69,2.72,4.03
4,2015-03-31,6.2,3.02,6.2,3.02,6.22,1.35,4.24,0.0,1.33,1.05,4.9588,3.87,2.08,3.15,5.08,5.21,9.81,4.59,3.27,1.31,1.88,3.28,3.67,0.0,7.29,4.4,6.08,2.83,3.7
5,2015-06-30,6.07,3.27,6.07,3.27,6.41,0.97,4.29,0.0,1.39,1.34,3.94,3.77,2.01,3.23,5.33,5.2,10.39,4.61,3.29,1.22,1.83,3.39,4.04,0.0,7.38,4.14,6.19,3.18,3.39
6,2015-09-30,5.91,3.47,5.91,3.47,6.57,0.18,4.36,0.0,1.12,1.76,3.36,4.36,1.93,3.44,5.53,5.27,10.67,4.61,3.31,1.0,1.83,3.49,4.01,0.0,7.33,3.9,6.08,3.24,2.86
7,2015-12-31,5.81,3.72,5.81,3.72,6.8,-0.09,4.38,0.0,0.99,2.19,3.91,4.52,1.74,3.52,5.7,5.25,10.39,4.53,3.65,0.73,1.81,3.44,3.81,0.0,7.28,3.94,6.05,2.9,2.51
8,2016-03-31,5.66,3.86,5.66,3.86,7.07,-0.19,4.35,0.0,0.87,2.61,3.83,5.12,1.75,3.57,5.82,5.27,10.1,4.63,3.61,0.82,1.75,3.57,3.24,0.0,7.15,3.89,6.16,2.34,2.47
9,2016-06-30,5.6,3.98,5.6,3.98,7.34,-0.35,4.35,0.0,1.05,2.85,3.89,5.78,2.09,3.7,6.0,5.38,9.75,4.7,3.56,1.18,1.77,3.75,1.98,0.0,6.82,4.14,6.17,1.35,2.46


# SP500 Sectors

In [14]:
# Function to get the list of S&P 500 companies and their sectors
def get_sp500_companies():
    # Fetch the S&P 500 company symbols and sectors from a reliable source (e.g., Wikipedia)
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(url)
    
    # Extract the relevant table containing the company symbols and sectors
    sp500_df = tables[0]
    
    # Return the DataFrame containing S&P 500 companies and sectors
    return sp500_df[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]


# Function to create a dictionary of sectors and sub-sectors
def create_sector_subsector_dict(df):
    sector_subsector_dict = {}
    for index, row in df.iterrows():
        sector = row['GICS Sector']
        subsector = row['GICS Sub-Industry']
        if sector not in sector_subsector_dict:
            sector_subsector_dict[sector] = [subsector]
        else:
            sector_subsector_dict[sector].append(subsector)
    return sector_subsector_dict

# Function to filter the S&P 500 companies by sector
def company_sector_list(df, sector):
    return df[df['GICS Sector'] == sector]['Symbol'].tolist()

def company_sub_sector_list(df, sub_sector):
    return df[df['GICS Sub-Industry'] == sub_sector]['Symbol'].tolist()


# Get the list of S&P 500 companies and their sectors
sp500_df  = get_sp500_companies()

sp500_companies_sectors = sp500_df ['GICS Sector'].value_counts().index
sp500_companies_sub_sectors = sp500_df ['GICS Sub-Industry'].value_counts().index

sector_subsector_dict = create_sector_subsector_dict(sp500_df)


# Function to create a DataFrame from the sector_subsector_dict
def create_sector_dataframe():
    # Create a list to store dictionacompany_sector_listries representing each row of data
    data = []
    
    # Filter the DataFrame to get stocks in the specified sector
    for sector in sp500_companies_sectors:
        sector_stocks_list = company_sector_list(sp500_df, sector)

        # Iterate over the stocks in the sector and create a dictionary for each
        for i, ticker in enumerate(sector_stocks_list, start=1):
            # Create a dictionary for the current stock in the sector
            row_data = {'Sector': sector, 'Ticker': ticker}
            # Append the dictionary to the list
            data.append(row_data)
    
    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data)
    return df


sector_subsector_dict = create_sector_subsector_dict(sp500_df)


sector_ticker_df = create_sector_dataframe()

In [15]:
# Pivot the DataFrame
pivot_sector_ticker_df = sector_ticker_df.groupby('Sector')['Ticker'].apply(list).reset_index()

# Transpose to get sectors as columns
pivot_sector_ticker_df = pivot_sector_ticker_df.set_index('Sector').T

pivot_sector_ticker_df

Sector,Communication Services,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology,Materials,Real Estate,Utilities
Ticker,"[GOOGL, GOOG, T, CHTR, CMCSA, EA, FOXA, FOX, I...","[ABNB, AMZN, APTV, AZO, BBY, BKNG, BWA, CZR, K...","[MO, ADM, BF.B, BG, CPB, CHD, CLX, KO, CL, CAG...","[APA, BKR, CVX, COP, CTRA, DVN, FANG, EOG, EQT...","[AFL, ALL, AXP, AIG, AMP, AON, ACGL, AJG, AIZ,...","[ABT, ABBV, A, ALGN, AMGN, BAX, BDX, TECH, BII...","[MMM, AOS, ALLE, AMTM, AME, ADP, AXON, BA, BR,...","[ACN, ADBE, AMD, AKAM, APH, ADI, ANSS, AAPL, A...","[APD, ALB, AMCR, AVY, BALL, CE, CF, CTVA, DOW,...","[ARE, AMT, AVB, BXP, CPT, CBRE, CSGP, CCI, DLR...","[AES, LNT, AEE, AEP, AWK, ATO, CNP, CMS, ED, C..."


In [16]:
sp500_sectors_dict = {}

In [17]:
for col in pivot_sector_ticker_df.columns:
    sp500_sectors_dict[col] = pivot_sector_ticker_df[col].iloc[0]

In [18]:
for key, value in sp500_sectors_dict.items():
    print(key, value)

Communication Services ['GOOGL', 'GOOG', 'T', 'CHTR', 'CMCSA', 'EA', 'FOXA', 'FOX', 'IPG', 'LYV', 'MTCH', 'META', 'NFLX', 'NWSA', 'NWS', 'OMC', 'PARA', 'TMUS', 'TTWO', 'VZ', 'DIS', 'WBD']
Consumer Discretionary ['ABNB', 'AMZN', 'APTV', 'AZO', 'BBY', 'BKNG', 'BWA', 'CZR', 'KMX', 'CCL', 'CMG', 'DRI', 'DECK', 'DPZ', 'DHI', 'EBAY', 'EXPE', 'F', 'GRMN', 'GM', 'GPC', 'HAS', 'HLT', 'HD', 'LVS', 'LEN', 'LKQ', 'LOW', 'LULU', 'MAR', 'MCD', 'MGM', 'MHK', 'NKE', 'NCLH', 'NVR', 'ORLY', 'POOL', 'PHM', 'RL', 'ROST', 'RCL', 'SBUX', 'TPR', 'TSLA', 'TJX', 'TSCO', 'ULTA', 'WYNN', 'YUM']
Consumer Staples ['MO', 'ADM', 'BF.B', 'BG', 'CPB', 'CHD', 'CLX', 'KO', 'CL', 'CAG', 'STZ', 'COST', 'DG', 'DLTR', 'EL', 'GIS', 'HSY', 'HRL', 'K', 'KVUE', 'KDP', 'KMB', 'KHC', 'KR', 'LW', 'MKC', 'TAP', 'MDLZ', 'MNST', 'PEP', 'PM', 'PG', 'SJM', 'SYY', 'TGT', 'TSN', 'WBA', 'WMT']
Energy ['APA', 'BKR', 'CVX', 'COP', 'CTRA', 'DVN', 'FANG', 'EOG', 'EQT', 'XOM', 'HAL', 'HES', 'KMI', 'MRO', 'MPC', 'OXY', 'OKE', 'PSX', 'SLB', 'TRG

In [None]:
# Consumer Non-Durables / Food: Specialty/Candy
ticker_symbols = [
'LSF'
,'CHSN'
,'JVA'
,'FRPT'
,'STKL'
,'SOWG'
,'IFF'
,'K'
,'KLG'
,'INGR'
,'SXT'
,'BCPC'
,'POST'
,'UTZ'
,'BROS'
,'MKC'
,'PETZ'
,'KDP'
,'NOMD'
,'BRCC'
,'CPB'
,'SENEB'
,'ASH'
,'SENEA'
,'JJSF'
,'PEP'
,'LANC'
,'FLO'
,'SJM'
,'TR'
,'THS'
,'HSY'
,'JBSS'
,'BOF'
,'FARM'
,'BRID'
,'LW'
,'DAR'
,'WEST'
,'RMCF'
,'SNAX'
,'PLAG'
,'NUZE'

]