In [None]:
import os
import datetime

import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
import plotly.express as px
import plotly

from src.api.api_utils import FinancialModelingPrepAPI

tqdm.pandas()

#### Setup API

In [2]:
load_dotenv()
api_key = os.getenv("API_KEY")
api = FinancialModelingPrepAPI(api_key)

#### Get S&P 500 companies

In [3]:
snp_url = f"https://financialmodelingprep.com/api/v3/sp500_constituent?apikey={api_key}"
response = api.session.get(snp_url).json()
snp_data = pd.DataFrame.from_records(response)
snp_data.head()

Unnamed: 0,symbol,name,sector,subSector,headQuarter,dateFirstAdded,cik,founded
0,DASH,DoorDash,Communication Services,Internet Content & Information,"San Francisco, CA",2025-03-24,1792789,2013
1,EXE,Expand Energy,Energy,Oil & Gas Exploration & Production,"Oklahoma City, OK",2025-03-24,895126,1989
2,TKO,TKO Group Holdings,Communication Services,Entertainment,"New York, NY",2025-03-24,1973266,2023
3,WSM,Williams-Sonoma,Consumer Cyclical,Specialty Retail,"San Francisco, CA",2025-03-24,719955,1956
4,APO,Apollo Global Management,Financial Services,Asset Management - Global,"New York City, New York",2024-12-23,1858681,1990


### Benjamin Graham's criteria for stock selection (Defensive Investor)

1. Adequate size of the enterprise
2. Strong financial condition
3. Earnings stability
4. Dividend record
5. Earnings growth
6. Moderate price/earnings ratio
7. Moderate price to assets ratio


#### 1. Adequate size of the enterprise
Adequate size of the enterprise (market cap > $2 billion in 2003 -> ~ $3,4 billion in 2024)
Reference: Inflation calculator - https://www.nerdwallet.com/calculator/inflation-calculator


In [4]:
# Initial call returns an empty list sometimes, so we need to retry until we get the data
snp_data["market_cap"] = snp_data.progress_apply(
    lambda x: api.get_market_cap(x["symbol"]), axis=1
)
print(f"Companies without market cap: {snp_data['market_cap'].isna().sum()}")

# set time limit for the while loop
time_limit = datetime.datetime.now() + datetime.timedelta(seconds=30)

# While loop to ensure all companies have a market cap
while snp_data["market_cap"].isna().any() and datetime.datetime.now() < time_limit:
    # Apply the get_market_cap function to companies without a market cap
    snp_data["market_cap"] = snp_data.progress_apply(
        lambda x: api.get_market_cap(x["symbol"])
        if pd.isna(x["market_cap"])
        else x["market_cap"],
        axis=1,
    )

# Sanity check
print(f"Companies without market cap: {snp_data['market_cap'].isna().sum()}")

100%|██████████| 503/503 [01:16<00:00,  6.57it/s]


Companies without market cap: 94


100%|██████████| 503/503 [00:12<00:00, 40.74it/s]  

Companies without market cap: 0





In [5]:
snp_data["market_cap"].describe()

count    5.030000e+02
mean     1.028615e+11
std      2.953585e+11
min      5.631606e+09
25%      1.813118e+10
50%      3.470108e+10
75%      7.859223e+10
max      3.143825e+12
Name: market_cap, dtype: float64

In [6]:
fig = px.histogram(
    snp_data,
    x="market_cap",
    title="Market capitalization of S&P 500 companies",
    nbins=3000,
)
fig.show()

In [7]:
# plotly.io.write_json(fig, 'data/market_cap.json')

In [8]:
market_cap_limit = 3421630434.78
first_criteria = snp_data[snp_data["market_cap"] >= market_cap_limit]["symbol"].unique()
print(len(first_criteria))

503


In [9]:
# plot for top 10 largest companies with their market cap
fig = px.bar(
    snp_data.sort_values(by="market_cap", ascending=False).head(10),
    x="market_cap",
    y="name",
    title="Top 10 largest companies in the S&P 500",
    orientation="h",
)
fig.show()

We can see that all companies have a market cap above 3.4 billion dollars since these are in fact the S&P 500 companies - the biggest companies in the US. This means that investing in any of these companies would satisfy the first criteria. However, if we want to use the same criteria for other indexes (say with smaller capitalization), this criteria would be useful to determine which companies are of adequate size. Note that, the limit of 3.4 billion dollars applies for 2024, so we can adjust this limit for other years.

In [10]:
plotly.io.write_json(fig, "data/10_largest_companies.json")

In [11]:
# Uncomment to save data
# snp_data.to_csv('snp_data.csv', index=False)

#### 2. Strong financial condition
Strong financial condition (current ratio > 2) - Current assets at least twice their current liabilities.

In [12]:
ratios = [api.get_ratios(ticker) for ticker in tqdm(snp_data["symbol"])]
ratios_df = pd.DataFrame.from_records(ratios)
companies_without_ratios = ratios_df[ratios_df["currentRatio"].isna()]["symbol"]
print(f"Companies without ratios: {len(companies_without_ratios)}")
# set time limit for the while loop
time_limit = datetime.datetime.now() + datetime.timedelta(seconds=15)
while len(companies_without_ratios) > 0 and datetime.datetime.now() < time_limit:
    new_ratios = [api.get_ratios(ticker) for ticker in tqdm(companies_without_ratios)]
    new_ratios_df = pd.DataFrame.from_records(new_ratios)
    ratios_df = (
        pd.concat([ratios_df, new_ratios_df])
        .drop_duplicates(subset="symbol", keep="last")
        .reset_index(drop=True)
    )
    companies_without_ratios = ratios_df[ratios_df["currentRatio"].isna()]["symbol"]

# Sanity check
print(len(ratios_df))

100%|██████████| 503/503 [01:21<00:00,  6.20it/s]


Companies without ratios: 105


100%|██████████| 105/105 [00:15<00:00,  6.86it/s]

503





In [13]:
ratios_df["name"] = ratios_df.apply(
    lambda x: snp_data[snp_data["symbol"] == x["symbol"]]["name"].values[0], axis=1
)

In [14]:
second_criteria = ratios_df[ratios_df["currentRatio"] > 2]["symbol"].unique()
print(len(second_criteria))

116


In [15]:
# outliers
ratios_df[ratios_df["currentRatio"] > 50]

Unnamed: 0,symbol,date,calendarYear,period,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,daysOfInventoryOutstanding,operatingCycle,...,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio,priceEarningsToGrowthRatio,priceSalesRatio,dividendYield,enterpriseValueMultiple,priceFairValue,name
212,MET,2024-12-31,2024,FY,84.626289,84.626289,12.930412,160.680001,0.0,160.680001,...,13.06824,3.962189,3.962189,3.962189,0.056723,0.827504,0.029858,5.281301,2.107489,MetLife
235,COF,2024-12-31,2024,FY,132.992492,132.992492,4.546547,23.629065,0.0,23.629065,...,14.366961,3.758085,3.758085,3.758085,-4.651789,1.744811,0.016998,9500.350345,1.122714,Capital One
246,HBAN,2024-12-31,2024,FY,145.517588,145.517588,8.467337,278.873097,0.0,278.873097,...,12.172484,13.01081,13.01081,13.01081,0.902196,2.380266,0.006056,0.0,1.196283,Huntington Bancshares
258,L,2024-12-31,2024,FY,147.513043,147.513043,4.704348,213.084286,0.0,213.084286,...,13.192234,7.795161,6.166552,6.166552,6.925923,1.082071,0.002948,9.336377,0.976129,Loews Corporation


In [16]:
df_current_ratio_gt_2 = ratios_df[
    (ratios_df["currentRatio"] > 2) & (ratios_df["currentRatio"] < 50)
].sort_values(by="currentRatio", ascending=False)
fig = px.bar(
    df_current_ratio_gt_2,
    x="symbol",
    y="currentRatio",
    title="Current ratio of S&P 500 companies",
    hover_name="name",
)
fig.show()

In [17]:
plotly.io.write_json(fig, "data/current_ratio.json")

In [18]:
# Uncomment to save data
# ratios_df.to_csv('ratios.csv', index=False)

#### 3. Earnings stability
- Positive earnings for each of the last 10 years. (Earnings per share). 
- Note that it is possible that some companies went public less than 10 years ago, so they are automatically excluded from this criteria. 
- Also, depending on when you run this code, you may end up with 11 years - where the average EPS per year for the 11th year will be calculated with less than four quarters. This is because we request for 44 quarters (11 years) of data - the response includes data for future quarters as well.
- Advice: If you like the business, go deeper into the data - the indicators may seem misleading. For example, Amazon is not on the list just because it has a negative EPS for one quarter in 2014.

In [19]:
# TODO: Add long term debt to net current assets (working capital) explanation + graph

In [20]:
earnings = [api.get_earnings(ticker) for ticker in tqdm(snp_data["symbol"])]
earnings = [item for sublist in earnings for item in sublist]
earnings_df = pd.DataFrame.from_records(earnings)
earnings_df = earnings_df.dropna()
earnings_df

100%|██████████| 503/503 [01:21<00:00,  6.21it/s]


Unnamed: 0,date,symbol,eps,epsEstimated,time,revenue,revenueEstimated,updatedFromDate,fiscalDateEnding
3,2025-02-26,INVH,0.23,0.47,amc,6.591300e+08,6.587053e+08,2025-04-26,2024-12-31
4,2024-10-30,INVH,0.15,0.17,amc,6.603220e+08,6.527300e+08,2024-10-31,2024-09-30
5,2024-07-24,INVH,0.12,0.18,amc,6.749160e+08,6.425800e+08,2025-04-25,2024-06-30
6,2024-04-30,INVH,0.23,0.46,amc,6.676030e+08,6.420400e+08,2025-04-25,2024-03-31
7,2024-02-13,INVH,0.21,0.45,amc,6.243210e+08,6.236400e+08,2025-04-25,2023-12-31
...,...,...,...,...,...,...,...,...,...
15498,2016-02-02,XOM,0.67,0.63,bmo,5.769100e+10,3.578494e+10,2025-04-24,2015-12-31
15499,2015-10-30,XOM,1.01,0.89,bmo,6.567900e+10,5.787555e+10,2025-04-24,2015-09-30
15500,2015-07-31,XOM,1.00,1.11,bmo,7.136000e+10,7.920960e+10,2025-04-24,2015-06-30
15501,2015-04-30,XOM,1.17,0.83,bmo,6.475800e+10,4.593944e+10,2025-04-24,2015-03-31


In [21]:
# calculate yearly eps per company
earnings_df["year"] = pd.to_datetime(earnings_df["date"]).dt.year
earnings_df = earnings_df.groupby(["symbol", "year"]).agg({"eps": "mean"}).reset_index()

In [22]:
# group by company and get all symbols which always have positive eps
earnings_df["name"] = earnings_df.apply(
    lambda x: snp_data[snp_data["symbol"] == x["symbol"]]["name"].values[0], axis=1
)
third_criteria = (
    earnings_df.groupby("symbol").filter(lambda x: all(x["eps"] > 0))["symbol"].unique()
)
print(len(third_criteria))

308


In [23]:
# plot average eps per company - for example, tech companies
# px.line(earnings_df[earnings_df['symbol'].isin(['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META'])], x='year', y='eps', color='symbol', title='Average earnings per share per year')
fig = px.line(
    earnings_df[earnings_df["symbol"].isin(third_criteria)],
    x="year",
    y="eps",
    color="name",
    title="Average earnings per share per year",
)
fig.show()

In [24]:
plotly.io.write_json(fig, "data/positive_eps.json")

#### 4. Dividend record
Uninterrupted dividends for at least the past 20 years.

In [25]:
dividends_df = pd.DataFrame()
for ticker in tqdm(snp_data["symbol"]):
    dividends = api.get_dividends(ticker)
    dividends_df_company = pd.DataFrame.from_records(dividends)
    if dividends_df_company.empty:
        dividends_df = pd.concat(
            [dividends_df, pd.DataFrame({"symbol": ticker, "num_years": 0}, index=[0])]
        )
    else:
        dividends_df_company["year"] = pd.to_datetime(
            dividends_df_company["date"]
        ).dt.year
        last_20_years = [
            year
            for year in dividends_df_company["year"].unique()
            if year in range(2005, 2025)
        ]
        num_years = len(last_20_years)
        dividends_df = pd.concat(
            [
                dividends_df,
                pd.DataFrame({"symbol": ticker, "num_years": num_years}, index=[0]),
            ]
        )

100%|██████████| 503/503 [01:08<00:00,  7.30it/s]


In [26]:
dividends_df = dividends_df.merge(snp_data[["symbol", "name"]], on="symbol")
dividends_df[dividends_df["num_years"] >= 20]

Unnamed: 0,symbol,num_years,name
5,LII,20,Lennox International
7,TPL,20,Texas Pacific Land Corporation
9,ERIE,20,Erie Indemnity
23,HUBB,20,Hubbell Incorporated
32,BG,20,Bunge Global
...,...,...,...
498,SO,20,Southern Company
499,SPGI,20,S&P Global
500,UNP,20,Union Pacific Corporation
501,XEL,20,Xcel Energy


In [27]:
fourth_criteria = dividends_df[dividends_df["num_years"] >= 20]["symbol"].unique()
print(len(fourth_criteria))

215


#### 5. Earnings growth
A minimum increase of at least one-third in per-share earnings in the past 10 years using 3-year averages at the beginning and end.

In [28]:
# remove companies which don't have eps for the last 10 years
earnings_df_yearly = earnings_df.groupby("symbol").filter(lambda x: len(x) >= 10)
earnings_df_yearly

Unnamed: 0,symbol,year,eps,name
0,A,2014,0.8800,Agilent Technologies
1,A,2015,0.4325,Agilent Technologies
2,A,2016,0.4950,Agilent Technologies
3,A,2017,0.5925,Agilent Technologies
4,A,2018,0.6975,Agilent Technologies
...,...,...,...,...
3827,ZTS,2021,1.1525,Zoetis
3828,ZTS,2022,1.1825,Zoetis
3829,ZTS,2023,1.3075,Zoetis
3830,ZTS,2024,1.4400,Zoetis


In [29]:
# average first three years and last three years
earnings_growth_df = pd.DataFrame(columns=["symbol", "start_eps", "end_eps", "growth"])
for symbol in earnings_df_yearly["symbol"].unique():
    # generate dataframe (symbol, start_eps, end_eps, growth)
    earnings_growth_df = pd.concat(
        [
            earnings_growth_df,
            pd.DataFrame(
                {
                    "symbol": symbol,
                    "start_eps": earnings_df_yearly[
                        earnings_df_yearly["symbol"] == symbol
                    ]
                    .head(3)["eps"]
                    .mean(),
                    "end_eps": earnings_df_yearly[
                        earnings_df_yearly["symbol"] == symbol
                    ]
                    .tail(3)["eps"]
                    .mean(),
                },
                index=[0],
            ),
        ]
    )
earnings_growth_df["growth"] = earnings_growth_df.apply(
    lambda x: (x["end_eps"] - x["start_eps"]) / abs(x["start_eps"]), axis=1
)
earnings_growth_df["gt_33_percent"] = earnings_growth_df.apply(
    lambda x: x["growth"] > 0.33, axis=1
)
earnings_growth_df[earnings_growth_df["gt_33_percent"]].sort_values(
    "growth", ascending=False
)


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Unnamed: 0,symbol,start_eps,end_eps,growth,gt_33_percent
0,EOG,-0.001389,2.947500,2123.200000,True
0,VRTX,0.026667,2.600833,96.531250,True
0,NVDA,0.010000,0.587500,57.750000,True
0,CHTR,-0.151944,8.580833,57.473492,True
0,OXY,0.025000,0.930000,36.200000,True
...,...,...,...,...,...
0,WYNN,0.988333,1.346667,0.362563,True
0,SLB,0.586667,0.789167,0.345170,True
0,FTV,0.729167,0.976667,0.339429,True
0,HUM,2.325000,3.110000,0.337634,True


In [30]:
fifth_criteria = earnings_growth_df[earnings_growth_df["gt_33_percent"]][
    "symbol"
].unique()
print(len(fifth_criteria))

264


In [31]:
earnings_growth_fifth_criterion = earnings_growth_df[
    (earnings_growth_df["symbol"].isin(fifth_criteria))
    & (earnings_growth_df["growth"] <= 100)
].sort_values("growth", ascending=False)
earnings_growth_fifth_criterion["name"] = earnings_growth_fifth_criterion.apply(
    lambda x: snp_data[snp_data["symbol"] == x["symbol"]]["name"].values[0], axis=1
)
earnings_growth_fifth_criterion["growth"] = (
    earnings_growth_fifth_criterion["growth"] * 100
)

In [32]:
fig = px.bar(
    earnings_growth_fifth_criterion,
    x="symbol",
    y="growth",
    title="Earnings growth of S&P 500 companies (in %)",
    hover_name="name",
)
fig.show()

In [33]:
plotly.io.write_json(fig, "data/eps_growth.json")

#### 6. Moderate price/earnings ratio
- Current price is not more than 15 times average earnings of the past 3 years.
- Since the API offers only the trailing twelve months (TTM) P/E ratio, we will use that instead.

In [34]:
ratios_ttm = [api.get_ratios_ttm(ticker) for ticker in tqdm(snp_data["symbol"])]
ratios_ttm_df = pd.DataFrame.from_records(ratios_ttm)
ratios_ttm_df

100%|██████████| 503/503 [01:09<00:00,  7.19it/s]


Unnamed: 0,dividendYielTTM,dividendYielPercentageTTM,peRatioTTM,pegRatioTTM,payoutRatioTTM,currentRatioTTM,quickRatioTTM,cashRatioTTM,daysOfSalesOutstandingTTM,daysOfInventoryOutstandingTTM,...,priceEarningsRatioTTM,priceToFreeCashFlowsRatioTTM,priceToOperatingCashFlowsRatioTTM,priceCashFlowRatioTTM,priceEarningsToGrowthRatioTTM,priceSalesRatioTTM,enterpriseValueMultipleTTM,priceFairValueTTM,dividendPerShareTTM,symbol
0,0.000000,0.000000,636.637679,3.837268,0.000000,1.664263,1.664263,0.905588,29.446465,0.000000,...,636.637679,44.153034,36.729097,36.729097,3.837268,7.420609,145.469918,10.035427,0.0000,DASH
1,0.023172,2.317189,-34.147138,0.110443,-0.543417,0.639449,0.639449,0.101505,105.739603,0.000000,...,-34.147138,6125.827500,15.618870,15.618870,0.110443,5.790007,29.611583,1.388048,2.4400,EXE
2,0.002419,0.241884,1358.990551,0.503330,7.152611,1.296590,1.296590,0.783644,28.817567,0.000000,...,1358.990551,22.752760,21.903272,21.903272,0.503330,4.568668,22.467053,3.123568,0.3800,TKO
3,0.015676,1.567564,16.447000,2.713755,0.243300,1.440715,0.743828,0.634411,5.569895,119.335859,...,16.447000,16.399592,13.918203,13.918203,2.713755,2.421497,11.100444,8.836668,2.3700,WSM
4,0.013868,1.386807,17.082312,-0.816300,0.259777,0.800045,0.800045,0.059960,122.691660,0.000000,...,17.082312,23.394415,24.034965,24.034965,-0.816300,2.914223,7.929394,4.531719,1.8500,APO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
498,0.031848,3.184784,22.520173,-3.346964,0.671211,0.668668,0.458013,0.066904,58.579741,78.139734,...,22.520173,180.562943,10.125795,10.125795,-3.346964,3.722878,12.530218,2.984560,2.8800,SO
499,0.007687,0.768750,38.793667,4.447557,0.294316,0.854036,0.854036,0.260638,73.657704,0.000000,...,38.793667,27.069844,26.273862,26.273862,4.447557,10.603483,23.514640,4.507735,3.6900,SPGI
500,0.024943,2.494257,25.097235,139.415140,0.473567,0.729508,0.729508,0.226776,0.039354,0.000000,...,25.097235,27.963829,17.739215,17.739215,139.415140,7.013901,13.569541,7992.224578,5.3200,UNP
501,0.032065,3.206522,20.546349,-17.721226,0.621958,0.796321,0.703605,0.166592,34.258679,25.201613,...,20.546349,-125.377789,8.589522,8.589522,-17.721226,2.898538,12.567781,2.003383,2.2125,XEL


In [35]:
ratios_ttm_df[ratios_ttm_df["peRatioTTM"] <= 15][["symbol", "peRatioTTM"]]

Unnamed: 0,symbol,peRatioTTM
1,EXE,-34.147138
12,CRWD,-5444.288986
19,SMCI,14.958010
20,BLDR,12.926314
32,BG,9.820211
...,...,...
485,MO,8.781233
486,MRK,12.282663
490,OXY,12.249313
497,SLB,11.254014


In [None]:
ratios_ttm_df["name"] = ratios_ttm_df.apply(
    lambda x: snp_data[snp_data["symbol"] == x["symbol"]]["name"].values[0], axis=1
)
ratios_ttm_df["sector"] = ratios_ttm_df.apply(
    lambda x: snp_data[snp_data["symbol"] == x["symbol"]]["sector"].values[0], axis=1
)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [41]:
sixth_criteria = ratios_ttm_df[
    (ratios_ttm_df["peRatioTTM"] <= 15) & (ratios_ttm_df["peRatioTTM"] > 0)
]["symbol"].unique()
print(len(sixth_criteria))

87


In [42]:
ratios_ttm_df_sixth_criteria = ratios_ttm_df[
    ratios_ttm_df["symbol"].isin(sixth_criteria)
][["symbol", "name", "sector", "peRatioTTM"]].sort_values("peRatioTTM", ascending=False)
ratios_ttm_df_sixth_criteria

KeyError: "['name', 'sector'] not in index"

In [None]:
fig = px.bar(
    ratios_ttm_df_sixth_criteria,
    x="symbol",
    y="peRatioTTM",
    color="sector",
    hover_name="name",
)
fig.show()

In [None]:
plotly.io.write_json(fig, "data/pe_ratio.json")

#### 7. Moderate price to assets ratio
- Price should not be more than 1.5 times the book value last reported.
- This ratio is calculated by dividing the company's current stock price per share by its book value per share (BVPS).

In [None]:
# TODO: P/E * P/B < 22.5 (same usage as P/B <= 1.5)

In [43]:
ratios_df[ratios_df["priceToBookRatio"] <= 1.5][["symbol", "priceToBookRatio"]]

Unnamed: 0,symbol,priceToBookRatio
1,EXE,0.889738
8,DELL,-49.493117
31,FICO,-49.817540
32,BG,1.095269
40,PCG,1.433062
...,...,...
460,CTRA,1.444191
463,PM,-15.916928
482,KIM,1.477081
483,BXP,1.473305


In [44]:
seventh_criteria = ratios_df[ratios_df["priceToBookRatio"] <= 1.5]["symbol"].unique()
seventh_criteria_df = ratios_df[ratios_df["symbol"].isin(seventh_criteria)]
seventh_criteria_df["sector"] = seventh_criteria_df.apply(
    lambda x: snp_data[snp_data["symbol"] == x["symbol"]]["sector"].values[0], axis=1
)
seventh_criteria_df = seventh_criteria_df[seventh_criteria_df["priceToBookRatio"] > 0]
print(len(seventh_criteria))

87




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [45]:
fig = px.bar(
    seventh_criteria_df,
    x="symbol",
    y="priceToBookRatio",
    color="sector",
    hover_name="name",
)
fig.show()

In [46]:
plotly.io.write_json(fig, "data/pb_ratio.json")

### Combine all criteria

In [47]:
# Store all dataframes
date = datetime.datetime.now().strftime("%Y-%m-%d")
snp_data.to_csv(f"data/snp_data_{date}.csv", index=False)
ratios_df.to_csv(f"data/ratios_{date}.csv", index=False)
earnings_df.to_csv(f"data/earnings_{date}.csv", index=False)
earnings_growth_df.to_csv(f"data/earnings_growth_{date}.csv", index=False)
dividends_df.to_csv(f"data/dividends_{date}.csv", index=False)
ratios_ttm_df.to_csv(f"data/ratios_ttm_{date}.csv", index=False)

In [48]:
# compile a dataframe of shape (symbol, criteria1, criteria2, ..., criteria7), where value of criteria is True if the company meets the criteria
criteria_df = (
    snp_data[["symbol", "name"]]
    .merge(
        pd.DataFrame({"symbol": first_criteria, "large_market_cap": True}),
        on="symbol",
        how="left",
    )
    .fillna(False)
)
criteria_df = criteria_df.merge(
    pd.DataFrame({"symbol": second_criteria, "double_current_ratio": True}),
    on="symbol",
    how="left",
).fillna(False)
criteria_df = criteria_df.merge(
    pd.DataFrame({"symbol": third_criteria, "earnings_stability": True}),
    on="symbol",
    how="left",
).fillna(False)
criteria_df = criteria_df.merge(
    pd.DataFrame({"symbol": fourth_criteria, "dividend_record": True}),
    on="symbol",
    how="left",
).fillna(False)
criteria_df = criteria_df.merge(
    pd.DataFrame({"symbol": fifth_criteria, "earnings_growth": True}),
    on="symbol",
    how="left",
).fillna(False)
criteria_df = criteria_df.merge(
    pd.DataFrame({"symbol": sixth_criteria, "moderate_pe_ratio": True}),
    on="symbol",
    how="left",
).fillna(False)
criteria_df = criteria_df.merge(
    pd.DataFrame({"symbol": seventh_criteria, "moderate_pb_ratio": True}),
    on="symbol",
    how="left",
).fillna(False)
# count satisfied criteria
criteria_df["num_criteria"] = criteria_df[
    [
        "large_market_cap",
        "double_current_ratio",
        "earnings_stability",
        "dividend_record",
        "earnings_growth",
        "moderate_pe_ratio",
        "moderate_pb_ratio",
    ]
].sum(axis=1)
criteria_df.sort_values("num_criteria", ascending=False)


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True

Unnamed: 0,symbol,name,large_market_cap,double_current_ratio,earnings_stability,dividend_record,earnings_growth,moderate_pe_ratio,moderate_pb_ratio,num_criteria
340,COF,Capital One,True,True,True,True,True,True,True,7
485,MO,Altria,True,False,True,True,True,True,True,6
89,TROW,T. Rowe Price,True,True,True,True,True,True,False,6
317,MET,MetLife,True,True,True,True,True,True,False,6
294,STT,State Street Corporation,True,True,True,False,True,True,True,6
...,...,...,...,...,...,...,...,...,...,...
25,VLTO,Veralto,True,False,False,False,False,False,False,1
22,UBER,Uber,True,False,False,False,False,False,False,1
34,GEHC,GE HealthCare,True,False,False,False,False,False,False,1
2,TKO,TKO Group Holdings,True,False,False,False,False,False,False,1


In [None]:
criteria_df.to_csv(f"data/all_criteria_{date}.csv", index=False)

In [49]:
import plotly.graph_objects as go


# Convert column names to have line breaks for better fitting
def format_header(col_name):
    # Replace underscores with line breaks
    return col_name.replace("_", "<br>")


# Format all column headers
formatted_headers = [format_header(col) for col in criteria_df.columns]

# Adjust column widths - first column narrower, second wider for names, others standard
columnwidth = [60, 150] + [100] * (len(criteria_df.columns) - 2)

fig = go.Figure(
    data=go.Table(
        columnwidth=columnwidth,
        header=dict(
            values=list(formatted_headers),
            fill_color="rgb(153, 128, 250)",  # Purple similar to #9980FA
            font=dict(color="white", size=12, family="Arial, sans-serif"),
            align="center",
            height=60,
            line=dict(color="white", width=1.5),
        ),
        cells=dict(
            values=criteria_df.sort_values(by="num_criteria", ascending=False)
            .transpose()
            .values.tolist(),
            fill_color="rgb(246, 229, 141)",  # Yellow similar to #f6e58d
            font=dict(color="rgb(50, 50, 50)", size=12, family="Arial, sans-serif"),
            align=["center", "left"]
            + ["center"] * (len(criteria_df.columns) - 2),  # Align name column left
            height=30,
            line=dict(color="white", width=1),
        ),
    )
)

# Update layout for better visual appearance
fig.update_layout(
    paper_bgcolor="white",
    plot_bgcolor="white",
    margin=dict(l=10, r=10, t=10, b=10),
    height=450,
    width=1000,
)

fig.show()

In [None]:
plotly.io.write_json(fig, "data/all_criteria.json")