In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
import plotly.express as px

from src.api.api_utils import FinancialModelingPrepAPI
from src.analysis.analysis_utils import (
    compute_quarterly_metric_by_key,
    analyze_by_sector,
    plot_metric,
)

tqdm.pandas()

In [6]:
load_dotenv()
api_key = os.getenv("API_KEY")
api = FinancialModelingPrepAPI(api_key)
n_years = 10
n_quarters = n_years * 4

In [7]:
snp_url = f"https://financialmodelingprep.com/api/v3/sp500_constituent?apikey={api_key}"
response = api.session.get(snp_url).json()
snp_data = pd.DataFrame.from_records(response)
snp_data.head()

Unnamed: 0,symbol,name,sector,subSector,headQuarter,dateFirstAdded,cik,founded
0,IBKR,Interactive Brokers Group,Financial Services,Investment - Banking & Investment Services,"Greenwich, CT",2025-08-28,1381197,1982
1,XYZ,"Block, Inc.",Technology,Software - Infrastructure,"Oakland, California",2025-07-23,1512673,2009
2,TTD,"The Trade Desk, Inc.",Technology,Software - Application,"Ventura, California",2025-07-18,1671933,2009
3,DDOG,Datadog,Technology,Software - Application,"New York City, New York",2025-07-09,1561550,2010
4,COIN,Coinbase Global,Financial Services,Financial - Data & Stock Exchanges,"Wilmington, Delaware",2025-05-19,1679788,2012


In [8]:
income_statements = [
    api.get_income_statement(ticker)[:n_quarters][::-1] for ticker in snp_data["symbol"]
]
income_statements_df = pd.DataFrame(income_statements)

### Gross Profit Ratio
- Gross profit ratio is the ratio of gross profit to total revenue.

In [None]:
quarterly_gross_profit_ratios_df = compute_quarterly_metric_by_key(
    income_statements_df, snp_data, "grossProfitRatio", n_quarters
)
quarterly_gross_profit_ratios_df

Unnamed: 0,3M,A. O. Smith,AES Corporation,APA Corporation,AT&T,AbbVie,Abbott Laboratories,Accenture,Adobe Inc.,Advanced Micro Devices,...,Willis Towers Watson,"Workday, Inc.",Wynn Resorts,Xcel Energy,Xylem Inc.,Yum! Brands,Zebra Technologies,Zimmer Biomet,Zoetis,eBay
0,0.497277,0.410654,0.194492,-3.540076,0.544755,0.803668,0.56466,0.299408,0.845866,0.225259,...,0.326241,0.666671,0.374793,0.429574,0.389135,0.27225,0.451892,0.686698,0.653213,0.793711
1,0.47561,0.410698,0.183726,-5.652744,0.532491,0.769531,0.574788,0.302684,0.856454,0.295407,...,0.37577,0.682525,0.391718,0.34748,0.392354,0.245507,0.448996,0.653858,0.610675,0.787683
2,0.503577,0.412467,0.170304,-0.019319,0.546071,0.770225,0.561924,0.28222,0.855525,0.323317,...,0.441361,0.685531,0.390617,0.365278,0.38843,0.406791,0.460449,0.66355,0.665232,0.77679
3,0.504176,0.425337,0.193372,0.220779,0.544052,0.782238,0.571161,0.299871,0.86154,0.310613,...,0.368907,0.687875,0.384132,0.379423,0.395923,0.412856,0.461889,0.669028,0.669702,0.778924
4,0.497989,0.414242,0.194492,0.248784,0.538029,0.766169,0.541682,0.296193,0.865361,0.045142,...,0.350028,0.69007,0.373509,0.43967,0.397993,0.426219,0.457965,0.738488,0.654311,0.775372
5,0.493792,0.41484,0.183726,0.283162,0.348773,0.771189,0.546972,0.303148,0.858866,0.31736,...,0.384017,0.701743,0.370251,0.356432,0.370776,0.42895,0.458599,0.691123,0.618637,0.774948
6,0.494079,0.408514,0.170304,0.359127,0.389966,0.75283,0.437885,0.285791,0.864935,0.336382,...,0.469168,0.714964,0.375665,0.362186,0.384687,0.432604,0.463584,0.739961,0.623071,0.776813
7,0.483483,0.414386,0.238041,0.263001,0.378668,0.779954,0.46075,0.310965,0.857191,0.335361,...,0.379928,0.698588,0.366572,0.39017,0.39433,0.446133,0.458705,0.729315,0.635934,0.768499
8,0.503059,0.408988,0.234987,0.311735,0.36445,0.768978,0.509299,0.299419,0.865009,0.36048,...,0.341793,0.705152,0.378751,0.465031,0.394142,0.458217,0.458824,0.723733,0.659985,0.777022
9,0.488736,0.421285,0.242803,0.42561,0.328222,0.705388,0.498221,0.303572,0.875465,0.337313,...,0.348893,0.706448,0.365496,0.359117,0.39859,0.488269,0.457115,0.714892,0.671233,0.774206


In [8]:
# get all companies where mean is >= 0.4
gpr_filtered = quarterly_gross_profit_ratios_df.apply(lambda x: x.mean() >= 0.4)
print(f"Number of companies with mean gross profit ratio >= 0.4: {gpr_filtered.sum()}")

Number of companies with mean gross profit ratio >= 0.4: 275


In [14]:
plot_metric(
    quarterly_gross_profit_ratios_df[gpr_filtered[gpr_filtered].index],
    title="Gross Profit Ratio (Companies with mean > 40%)",
    y_label="Gross Profit Ratio",
)

In [None]:
# analyze gross profit ratio by sector
sector_gross_median = analyze_by_sector(quarterly_gross_profit_ratios_df, snp_data)
sector_gross_median

Unnamed: 0_level_0,gross_mean,num_companies
sector,Unnamed: 1_level_1,Unnamed: 2_level_1
Financial Services,0.769919,68
Real Estate,0.643563,31
Healthcare,0.579282,61
Technology,0.569756,80
Communication Services,0.506698,22
Utilities,0.380257,32
Consumer Defensive,0.373017,37
Industrials,0.318204,73
Consumer Cyclical,0.313724,55
Basic Materials,0.266514,20


### Ratio of SGA (Selling, General, and Administrative) Expenses to Gross Profit
- SGA expenses are the costs associated with selling a product or service and managing the company.
- A consistent ratio is imporant even if it's high (Coca Cola has a consistent ratio of around 59%).
- Optimally, it should be consistently low (under 30% is considered fantastic)

In [16]:
# compute gross profit ratio per company using helper
quarterly_sga_ratios_df = compute_quarterly_metric_by_key(
    income_statements_df=income_statements_df,
    snp_data=snp_data,
    key=("sellingGeneralAndAdministrativeExpenses", "grossProfit"),
)
quarterly_sga_ratios_df

Unnamed: 0,3M,A. O. Smith,AES Corporation,APA Corporation,AT&T,AbbVie,Abbott Laboratories,Accenture,Adobe Inc.,Advanced Micro Devices,...,Willis Towers Watson,"Workday, Inc.",Wynn Resorts,Xcel Energy,Xylem Inc.,Yum! Brands,Zebra Technologies,Zimmer Biomet,Zoetis,eBay
0,0.398957,0.584729,0.058394,-0.015993,0.427659,0.308562,0.572902,0.563326,0.520816,0.451883,...,0.0,0.735416,0.312369,-0.000553,0.589744,0.351554,0.452721,0.572101,0.471627,0.463385
1,0.4431,0.590632,0.091049,-0.014199,0.375379,0.35269,0.554997,0.523414,0.524853,0.385159,...,0.0,0.738148,0.302619,0.002472,0.571795,0.54433,0.450122,0.869968,0.546272,0.467469
2,0.400161,0.606776,0.091062,-4.428571,0.381342,0.295271,0.618579,0.540874,0.502566,0.390335,...,0.0,0.712297,0.301365,0.000494,0.665653,0.413969,0.5,0.567437,0.407503,0.45
3,0.403831,0.563976,0.073025,0.336601,0.404128,0.29047,0.570256,0.514438,0.492503,0.366771,...,0.0,0.711829,0.293948,0.00097,0.615176,0.407705,0.46798,0.623541,0.42398,0.483592
4,0.398802,0.581363,0.058394,0.284916,0.409682,0.280235,0.56337,0.549923,0.462344,1.983051,...,0.0,0.733663,0.34788,0.000859,0.613445,0.417311,0.449275,0.614259,0.424877,0.479348
5,0.441558,0.603591,0.091049,0.271845,0.658946,0.315398,0.54988,0.512109,0.464655,0.344729,...,0.408108,0.722089,0.346812,0.000808,0.615764,0.1644,0.4375,0.795515,0.456962,0.461746
6,0.421385,0.602382,0.091062,0.189687,0.541854,0.277936,0.828767,0.545295,0.463213,0.365559,...,0.0,0.603387,0.288552,0.000937,0.660194,0.386623,0.458853,0.581569,0.402868,0.499162
7,0.429025,0.579601,0.078778,0.299435,0.551144,0.277696,0.703074,0.702562,0.441969,0.329016,...,0.0,0.620891,0.292854,0.000969,0.588235,0.382353,0.442822,0.529048,0.416357,0.534696
8,0.3982,0.573199,0.08254,0.226328,0.593207,0.269939,0.598045,0.550846,0.42845,0.232925,...,0.0,0.593895,0.292311,0.000713,0.573248,0.326748,0.428904,0.542372,0.368954,0.501288
9,0.467606,0.565164,0.093604,0.126074,0.683456,0.288148,0.642158,0.513187,0.412845,0.29646,...,0.416552,0.58193,0.296178,0.000449,0.546169,0.38961,0.424307,0.592555,0.368367,0.471577


In [17]:
# get all companies where sga_ratio mean is <= 0.3
# TODO: define ratio consistency as another filter
sga_filtered = quarterly_sga_ratios_df.apply(lambda x: x.mean() <= 0.3)
print(
    f"Number of companies with mean SGA to Gross Profit ratio <= 0.3: {sga_filtered.sum()}"
)

Number of companies with mean SGA to Gross Profit ratio <= 0.3: 181


In [18]:
plot_metric(
    quarterly_sga_ratios_df[sga_filtered[sga_filtered].index],
    title="SGA to Gross Profit Ratio (Companies with mean <= 30%)",
    y_label="SGA to Gross Profit Ratio",
)

In [None]:
# analyze SGA ratios by sector
sector_summary = analyze_by_sector(
    quarterly_sga_ratios_df, snp_data, include_companies=True
)
sector_summary

Unnamed: 0_level_0,gross_mean,num_companies,companies
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Consumer Defensive,0.57789,37,"Altria, Archer Daniels Midland, Brown–Forman, ..."
Consumer Cyclical,0.57536,55,"Airbnb, Amazon, Amcor, Aptiv, AutoZone, Ball C..."
Healthcare,0.505545,61,"AbbVie, Abbott Laboratories, Agilent Technolog..."
Industrials,0.493304,73,"3M, A. O. Smith, Allegion, Ametek, Automatic D..."
Communication Services,0.478916,22,"AT&T, Alphabet Inc. (Class A), Alphabet Inc. (..."
Technology,0.429979,80,"Accenture, Adobe Inc., Advanced Micro Devices,..."
Financial Services,0.352238,68,"Aflac, Allstate, American Express, American In..."
Basic Materials,0.316703,20,"Air Products, Albemarle Corporation, CF Indust..."
Energy,0.118842,24,"APA Corporation, Baker Hughes, Chevron Corpora..."
Real Estate,0.095416,31,"Alexandria Real Estate Equities, American Towe..."


### Research and Development to Gross Profit Ratio
- Competitive advantage is usually gained through a patent or a specific technological advancement (this is very common in pharmaceutical companies). However, a patent has an expiry date and with that the competitive advantage is lost.
- In IT this is very common, and that's why companies are on the lookout for the next big thing. And that's why Google, Meta, Apple, Microsoft, Amazon go through periods of dominance and periods of decline. They have a huge overlap of products and services, and they have a clear monopoly in only one or two areas. (Google - search, Meta - social media, Apple - hardware, Microsoft - software, Amazon - e-commerce).
- Because they have to update their products and services all the time, they have to spend on selling and administrative costs as well - which eats into their profit margins.

In [None]:
# compute R&D / Gross Profit ratio using helper
quarterly_rd_ratios_df = compute_quarterly_metric_by_key(
    income_statements_df,
    snp_data,
    ("researchAndDevelopmentExpenses", "grossProfit"),
    n_quarters,
)

In [None]:
plot_metric(
    quarterly_rd_ratios_df,
    title="R&D Expenses to Gross Profit Ratio Over Time",
    y_label="R&D Expenses to Gross Profit Ratio",
)

In [None]:
# analyze R&D ratios by sector using subsector grouping
sector_summary = analyze_by_sector(
    quarterly_rd_ratios_df, snp_data, sector_key="subSector", include_companies=True
)
sector_summary

Unnamed: 0_level_0,gross_mean,num_companies,companies
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Solar,2.705653,2,"Enphase Energy, First Solar"
Biotechnology,0.544387,5,"Bio-Techne, Incyte, Moderna, Regeneron Pharmac..."
Electronic Gaming & Multimedia,0.337110,2,"Electronic Arts, Take-Two Interactive"
Computer Hardware,0.300885,7,"Arista Networks, Dell Technologies, HP Inc., N..."
Software - Application,0.283358,14,"Autodesk, Cadence Design Systems, Datadog, Day..."
...,...,...,...
Steel,0.000000,2,"Nucor, Steel Dynamics"
Trucking,0.000000,1,Old Dominion
Travel Lodging,0.000000,2,"Hilton Worldwide, Marriott International"
Waste Management,0.000000,2,"Republic Services, Waste Management"


### Depreciation and Amortization
- Depreciation is the reduction in value of an asset (vehicle, building, hardware) over time.
- Amortization is the reduction in value of an intangible asset (IP, license, software) over time.
- Companies with a durable competitive advantage have low depreciation and amortization costs. (examples include < 10% of gross profit)


In [None]:
# compute Depreciation & Amortization / Gross Profit using helper
quarterly_da_ratios_df = compute_quarterly_metric_by_key(
    income_statements_df,
    snp_data,
    ("depreciationAndAmortization", "grossProfit"),
    n_quarters,
)

In [None]:
# quarterly_da_ratios_df was computed with compute_quarterly_metric_by_key
quarterly_da_ratios_df = quarterly_da_ratios_df[sorted(quarterly_da_ratios_df.columns)]
# Limit to 10%
quarterly_da_ratios_df = quarterly_da_ratios_df.loc[
    :, (quarterly_da_ratios_df <= 0.1).all()
]
print(len(quarterly_da_ratios_df))
px.line(
    quarterly_da_ratios_df,
    title="Depreciation and Amortization Expenses to Gross Profit Ratio Over Time",
    labels={
        "value": "D&A Expenses to Gross Profit Ratio",
        "index": "Quarter",
        "variable": "Company",
    },
)

40


### Interest
- In any given industry the company with the lowest ratio of interest payments to operating income is usually the company most likely to have the competitive advantage.

In [None]:
# compute Interest Expense / Operating Income using helper
quarterly_interest_to_operating_income_ratios_df = compute_quarterly_metric_by_key(
    income_statements_df, snp_data, ("interestExpense", "operatingIncome"), n_quarters
)

In [None]:
# quarterly_interest_to_operating_income_ratios_df was computed with compute_quarterly_metric_by_key
quarterly_interest_to_operating_income_ratios_df = (
    quarterly_interest_to_operating_income_ratios_df[
        sorted(quarterly_interest_to_operating_income_ratios_df.columns)
    ]
)
# Get the three companies with the lowest ratio for each sector
# name_to_sector = pd.Series(snp_data[key].values, index=snp_data['name']).to_dict()

In [None]:
# analyze interest ratios by sector
sector_summary = analyze_by_sector(
    quarterly_interest_to_operating_income_ratios_df, snp_data
)
sector_summary.sort_index()

Unnamed: 0,company,mean_interest_to_operating_income_ratio,sector
354,Omnicom Group,0.131081,Advertising Agencies
253,Interpublic Group of Companies (The),0.219300,Advertising Agencies
68,Boeing,-0.091115,Aerospace & Defense
212,General Dynamics,0.050688,Aerospace & Defense
291,Lockheed Martin,0.106980,Aerospace & Defense
...,...,...,...
403,Royal Caribbean Group,0.853604,Travel Services
183,Expedia Group,2.186956,Travel Services
353,Old Dominion,0.002406,Trucking
484,Waste Management,0.150108,Waste Management


In [None]:
plot_metric(
    quarterly_interest_to_operating_income_ratios_df,
    title="Interest Expenses to Operating Income Ratio Over Time",
    y_label="Interest Expenses to Operating Income Ratio",
)

### Income before Taxes
- The company's income after all expenses have been deducted (SGA, R&D, depreciation, amortization, interest, gain/loss on sale of assets, etc.) but before taxes have been deducted.
- Buffett uses this to calculate his ROI, if he bought a business or a stake in a business.

### EBITDA (Earnings Before Interest, Taxes, Depreciation, and Amortization)
- EBITDA is sketchy because people who use it try to hide costs of doing business.
- EBITDA is good as a proxy for operating cash flow. However, it excludes depreciation of capex and sometimes stock based comp- so important to consider those separately if you’re going to use EBITDA. The reason why investors often exclude interest income/expense is because they may be recapitalizing the company with a new equity or debt structure. That interest expense may go away upon their investment. Similarly the company’s taxable position may change if it gets acquired, so investors prefer to normalize this out.

In [None]:
# compute quarterly income before tax (EBIT proxy) using helper
quarterly_ebit_df = compute_quarterly_metric_by_key(
    income_statements_df, snp_data, "incomeBeforeTax", n_quarters
)

In [None]:
plot_metric(quarterly_ebit_df, title="Quarterly EBIT Over Time", y_label="EBIT")

### Net Earnings (Net Income)

In [None]:
quarterly_net_income_ratio_df = compute_quarterly_metric_by_key(
    income_statements_df, snp_data, "netIncomeRatio", n_quarters
)

In [None]:
plot_metric(
    quarterly_net_income_ratio_df,
    title="Quarterly Net Income Ratio Over Time",
    y_label="Net Income Ratio",
)

In [32]:
quarterly_net_income_ratio_df.mean()

3M                    0.116852
A. O. Smith           0.121745
AES Corporation      -0.090847
APA Corporation      -0.270299
AT&T                  0.082855
                        ...   
Yum! Brands           0.213431
Zebra Technologies    0.072553
Zimmer Biomet         0.073673
Zoetis                0.232114
eBay                  0.354897
Length: 503, dtype: float64

In [33]:
# Rule of thumb - if net earnings / gross profit is > 20%, the company has a durable competitive advantage.
# If it's less than 10%, it's in a competitive industry.
# If it's 10-20%, it's gray area.

averages = quarterly_net_income_ratio_df.mean()
first_bracket_companies = averages[averages > 0.2].index
second_bracket_companies = averages[(averages <= 0.2) & (averages >= 0.1)].index
third_bracket_companies = averages[averages < 0.1].index

first_bracket = quarterly_net_income_ratio_df[first_bracket_companies]
second_bracket = quarterly_net_income_ratio_df[second_bracket_companies]
third_bracket = quarterly_net_income_ratio_df[third_bracket_companies]

In [None]:
plot_metric(
    first_bracket,
    title="Quarterly Net Income Ratio Over Time (Net Income / Gross Profit > 20%)",
    y_label="Net Income Ratio",
)

### EPS (Earnings Per Share)
- EPS is the company's net earnings divided by the number of outstanding shares.
- Similar to earnings stability, we also seek EPS stability (and growth) over time. Instability in EPS indicates that the business is not tuned to the supply and demand of the market. (it's under-resourced when there's demand and over-resourced when there's no demand)