In [1]:
import datetime

import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
import plotly.express as px

from api_utils import FinancialModelingPrepAPI

tqdm.pandas()
import os

In [2]:
# Load environment variables (including any defined in a local .env file) and
# build the API client used by the rest of the notebook.
load_dotenv()
api_key = os.getenv('API_KEY')
# os.getenv returns None when the variable is unset. Fail here with a clear
# message instead of letting a literal "None" be interpolated into request URLs.
if not api_key:
    raise RuntimeError('API_KEY is not set; define it in the environment or in a .env file.')
api = FinancialModelingPrepAPI(api_key)

In [3]:
# Download the S&P 500 constituent list and tabulate it, one row per returned record.
snp_url = f'https://financialmodelingprep.com/api/v3/sp500_constituent?apikey={api_key}'
constituents_reply = api.session.get(snp_url)
# Decode the JSON body before handing the records to pandas.
response = constituents_reply.json()
snp_data = pd.DataFrame.from_records(response)
snp_data.head()

Unnamed: 0,symbol,name,sector,subSector,headQuarter,dateFirstAdded,cik,founded
0,APO,Apollo Global Management,Financials,Asset Management & Custody Banks,"New York City, New York",2024-12-23,1858681,1990
1,LII,Lennox International,Industrials,Building Products,"Richardson, Texas",2024-12-23,1069202,1895
2,WDAY,"Workday, Inc.",Information Technology,Application Software,"Pleasanton, California",2024-12-23,1327811,2005
3,TPL,Texas Pacific Land Corporation,Energy,Oil & Gas Exploration & Production,"Dallas, Texas",2024-11-26,1811074,1888
4,DELL,Dell Technologies,Information Technology,"Technology Hardware, Storage & Peripherals","Round Rock, Texas",2024-09-23,1571996,2016


In [9]:
# Fetch the income statements for every constituent symbol, in the same order as snp_data,
# so that row k of income_statements_df corresponds to row k of snp_data.
income_statements = list(map(api.get_income_statement, snp_data['symbol']))
income_statements_df = pd.DataFrame(income_statements)

### Gross Profit Ratio
- Gross profit ratio is the ratio of gross profit to total revenue.

In [39]:
# Collect, per company, the gross profit ratio from up to the first 40 statements
# (40 quarters = 10 years) stored in that company's row of income_statements_df.
# NOTE(review): `[:40]` keeps the FIRST 40 entries while short histories are padded
# at the FRONT; whether both choices line quarters up correctly depends on the order
# in which api.get_income_statement returns statements (newest-first vs oldest-first)
# -- confirm.
quarterly_gross_profit_ratios = {}
for position, statements in income_statements_df.iterrows():
    ratios = [
        statement['grossProfitRatio']
        for statement in statements.values[:40]
        if statement is not None
    ]
    # Not every company has 40 quarters of data: left-pad with None up to length 40.
    missing = 40 - len(ratios)
    if missing > 0:
        ratios = [None] * missing + ratios
    # The row label doubles as the positional index into snp_data.
    company_name = snp_data.iloc[position]['name']
    quarterly_gross_profit_ratios[company_name] = ratios

In [40]:
# One column per company name, one row per position in the 40-entry ratio lists.
quarterly_gross_profit_ratios_df = pd.DataFrame.from_dict(quarterly_gross_profit_ratios, orient='columns')
quarterly_gross_profit_ratios_df.head(n=5)

Unnamed: 0,Lennox International,Texas Pacific Land Corporation,Amentum,Dell Technologies,Erie Indemnity,Palantir Technologies,Smurfit WestRock,CrowdStrike,GoDaddy,KKR,...,Pfizer,Procter & Gamble,PPG Industries,RTX Corporation,Schlumberger,Southern Company,S&P Global,Union Pacific Corporation,Xcel Energy,ExxonMobil
0,0.326013,0.932411,,,0.180146,,,,0.616183,0.7056,...,0.628573,0.520587,0.417923,0.200806,0.209848,0.548392,0.70014,0.454769,0.293633,0.319392
1,0.336434,0.890231,,,0.190197,,,,0.607016,0.238774,...,0.620568,0.496006,0.397372,0.181532,0.209577,0.355253,0.612285,0.454137,0.202827,0.223679
2,0.324706,0.917872,,,0.157429,,,,0.591923,0.106932,...,0.705558,0.512008,0.399443,0.18446,0.182319,0.312519,0.679175,0.451169,0.238447,0.233289
3,0.307239,0.922319,,,0.155201,,,,0.531764,0.255271,...,0.39701,0.526888,0.418161,0.201184,0.199778,0.41158,0.672589,0.449424,0.237071,0.215013
4,0.313621,0.923167,,,0.172854,,,,0.628961,0.491169,...,0.198685,0.519866,0.407407,0.05303,0.206739,0.537822,0.345655,0.430062,0.494539,0.323484


In [41]:
# Line chart of every company's gross profit ratio. The x axis is the row position
# of the frame (labelled "Quarter"), not a calendar date.
px.line(
    quarterly_gross_profit_ratios_df,
    title='Gross Profit Ratio Over Time',
    labels={'value': 'Gross Profit Ratio', 'index': 'Quarter', 'variable': 'Company'},
)

In [84]:
# Flag each company (column) whose mean gross profit ratio is >= 0.5,
# then keep only the labels of the flagged companies.
mean_gpr_at_least_half = quarterly_gross_profit_ratios_df.apply(lambda ratios: ratios.mean() >= 0.5)
companies_gpr_gt_50 = mean_gpr_at_least_half.loc[mean_gpr_at_least_half.eq(True)].index
len(companies_gpr_gt_50)

210

In [85]:
# Plot only the companies whose average gross profit ratio is >= 0.5.
# The title states ">=" so that it matches the filter used to build companies_gpr_gt_50.
px.line(
    quarterly_gross_profit_ratios_df[companies_gpr_gt_50],
    title='Gross Profit Ratio (Companies with mean >= 50%)',
    labels={'value': 'Gross Profit Ratio', 'index': 'Quarter', 'variable': 'Company'},
)

### Ratio of SGA (Selling, General, and Administrative) Expenses to Gross Profit
- SGA expenses are the costs associated with selling a product or service and managing the company.
- A consistent ratio is important even if it's high (Coca-Cola has a consistent ratio of around 59%).
- Optimally, it should be consistently low (under 30% is considered fantastic)

In [60]:
# Build, per company, the ratio of SG&A expenses to gross profit from up to the
# first 40 statements stored in that company's row of income_statements_df.
# NOTE(review): `[:40]` keeps the first 40 entries while short histories are padded
# at the front; confirm this matches the order in which statements are returned.
quarterly_sga_ratios = dict()
for i, row in income_statements_df.iterrows():
    company_sga_ratio = []
    for quarter in row.values[:40]:
        if quarter is None:
            # Empty slot: this company has fewer statements than the widest row.
            continue
        try:
            company_sga_ratio.append(quarter['sellingGeneralAndAdministrativeExpenses'] / quarter['grossProfit'])
        except ZeroDivisionError:
            # The ratio is undefined when gross profit is zero. Record NaN (missing)
            # rather than 0, because 0 would read as the best possible SG&A ratio
            # and would drag averages down.
            company_sga_ratio.append(float('nan'))
    # Not every company has 40 quarters of data: left-pad with None up to length 40.
    if len(company_sga_ratio) < 40:
        company_sga_ratio = [None] * (40 - len(company_sga_ratio)) + company_sga_ratio
    # The row label doubles as the positional index into snp_data.
    quarterly_sga_ratios[snp_data.iloc[i]['name']] = company_sga_ratio

In [61]:
# One column per company name, one row per position in the 40-entry ratio lists.
quarterly_sga_ratios_df = pd.DataFrame(data=quarterly_sga_ratios)

In [62]:
# Line chart of every company's SG&A-to-gross-profit ratio, indexed by row position.
px.line(
    quarterly_sga_ratios_df,
    title='SGA Expenses to Gross Profit Ratio Over Time',
    labels={'value': 'SGA Expenses to Gross Profit Ratio', 'index': 'Quarter', 'variable': 'Company'},
)

### Research and Development to Gross Profit Ratio
- Competitive advantage is usually gained through a patent or a specific technological advancement (this is very common in pharmaceutical companies). However, a patent has an expiry date and with that the competitive advantage is lost.
- In IT this is very common, and that's why companies are on the lookout for the next big thing. And that's why Google, Meta, Apple, Microsoft, Amazon go through periods of dominance and periods of decline. They have a huge overlap of products and services, and they have a clear monopoly in only one or two areas. (Google - search, Meta - social media, Apple - hardware, Microsoft - software, Amazon - e-commerce).
- Because they have to update their products and services all the time, they have to spend on selling and administrative costs as well - which eats into their profit margins.

In [63]:
# Build, per company, the ratio of R&D expenses to gross profit from up to the
# first 40 statements stored in that company's row of income_statements_df.
# NOTE(review): `[:40]` keeps the first 40 entries while short histories are padded
# at the front; confirm this matches the order in which statements are returned.
quarterly_rd_ratios = dict()
for i, row in income_statements_df.iterrows():
    company_rd_ratio = []
    for quarter in row.values[:40]:
        if quarter is None:
            # Empty slot: this company has fewer statements than the widest row.
            continue
        try:
            company_rd_ratio.append(quarter['researchAndDevelopmentExpenses'] / quarter['grossProfit'])
        except ZeroDivisionError:
            # The ratio is undefined when gross profit is zero. Record NaN (missing)
            # rather than 0, because 0 would be indistinguishable from a company
            # that genuinely spends nothing on R&D.
            company_rd_ratio.append(float('nan'))
    # Not every company has 40 quarters of data: left-pad with None up to length 40.
    if len(company_rd_ratio) < 40:
        company_rd_ratio = [None] * (40 - len(company_rd_ratio)) + company_rd_ratio
    # The row label doubles as the positional index into snp_data.
    quarterly_rd_ratios[snp_data.iloc[i]['name']] = company_rd_ratio

In [64]:
# One column per company name, one row per position in the 40-entry ratio lists.
quarterly_rd_ratios_df = pd.DataFrame(data=quarterly_rd_ratios)

In [65]:
# Line chart of every company's R&D-to-gross-profit ratio, indexed by row position.
px.line(
    quarterly_rd_ratios_df,
    title='R&D Expenses to Gross Profit Ratio Over Time',
    labels={'value': 'R&D Expenses to Gross Profit Ratio', 'index': 'Quarter', 'variable': 'Company'},
)

### Depreciation and Amortization
- Depreciation is the reduction in value of an asset (vehicle, building, hardware) over time.
- Amortization is the reduction in value of an intangible asset (IP, license, software) over time.
- Companies with a durable competitive advantage have low depreciation and amortization costs (for example, below 10% of gross profit).


### EBITDA (Earnings Before Interest, Taxes, Depreciation, and Amortization)
- EBITDA is sketchy because people who use it try to hide costs of doing business.
- EBITDA is good as a proxy for operating cash flow. However, it excludes depreciation of capex and sometimes stock-based compensation, so it is important to consider those separately if you’re going to use EBITDA. The reason why investors often exclude interest income/expense is that they may be recapitalizing the company with a new equity or debt structure, in which case that interest expense may go away upon their investment. Similarly, the company’s taxable position may change if it gets acquired, so investors prefer to normalize this out.

### Interest
- Related to why EBITDA can be useful sometimes - companies with high interest expenses are either in a fiercely competitive industry (and therefore are forced to take on a lot of debt to grow), or they acquired the debt during a leveraged buyout.
- Useful to check the ratio of interest to operating income (gross profit - operating expenses). It varies from industry to industry - consumer goods companies have a low ratio (<15%), while financial services companies have a higher ratio (~30%).

### Income before Taxes
- The company's income after all expenses have been deducted (SGA, R&D, depreciation, amortization, interest, gain/loss on sale of assets, etc.) but before taxes have been deducted.
- Buffett uses this to calculate his ROI, if he bought a business or a stake in a business.

### Net Earnings (Net Income)
- The company's income after all expenses and taxes have been deducted.
- A historical upward trend is desired (similar to Graham's approach to earnings).
- Buffett uses the general earnings instead of EPS because through share repurchase programs, companies can increase EPS without increasing earnings. This can be misleading.
- Ratio of net earnings to gross profit of a company with a durable advantage should be larger than its competitors.
- Rule of thumb - if net earnings / gross profit is > 20%, the company has a durable competitive advantage. If it's less than 10%, it's in a competitive industry. If it's 10-20%, it's gray area.
- Be careful in the banking and financial sector - a huge ratio can indicate a company is taking on too much risk.

### EPS (Earnings Per Share)
- EPS is the company's net earnings divided by the number of outstanding shares.
- Similar to earnings stability, we also seek EPS stability (and growth) over time. Instability in EPS indicates that the business is not tuned to the supply and demand of the market. (it's under-resourced when there's demand and over-resourced when there's no demand)