In [1]:
import os 
import pandas as pd 
from urllib.request import urlopen
import json

In [2]:
## load the API key from a local key file kept outside the notebook
with open('../.fmp_api.key') as key_file:
    api_key = key_file.read().strip()

In [3]:
## parameters: date window for the price request and the S&P 500 index ticker
start_date, end_date = '2015-01-01', '2024-12-31'
sp500 = '^GSPC'

In [4]:
## function to get data from api.
def get_fmp_data(url):
    """Download a JSON payload from `url` and load it into a DataFrame.

    Parameters
    ----------
    url : str
        Fully built request URL (any query parameters already included).

    Returns
    -------
    pandas.DataFrame
        Whatever `pd.DataFrame` builds from the decoded JSON document.

    Raises
    ------
    urllib.error.URLError
        If the request itself fails.
    ValueError
        If the body is not valid JSON, or pandas cannot build a frame from it.
    """
    # Read inside a context manager so the connection is released even when
    # reading or decoding raises.
    with urlopen(url) as response:
        payload = response.read().decode("utf-8")
    return pd.DataFrame(json.loads(payload))


In [5]:
## Download the raw inputs: index price history, current members, and the membership change log.
## NOTE: `sp500` must still hold the ticker string here. A later cell rebinds that name to a
## DataFrame, so re-run the parameters cell before re-running this one.

## S&P 500 price history for the configured date window
url = (
    "https://financialmodelingprep.com/stable/historical-price-eod/full"
    f"?symbol={sp500}&from={start_date}&to={end_date}&apikey={api_key}"
)
sppa = get_fmp_data(url)  ## S&P 500 price action

## current S&P 500 constituents
url = (
    "https://financialmodelingprep.com/api/v3/sp500_constituent"
    f"?apikey={api_key}"
)
spc = get_fmp_data(url)  ## current S&P 500 constituents

## historical additions and removals
url = (
    "https://financialmodelingprep.com/api/v3/historical/sp500_constituent"
    f"?apikey={api_key}"
)
sphc = get_fmp_data(url)  ## historical additions and removals

In [6]:
## process S&P 500 data: daily closes -> one row per month with the last close and the
## compounded return of that month's daily changes, indexed by monthly period.

## daily closes on a sorted datetime index
sp500_daily = (
    sppa.loc[:, ['date', 'close']]
    .assign(date=lambda df_: pd.to_datetime(df_['date']))
    .set_index('date')
    .sort_index()
)

## daily percentage change; the first row has no prior close and is dropped
sp500_daily = sp500_daily.assign(pctchg=sp500_daily['close'].pct_change()).dropna()

## month-end bins: last close, and daily returns compounded over the month
sp500_monthly = sp500_daily.resample('ME').agg(
    {'close': 'last',
     'pctchg': lambda daily_returns: (1 + daily_returns).prod() - 1}
)

## re-key the monthly rows by period while keeping the month-end date as a column
sp500 = (
    sp500_monthly
    .reset_index()
    .assign(month=lambda df_: df_['date'].dt.to_period('M'))
    .set_index('month')
)

In [None]:
## Start with the current constituents and work backwards through the change log to build
## monthly snapshots of the S&P 500 constituents.

## One row per month that had changes (newest first): the set of tickers added that month
## and the set of tickers removed that month.
sp500_changes = \
(
    sphc
    .loc[:, ['date', 'symbol', 'removedTicker']]
    .assign(month = lambda df_: pd.to_datetime(df_.date).dt.to_period('M'),
            ## blank out the added ticker on rows that list the same ticker as added and removed
            symbol = lambda df_: df_.symbol.mask(df_.symbol.eq(df_.removedTicker), None))
    .groupby('month')
    .agg({'symbol': set, 'removedTicker': set})
    .sort_index(ascending=False)
    .rename(columns={'symbol': 'added', 'removedTicker': 'removed'})
)


def _unwind_changes(members, month):
    """Reverse, in place, the changes recorded for `month`; do nothing if there are none.

    Tickers added during the month are taken out of `members` and tickers removed
    during the month are put back. The empty-string placeholder is discarded afterwards.
    """
    if month not in sp500_changes.index:
        return
    added_in_month = sp500_changes.loc[month, 'added']
    if added_in_month:
        members.difference_update(added_in_month)
    removed_in_month = sp500_changes.loc[month, 'removed']
    if removed_in_month:
        members.update(removed_in_month)
    members.discard('')


last_month = sp500.index[-1]
constituents_i = set(spc.symbol)

## `spc` is the membership at download time, which can be later than the last month in
## `sp500`. Reverse every change dated after that month first (the change log is sorted
## newest first), so the walk below starts from the right membership.
for month_i in sp500_changes.index[sp500_changes.index > last_month]:
    _unwind_changes(constituents_i, month_i)

## Seed the last month so the key always exists; the loop below overwrites it.
sp500_constituents = {last_month: constituents_i.copy()}

## Each stored snapshot is the membership *before* that month's own changes: the month's
## changes are reversed first and the result is then recorded under that month.
## A month without recorded changes is simply skipped by the helper, so a quiet last
## month no longer raises KeyError.
## NOTE(review): the slice stops before position 0, so the first month in `sp500` never
## gets a snapshot - confirm that is intended.
for month_i in sp500.index[-1:0:-1]:
    _unwind_changes(constituents_i, month_i)
    sp500_constituents[month_i] = constituents_i.copy()

In [94]:
def get_market_cap(ticker, start_date, end_date):
    """Fetch a ticker's historical market cap and reduce it to one value per month.

    Reads the notebook-level `api_key` and downloads through `get_fmp_data`.

    Parameters
    ----------
    ticker : str
        Symbol placed in the request path.
    start_date, end_date : str
        Values sent as the `from` / `to` query parameters.

    Returns
    -------
    pandas.DataFrame or None
        A single `marketCap` column (the raw value divided by 1e9) indexed by monthly
        period, holding the first observation of each month. `None` when the download
        or the processing fails, e.g. when the response has no `date` / `marketCap`
        columns.
    """
    url = f"https://financialmodelingprep.com/api/v3/historical-market-capitalization/{ticker}?limit=1500&from={start_date}&to={end_date}&apikey={api_key}"
    try:
        return (get_fmp_data(url)
            .loc[:, ['date', 'marketCap']]
            .assign(date = lambda x: pd.to_datetime(x.date))
            .set_index('date')
            .sort_index(ascending=True)
            .assign(marketCap = lambda x: x.marketCap.div(1e9))
            .resample('ME')
            .agg({'marketCap': 'first'})
            .reset_index()
            .assign(month = lambda df_: df_.date.dt.to_period('M'))
            .set_index('month')
            .loc[:, ['marketCap']]
        )
    except Exception:
        # Best effort: a ticker without usable data yields None instead of stopping the
        # whole run. Catching Exception (not a bare except) still lets
        # KeyboardInterrupt / SystemExit stop a long download loop.
        return None

## get market cap data for each constituent:
## collect every symbol that appears in at least one monthly snapshot
all_symbols = set().union(*sp500_constituents.values())

In [95]:
## Fetch monthly market caps for every collected symbol.
## market_caps maps month -> {symbol: market cap as returned by get_market_cap}.
market_caps = {month: {} for month in sp500.index}

## The history is requested in two five-year windows
## (presumably to stay under the request's row limit - TODO confirm).
request_windows = [('2015-01-01', '2019-12-31'), ('2020-01-01', '2024-12-31')]

for symbol in sorted(all_symbols):
    # The loop names are deliberately different from the notebook-level
    # start_date / end_date parameters so this cell does not overwrite them.
    for window_start, window_end in request_windows:
        market_cap = get_market_cap(symbol, window_start, window_end)
        print(symbol, market_cap.size if market_cap is not None else 0)
        if market_cap is not None:
            for month, cap in market_cap['marketCap'].items():
                market_caps[month][symbol] = cap

A 60
A 60
AA 60
AA 60
AAL 60
AAL 60
AAP 60
AAP 60
AAPL 60
AAPL 60
ABBV 60
ABBV 60
ABMD 0
ABMD 0
ABNB 0
ABNB 49
ABT 60
ABT 60
ACE 0
ACE 0
ACGL 60
ACGL 60
ACN 60
ACN 60
ADBE 60
ADBE 60
ADI 60
ADI 60
ADM 60
ADM 60
ADP 60
ADP 60
ADS 60
ADS 60
ADSK 60
ADSK 60
ADT 24
ADT 60
AEE 60
AEE 60
AEP 60
AEP 60
AES 60
AES 60
AET 0
AET 0
AFL 60
AFL 60
AGN 0
AGN 0
AIG 60
AIG 60
AIV 60
AIV 60
AIZ 60
AIZ 60
AJG 60
AJG 60
AKAM 60
AKAM 60
ALB 60
ALB 60
ALGN 60
ALGN 60
ALK 60
ALK 60
ALL 60
ALL 60
ALLE 60
ALLE 60
ALTR 26
ALTR 60
ALXN 0
ALXN 0
AMAT 60
AMAT 60
AMCR 7
AMCR 60
AMD 60
AMD 60
AME 60
AME 60
AMG 60
AMG 60
AMGN 60
AMGN 60
AMP 60
AMP 60
AMT 60
AMT 60
AMTM 0
AMTM 4
AMZN 60
AMZN 60
AN 60
AN 60
ANDV 0
ANDV 0
ANET 60
ANET 60
ANSS 60
ANSS 60
AON 60
AON 60
AOS 60
AOS 60
APA 60
APA 60
APC 0
APC 0
APD 60
APD 60
APH 60
APH 60
APTV 60
APTV 60
ARE 60
ARE 60
ARG 0
ARG 0
ATI 60
ATI 60
ATO 60
ATO 60
ATVI 60
ATVI 46
AVB 60
AVB 60
AVGO 60
AVGO 60
AVP 0
AVP 0
AVY 60
AVY 60
AWK 60
AWK 60
AXON 60
AXON 60
AXP 60
AXP 60
AY

In [98]:
## write the collected market caps to disk as JSON, one entry per row label of the frame
market_caps_frame = pd.DataFrame(market_caps)
market_caps_frame.to_json('sp500_market_caps.json', orient='index')