# S&P500 Historial Components & Changes

Let's use web scraping to grab the S&P 500 current and historical components. For those of you that just want the data, see the below download links.

In [1]:
import datetime as dt
import pandas as pd

## 1. Web Scrape Using Pandas

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
data = pd.read_html(url)
print(f"We have a {type(data)} with length {len(data)}.")

We have a <class 'list'> with length 2.


In [3]:
data[0].head()

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
2,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
3,ABMD,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
4,ACN,Accenture,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [4]:
data[1].head()

Unnamed: 0_level_0,Date,Added,Added,Removed,Removed,Reason
Unnamed: 0_level_1,Date,Ticker,Security,Ticker,Security,Reason
0,"September 20, 2021",MTCH,Match Group,PRGO,Perrigo,Market capitalization change.[6]
1,"September 20, 2021",CDAY,Ceridian,UNM,Unum,Market capitalization change.[6]
2,"September 20, 2021",BRO,Brown & Brown,NOV,Nov,Market capitalization change.[6]
3,"August 30, 2021",TECH,Bio-Techne,MXIM,Maxim Integrated,S&P 500 constituent Analog Devices acquired Ma...
4,"July 21, 2021",MRNA,Moderna,ALXN,Alexion Pharmaceuticals,AstraZeneca Plc acquired Alexion Pharmaceutica...


## 2. Get Current S&P 500 Members

In [5]:
sp500 = data[0].iloc[:, [0,1,6,7]]
sp500.columns = ['ticker', 'name', 'date' , 'cik']
sp500.head()

Unnamed: 0,ticker,name,date,cik
0,MMM,3M,1976-08-09,66740
1,ABT,Abbott Laboratories,1964-03-31,1800
2,ABBV,AbbVie,2012-12-31,1551152
3,ABMD,Abiomed,2018-05-31,815094
4,ACN,Accenture,2011-07-06,1467373


In [6]:
sp500[sp500['date'].isnull()]

Unnamed: 0,ticker,name,date,cik
7,AMD,Advanced Micro Devices,,2488
126,ED,Consolidated Edison,,1047862
130,GLW,Corning,,24741
138,DHR,Danaher Corporation,,313616
139,DRI,Darden Restaurants,,940944
154,D,Dominion Energy,,715957
164,ETN,Eaton Corporation,,1551182
181,ES,Eversource Energy,,72741
194,FITB,Fifth Third Bancorp,,35527
195,FE,FirstEnergy,,1031296


In [7]:
mask = sp500['date'].str.strip().str.fullmatch('\d{4}-\d{2}-\d{2}')
mask.loc[mask.isnull()] = False
mask = mask == False
sp500[mask]

Unnamed: 0,ticker,name,date,cik
7,AMD,Advanced Micro Devices,,2488
51,T,AT&T,1983-11-30 (1957-03-04),732717
126,ED,Consolidated Edison,,1047862
130,GLW,Corning,,24741
138,DHR,Danaher Corporation,,313616
139,DRI,Darden Restaurants,,940944
154,D,Dominion Energy,,715957
164,ETN,Eaton Corporation,,1551182
181,ES,Eversource Energy,,72741
194,FITB,Fifth Third Bancorp,,35527


In [24]:
current = sp500.copy()
current.loc[mask, 'date'] = '1900-01-01'
current.loc[:, 'date'] = pd.to_datetime(current['date'])
current.loc[:, 'cik'] = current['cik'].apply(str).str.zfill(10)
current.head(10)

Unnamed: 0,ticker,name,date,cik
0,MMM,3M,1976-08-09,66740
1,ABT,Abbott Laboratories,1964-03-31,1800
2,ABBV,AbbVie,2012-12-31,1551152
3,ABMD,Abiomed,2018-05-31,815094
4,ACN,Accenture,2011-07-06,1467373
5,ATVI,Activision Blizzard,2015-08-31,718877
6,ADBE,Adobe,1997-05-05,796343
7,AMD,Advanced Micro Devices,1900-01-01,2488
8,AAP,Advance Auto Parts,2015-07-09,1158449
9,AES,AES Corp,1998-10-02,874761


## 3. Manage Adjustments

In [9]:
adjustments = data[1]
adjustments

Unnamed: 0_level_0,Date,Added,Added,Removed,Removed,Reason
Unnamed: 0_level_1,Date,Ticker,Security,Ticker,Security,Reason
0,"September 20, 2021",MTCH,Match Group,PRGO,Perrigo,Market capitalization change.[6]
1,"September 20, 2021",CDAY,Ceridian,UNM,Unum,Market capitalization change.[6]
2,"September 20, 2021",BRO,Brown & Brown,NOV,Nov,Market capitalization change.[6]
3,"August 30, 2021",TECH,Bio-Techne,MXIM,Maxim Integrated,S&P 500 constituent Analog Devices acquired Ma...
4,"July 21, 2021",MRNA,Moderna,ALXN,Alexion Pharmaceuticals,AstraZeneca Plc acquired Alexion Pharmaceutica...
...,...,...,...,...,...,...
276,"December 5, 2000",SBL,Symbol Technologies,OI,Owens-Illinois,Market Cap changes.
277,"December 5, 2000",AYE,Allegheny Energy,GRA,WR Grace,Market Cap changes.
278,"December 5, 2000",ABK,Ambac Financial,CCK,Crown Holdings,Market Cap changes.
279,"July 27, 2000",JDSU,JDS Uniphase,RAD,RiteAid,Market Cap change.[219]


In [10]:
columns = ['date', 'ticker_added', 'name_added', 'ticker_removed', 'name_removed', 'reason']
adjustments.columns = columns
adjustments

Unnamed: 0,date,ticker_added,name_added,ticker_removed,name_removed,reason
0,"September 20, 2021",MTCH,Match Group,PRGO,Perrigo,Market capitalization change.[6]
1,"September 20, 2021",CDAY,Ceridian,UNM,Unum,Market capitalization change.[6]
2,"September 20, 2021",BRO,Brown & Brown,NOV,Nov,Market capitalization change.[6]
3,"August 30, 2021",TECH,Bio-Techne,MXIM,Maxim Integrated,S&P 500 constituent Analog Devices acquired Ma...
4,"July 21, 2021",MRNA,Moderna,ALXN,Alexion Pharmaceuticals,AstraZeneca Plc acquired Alexion Pharmaceutica...
...,...,...,...,...,...,...
276,"December 5, 2000",SBL,Symbol Technologies,OI,Owens-Illinois,Market Cap changes.
277,"December 5, 2000",AYE,Allegheny Energy,GRA,WR Grace,Market Cap changes.
278,"December 5, 2000",ABK,Ambac Financial,CCK,Crown Holdings,Market Cap changes.
279,"July 27, 2000",JDSU,JDS Uniphase,RAD,RiteAid,Market Cap change.[219]


In [11]:
adjustments.isnull().sum()

date               0
ticker_added       5
name_added         5
ticker_removed    10
name_removed      10
reason             0
dtype: int64

In [12]:
adjustments[adjustments['ticker_removed'].isnull()]

Unnamed: 0,date,ticker_added,name_added,ticker_removed,name_removed,reason
6,"June 3, 2021",OGN,Organon & Co.,,,S&P 500/100 constituent Merck & Co. spun off O...
18,"October 9, 2020",VNT,Vontier,,,S&P 500 constituent Fortive spun off Vontier.[17]
31,"April 3, 2020",OTIS,Otis Worldwide,,,United Technologies spun off Otis and Carrier ...
32,"April 3, 2020",CARR,Carrier Global,,,United Technologies spun off Otis and Carrier ...
126,"April 8, 2016",UA,Under Armour (Class C),,,Under Armour distribution of second class of s...
144,"September 18, 2015",CMCSK,Comcast Class K Special,,,Share class methodology change[112]
145,"September 18, 2015",FOX,Twenty-First Century Fox Class B,,,Share class methodology change
146,"September 18, 2015",NWS,News Corporation Class B,,,Share class methodology change
171,"August 6, 2014",DISCK,Discovery Communications,,,Class C share distribution[133]
178,"April 3, 2014",GOOGL,Google,,,Google Class C share distribution[139]


In [13]:
additions = adjustments[~adjustments['ticker_added'].isnull()][['date','ticker_added', 'name_added']]
additions.columns = ['date','ticker','name']
additions['action'] = 'added'
additions

Unnamed: 0,date,ticker,name,action
0,"September 20, 2021",MTCH,Match Group,added
1,"September 20, 2021",CDAY,Ceridian,added
2,"September 20, 2021",BRO,Brown & Brown,added
3,"August 30, 2021",TECH,Bio-Techne,added
4,"July 21, 2021",MRNA,Moderna,added
...,...,...,...,...
276,"December 5, 2000",SBL,Symbol Technologies,added
277,"December 5, 2000",AYE,Allegheny Energy,added
278,"December 5, 2000",ABK,Ambac Financial,added
279,"July 27, 2000",JDSU,JDS Uniphase,added


In [14]:
removals = adjustments[~adjustments['ticker_removed'].isnull()][['date','ticker_removed','name_removed']]
removals.columns = ['date','ticker','name']
removals['action'] = 'removed'
removals

Unnamed: 0,date,ticker,name,action
0,"September 20, 2021",PRGO,Perrigo,removed
1,"September 20, 2021",UNM,Unum,removed
2,"September 20, 2021",NOV,Nov,removed
3,"August 30, 2021",MXIM,Maxim Integrated,removed
4,"July 21, 2021",ALXN,Alexion Pharmaceuticals,removed
...,...,...,...,...
276,"December 5, 2000",OI,Owens-Illinois,removed
277,"December 5, 2000",GRA,WR Grace,removed
278,"December 5, 2000",CCK,Crown Holdings,removed
279,"July 27, 2000",RAD,RiteAid,removed


In [15]:
historical = pd.concat([additions, removals])
historical

Unnamed: 0,date,ticker,name,action
0,"September 20, 2021",MTCH,Match Group,added
1,"September 20, 2021",CDAY,Ceridian,added
2,"September 20, 2021",BRO,Brown & Brown,added
3,"August 30, 2021",TECH,Bio-Techne,added
4,"July 21, 2021",MRNA,Moderna,added
...,...,...,...,...
276,"December 5, 2000",OI,Owens-Illinois,removed
277,"December 5, 2000",GRA,WR Grace,removed
278,"December 5, 2000",CCK,Crown Holdings,removed
279,"July 27, 2000",RAD,RiteAid,removed


## 4. Merge Missing Tickers

In [23]:
missing = current[~current['ticker'].isin(historical['ticker'])].copy()
missing

Unnamed: 0,ticker,name,date,cik
0,MMM,3M,1976-08-09,00066740
1,ABT,Abbott Laboratories,1964-03-31,00001800
6,ADBE,Adobe,1997-05-05,00796343
9,AES,AES Corp,1998-10-02,00874761
10,AFL,Aflac,1999-05-28,00004977
...,...,...,...,...
497,XEL,Xcel Energy,1957-03-04,00072903
498,XLNX,Xilinx,1999-11-08,00743988
500,YUM,Yum! Brands,1997-10-06,01041061
502,ZBH,Zimmer Biomet,2001-08-07,01136869


In [17]:
missing['action'] = 'added'
missing = missing[['date','ticker','name','action', 'cik']]
missing

Unnamed: 0,date,ticker,name,action,cik
0,1976-08-09,MMM,3M,added,66740
1,1964-03-31,ABT,Abbott Laboratories,added,1800
6,1997-05-05,ADBE,Adobe,added,796343
9,1998-10-02,AES,AES Corp,added,874761
10,1999-05-28,AFL,Aflac,added,4977
...,...,...,...,...,...
497,1957-03-04,XEL,Xcel Energy,added,72903
498,1999-11-08,XLNX,Xilinx,added,743988
500,1997-10-06,YUM,Yum! Brands,added,1041061
502,2001-08-07,ZBH,Zimmer Biomet,added,1136869


In [18]:
sp500_history = pd.concat([historical, missing])
sp500_history = sp500_history.sort_values(by=['date','ticker'], ascending=[False, True])
sp500_history

Unnamed: 0,date,ticker,name,action,cik
112,"September 8, 2016",CHTR,Charter Communications,added,
112,"September 8, 2016",EMC,EMC Corporation,removed,
113,"September 6, 2016",MTD,Mettler Toledo,added,
113,"September 6, 2016",TYC,Tyco International,removed,
208,"September 5, 2012",LYB,LyondellBasell,added,
...,...,...,...,...,...
484,,WAT,Waters Corporation,added,1000697.0
493,,WHR,Whirlpool Corporation,added,106640.0
483,,WM,Waste Management,added,823768.0
491,,WRK,WestRock,added,1732845.0


## 5. Export to CSV

In [19]:
today = dt.datetime.today().strftime('%Y-%m-%d')
today

'2021-09-24'

In [25]:
current.to_csv(f"{today}-sp500.csv")
sp500_history.to_csv(f"{today}-sp500_history.csv")
