In [1]:
import numpy as np
import pandas as pd
import yfinance as yf

# GET COMPONENTS

In [2]:
dow_url = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'
sp5_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# read_html returns every <table> on the page as a list; the constituent
# tables are picked by position, so these indexes must be revisited if the
# page layout changes.
dow_components = pd.read_html(dow_url)[1]
sp5_components = pd.read_html(sp5_url)[0]

# Swap '.' for '-' in ticker symbols (presumably to match the ticker format
# expected by the later yfinance download -- confirm).
# regex=False is explicit on purpose: interpreted as a regular expression,
# '.' matches *any* character and would turn every symbol into dashes. The
# default for `regex` has differed between pandas versions.
dow_components['Symbol'] = dow_components['Symbol'].str.replace('.', '-', regex=False)
sp5_components['Symbol'] = sp5_components['Symbol'].str.replace('.', '-', regex=False)

In [3]:
# Render both component tables, Dow first, then S&P 500.
display(dow_components, sp5_components)

Unnamed: 0,Company,Exchange,Symbol,Industry,Date added,Notes,Index weighting
0,3M,NYSE,MMM,Conglomerate,1976-08-09,As Minnesota Mining and Manufacturing,2.88%
1,American Express,NYSE,AXP,Financial services,1982-08-30,,3.56%
2,Amgen,NASDAQ,AMGN,Biopharmaceutical,2020-08-31,,4.88%
3,Apple,NASDAQ,AAPL,Information technology,2015-03-19,,3.15%
4,Boeing,NYSE,BA,Aerospace and defense,1987-03-12,,3.40%
5,Caterpillar,NYSE,CAT,Construction and Mining,1991-05-06,,4.19%
6,Chevron,NYSE,CVX,Petroleum industry,2008-02-19,Also 1930-07-18 to 1999-11-01,3.05%
7,Cisco,NASDAQ,CSCO,Information technology,2009-06-08,,1.00%
8,Coca-Cola,NYSE,KO,Drink industry,1987-03-12,Also 1932-05-26 to 1935-11-20,1.28%
9,Disney,NYSE,DIS,Broadcasting and entertainment,1991-05-06,,2.32%


Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902
1,AOS,A. O. Smith,reports,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
3,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ABMD,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
...,...,...,...,...,...,...,...,...,...
499,YUM,Yum! Brands,reports,Consumer Discretionary,Restaurants,"Louisville, Kentucky",1997-10-06,1041061,1997
500,ZBRA,Zebra,reports,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",2019-12-23,877212,1969
501,ZBH,Zimmer Biomet,reports,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,1136869,1927
502,ZION,Zions Bancorp,reports,Financials,Regional Banks,"Salt Lake City, Utah",2001-06-22,109380,1873


In [4]:
# Persist each component table next to the notebook, without the row index.
for frame, filename in (
    (dow_components, 'dow_components.csv'),
    (sp5_components, 'sp5_components.csv'),
):
    frame.to_csv(filename, index=False)

# GET VALUES

In [5]:
# Reload the component tables from the CSV files written earlier.
dow_components, sp5_components = (
    pd.read_csv(filename)
    for filename in ('dow_components.csv', 'sp5_components.csv')
)

# Symbol lists to download: two hand-picked lists (indexes, funds) and the
# two lists taken from the 'Symbol' column of the component tables.
idx_symbols = ['^DJI', '^GSPC']
etf_symbols = ['VOO', 'VTSAX']
dow_symbols = dow_components['Symbol'].to_list()
sp5_symbols = sp5_components['Symbol'].to_list()

In [6]:
# One yfinance download per symbol group, issued in the same order as before
# (indexes, funds, Dow components, S&P 500 components).
idx_wide, etf_wide, dow_wide, sp5_wide = (
    yf.download(symbols)
    for symbols in (idx_symbols, etf_symbols, dow_symbols, sp5_symbols)
)

[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  30 of 30 completed
[*********************100%***********************]  504 of 504 completed


In [7]:
def get_values(df):
    """Reshape a wide, two-level-column price frame into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame whose columns are a two-level MultiIndex (the levels are
        labelled 'Value' and 'Symbol' here) and whose index is named 'Date'.
        It must contain a 'Volume' entry in the first column level. The
        frame is NOT modified; a relabelled copy is reshaped instead.

    Returns
    -------
    pandas.DataFrame
        One row per Date/Symbol pair produced by stacking the 'Symbol'
        level, with 'Date' and 'Symbol' as ordinary columns followed by one
        column per first-level label. 'Date' is a datetime column and
        'Volume' is int64.

    Raises
    ------
    ValueError
        Propagated from the int64 cast if 'Volume' contains missing values.
    """
    # rename_axis returns a new frame, so the caller's wide frame keeps its
    # original (unnamed) column levels.
    long_df = (
        df.rename_axis(columns=['Value', 'Symbol'])
        .stack()          # moves the last column level ('Symbol') into the index
        .reset_index()    # 'Date' and 'Symbol' become regular columns
    )
    # Drop the leftover 'Value' label so it does not show up as a header name.
    long_df.columns.name = None
    # pd.to_datetime instead of astype('datetime64'): the unit-less dtype
    # string is rejected by newer pandas releases.
    long_df['Date'] = pd.to_datetime(long_df['Date'])
    long_df['Volume'] = long_df['Volume'].astype('int64')
    return long_df


# Convert each wide download into its long-format counterpart.
idx_values, etf_values, dow_values, sp5_values = (
    get_values(wide)
    for wide in (idx_wide, etf_wide, dow_wide, sp5_wide)
)

# Only the S&P 500 frame is shown; the other three frames can be inspected
# the same way, or with .info(), when needed.
display(sp5_values)

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,1962-01-02,AEP,1.035685,34.312500,35.125000,34.312500,0.000000,5800
1,1962-01-02,BA,0.190931,0.823045,0.837449,0.823045,0.837449,352350
2,1962-01-02,CAT,0.130512,1.604167,1.619792,1.588542,1.604167,163200
3,1962-01-02,CNP,0.349574,10.783375,10.865333,10.783375,0.000000,13879
4,1962-01-02,CVX,0.046809,3.296131,3.296131,3.244048,0.000000,105840
...,...,...,...,...,...,...,...,...
4063303,2022-04-28,YUM,119.519997,119.519997,120.750000,118.169998,118.169998,1012636
4063304,2022-04-28,ZBH,124.610001,124.610001,124.904999,122.730003,124.150002,1058482
4063305,2022-04-28,ZBRA,382.980011,382.980011,384.940002,370.630005,375.589996,356606
4063306,2022-04-28,ZION,57.990002,57.990002,58.320000,55.919998,56.660000,1897811


In [8]:
# Write each long-format frame to its own CSV file, without the row index.
for frame, filename in (
    (idx_values, 'idx_values.csv'),
    (etf_values, 'etf_values.csv'),
    (dow_values, 'dow_values.csv'),
    (sp5_values, 'sp5_values.csv'),
):
    frame.to_csv(filename, index=False)