In [None]:
import io
import os, contextlib

import pandas as pd # library for data analysis
import requests # library to handle requests
import yfinance as yf
from bs4 import BeautifulSoup # library to parse HTML documents

In [None]:
# Look-back window for the price history: passed as `period=` to yf.download
# for every ticker and embedded in the name of the exported prices CSV.
period = '5y'

In [None]:
# Download the Wikipedia page that lists the S&P 500 constituents.
wikiurl = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Not referenced by the parsing cell (it matches on "wikitable" only); kept so
# that any code still reading this name keeps working.
table_class = "wikitable sortable jquery-tablesorter"

# Identify the client explicitly: Wikimedia's User-Agent policy asks for a
# descriptive agent and may reject anonymous library defaults.
request_headers = {"User-Agent": "sp500-metadata-notebook/1.0 (python-requests)"}

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails here instead of later while parsing an error page.
response = requests.get(wikiurl, headers=request_headers, timeout=30)
response.raise_for_status()

In [None]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, 'html.parser')

# find() returns the first <table> carrying the "wikitable" class, or None when
# the page has no such table.
stocks = soup.find('table', {'class': "wikitable"})

# Fail with a clear message here; otherwise str(None) would be handed to
# pd.read_html in the next cell and produce a misleading parsing error.
if stocks is None:
    raise ValueError("No table with class 'wikitable' found in the downloaded page")

In [None]:
# read_html returns a list of DataFrames, one per <table> found in the markup.
# The markup is wrapped in StringIO because passing a literal HTML string is
# deprecated in recent pandas releases.
tables = pd.read_html(io.StringIO(str(stocks)))

# `stocks` is a single <table> element, so the first entry is the one we want
df = tables[0]

# cols to keep
cols = ['Symbol', 'Security', 'GICS Sector', 'GICS Sub-Industry', 'Date first added']

# keep only the metadata columns, ordered alphabetically by ticker symbol
sp500 = df.loc[:, cols].sort_values(by='Symbol')

# persist the metadata next to the notebook (the row index is not written)
sp500.to_csv('sp500_metadata.csv', index=False)

In [None]:
# saving list from the S&P metadata
symbols = list(sp500['Symbol'])

# Alias of `period`; not read in this cell, kept for any code that relies on it.
date_range = period

stock_list = []       # one price frame per ticker that returned data
skipped_tickers = []  # tickers for which the download came back empty

for ticker in symbols:
    symbol = str(ticker)

    # Yahoo Finance spells share-class tickers with a hyphen (BRK-B) while the
    # Wikipedia table uses a dot (BRK.B); query Yahoo with its own spelling.
    yahoo_symbol = symbol.replace('.', '-')

    # progress=False silences the per-ticker progress bar, which would otherwise
    # print once for every symbol in the list.
    prices = pd.DataFrame(
        yf.download(yahoo_symbol, period=period, auto_adjust=True, progress=False)
    )

    # Nothing came back (e.g. unknown or delisted symbol): record it and move on
    # instead of concatenating an empty frame.
    if prices.empty:
        skipped_tickers.append(symbol)
        continue

    # NOTE(review): newer yfinance releases appear to return two-level
    # (field, ticker) columns even for a single symbol - confirm against the
    # installed version. Flattening to the field level keeps 'Open', 'Close', ...
    # addressable by name in the column-ordering cell.
    if isinstance(prices.columns, pd.MultiIndex):
        prices.columns = prices.columns.get_level_values(0)

    # Label rows with the original symbol so they match the 'Symbol' column
    # written to sp500_metadata.csv.
    prices['Ticker'] = symbol
    stock_list.append(prices)

if skipped_tickers:
    print('No price data returned for {} ticker(s): {}'.format(
        len(skipped_tickers), ', '.join(skipped_tickers)))

# pd.concat raises an opaque "No objects to concatenate" on an empty list.
if not stock_list:
    raise RuntimeError('No price data could be downloaded for any ticker')

# Stack all tickers vertically; reset_index turns the date index into a column.
dataframe = pd.concat(stock_list, axis=0).reset_index()

In [None]:
# Column layout of the exported price table: date and ticker first, followed by
# the open/high/low/close prices and the traded volume.
order = [
    'Date',
    'Ticker',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
]

# Keep exactly these columns, in this order.
dataframe = dataframe[order]

In [None]:
# Write the combined price table to disk; the look-back period is part of the
# file name and the row index is not written.
output_path = 'sp500_prices_{}.csv'.format(period)
dataframe.to_csv(output_path, index=False)