# Web Scraping for S&P 500 Data

In [1]:
import bs4 as bs
import pickle
import requests

### Get S&P 500 Data using requests and BeautifulSoup

In [2]:
# Wikipedia page that lists the S&P 500 companies
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
resp = requests.get(url) # Returns a requests.Response object for the page
resp

<Response [200]>

In [3]:
# Parse the response body (resp.text) with BeautifulSoup, using the 'html5lib' parser
# `soup` is the parsed document for the whole page
soup = bs.BeautifulSoup(resp.text, 'html5lib')

In [4]:
# Find the first <table> whose class attribute is 'wikitable sortable'
# `table` holds only that table's element (soup.find gives None when nothing matches)
table = soup.find('table', {'class': 'wikitable sortable'})

In [5]:
# Loop through the table rows and collect the ticker from each one
tickers = []

# Start at position 1 to not include the header row
for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    if not cells:
        # Rows made only of <th> cells have no ticker; indexing [0] would raise IndexError
        continue
    # Ticker is the text of the first column; strip() drops any surrounding whitespace/newline
    ticker = cells[0].text.strip()
    tickers.append(ticker)

In [6]:
# Look at the first five tickers
tickers[0:5]

['MMM', 'ABT', 'ABBV', 'ACN', 'ATVI']

### Pickle the tickers for later use:

In [7]:
# Serialize the ticker list to disk; pickle needs the file opened in binary mode ('wb').
# open() does not create directories, so the Data/ folder must already exist.
with open("Data/sp500tickers.pickle", 'wb') as f:
    pickle.dump(tickers, f)

## Convert into a function

In [8]:
def get_tickers(url, filename):
    """Scrape ticker symbols from an HTML table and pickle them.

    Downloads ``url``, locates the first table whose class attribute is
    'wikitable sortable', and takes the text of the first cell of every
    data row as a ticker symbol. The resulting list is written to
    ``filename`` with pickle and also returned.

    Args:
        url: Address of the page that contains the table.
        filename: Path of the pickle file to write; its directory must exist.

    Returns:
        List of ticker strings, in table order.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        ValueError: If the page contains no matching table.
    """
    # A timeout keeps the call from hanging forever on an unresponsive server
    resp = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'html5lib')  # Parse the response body
    table = soup.find('table', {'class': 'wikitable sortable'})  # First matching table
    if table is None:
        raise ValueError(
            "No table with class 'wikitable sortable' found at {}".format(url)
        )

    # Loop through the table to get the tickers
    tickers = []
    for row in table.find_all('tr')[1:]:  # Start at position 1 to skip the header row
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells carry no ticker; [0] would raise IndexError
            continue
        # First column holds the ticker; strip() removes surrounding whitespace/newline
        tickers.append(cells[0].text.strip())

    # Pickle the tickers for later use
    with open(filename, 'wb') as f:
        pickle.dump(tickers, f)

    return tickers

In [9]:
# Source page and output pickle path for the scrape
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
name = "Data/sp500tickers.pickle"

# Scrape the tickers and pickle them to `name` in a single call
data = get_tickers(url, name)

In [10]:
# Look at the first ten tickers returned by get_tickers
data[0:10]

['MMM', 'ABT', 'ABBV', 'ACN', 'ATVI', 'AYI', 'ADBE', 'AMD', 'AAP', 'AES']