# WEB SCRAPING FOR S&P500 COMPANIES

### Aim :

To scrape the S&P 500 constituents table from Wikipedia for use in live stock market forecasting.

### Methods:

* Importing the Libraries (Requests, urllib, BeautifulSoup, Pickle)
* Using `urllib.request.urlopen` to Connect to the URL
* Using BeautifulSoup to Scrape the Table from Wikipedia
* Getting the Symbols and Company Names
* Converting the Lists into a Dictionary
* Packing the Dictionary for Future Use with the Pickle Module

In [1]:
### Importing the Libraries 
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import pickle
from urllib.request import urlopen

### Connecting to the Server
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
# Download the page inside a context manager so the HTTP connection is
# always closed, even if reading the response fails part-way through.
with urlopen(url) as response:
    html = response.read()  # raw page bytes; BeautifulSoup detects the encoding
# Parse the downloaded markup with Python's built-in HTML parser.
soup = BeautifulSoup(html, 'html.parser')

In [2]:
### Beautiful Soup to Find the Table
# The class filter selects the specific table. The exact class string is tried
# first so the result is unchanged whenever the original lookup succeeds.
tables = soup.find('table', {'class': 'wikitable sortable'})
if tables is None:
    # An exact-string match fails if the table carries extra classes or lists
    # them in another order; the CSS selector only requires both to be present.
    tables = soup.select_one('table.wikitable.sortable')
if tables is None:
    # Fail here with a clear message instead of an AttributeError on None
    # in the scraping cell.
    raise ValueError("No 'wikitable sortable' table found at " + url)

In [3]:
### Scraping the Data
tickers = []
companies = []

# 'tr' elements are table rows; [1:] skips the first row because it is the header.
for row in tables.find_all('tr')[1:]:
    cells = row.find_all('td')  # look the cells up once per row
    # A row without at least two <td> cells cannot supply both a symbol and a
    # company name; skip it instead of raising IndexError.
    if len(cells) < 2:
        continue
    # The ticker text is deliberately left raw (it still ends with '\n');
    # the Data Cleaning cell removes the trailing newline.
    tickers.append(cells[0].text)
    companies.append(cells[1].text.strip())

print(tickers)
print(companies)

['MMM\n', 'ABT\n', 'ABBV\n', 'ABMD\n', 'ACN\n', 'ATVI\n', 'ADBE\n', 'AMD\n', 'AAP\n', 'AES\n', 'AFL\n', 'A\n', 'APD\n', 'AKAM\n', 'ALK\n', 'ALB\n', 'ARE\n', 'ALXN\n', 'ALGN\n', 'ALLE\n', 'LNT\n', 'ALL\n', 'GOOGL\n', 'GOOG\n', 'MO\n', 'AMZN\n', 'AMCR\n', 'AEE\n', 'AAL\n', 'AEP\n', 'AXP\n', 'AIG\n', 'AMT\n', 'AWK\n', 'AMP\n', 'ABC\n', 'AME\n', 'AMGN\n', 'APH\n', 'ADI\n', 'ANSS\n', 'ANTM\n', 'AON\n', 'AOS\n', 'APA\n', 'AIV\n', 'AAPL\n', 'AMAT\n', 'APTV\n', 'ADM\n', 'ANET\n', 'AJG\n', 'AIZ\n', 'T\n', 'ATO\n', 'ADSK\n', 'ADP\n', 'AZO\n', 'AVB\n', 'AVY\n', 'BKR\n', 'BLL\n', 'BAC\n', 'BK\n', 'BAX\n', 'BDX\n', 'BRK.B\n', 'BBY\n', 'BIO\n', 'BIIB\n', 'BLK\n', 'BA\n', 'BKNG\n', 'BWA\n', 'BXP\n', 'BSX\n', 'BMY\n', 'AVGO\n', 'BR\n', 'BF.B\n', 'CHRW\n', 'COG\n', 'CDNS\n', 'CPB\n', 'COF\n', 'CAH\n', 'KMX\n', 'CCL\n', 'CARR\n', 'CAT\n', 'CBOE\n', 'CBRE\n', 'CDW\n', 'CE\n', 'CNC\n', 'CNP\n', 'CTL\n', 'CERN\n', 'CF\n', 'SCHW\n', 'CHTR\n', 'CVX\n', 'CMG\n', 'CB\n', 'CHD\n', 'CI\n', 'CINF\n', 'CTAS\n', 'C

In [4]:
### Data Cleaning
# The scraped tickers carry a trailing newline (e.g. 'MMM\n'). strip() removes
# surrounding whitespace only, so a ticker that happens to have no trailing
# newline is left intact instead of losing its last character.
symbols = [ticker.strip() for ticker in tickers]

print(symbols)

['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI', 'ADBE', 'AMD', 'AAP', 'AES', 'AFL', 'A', 'APD', 'AKAM', 'ALK', 'ALB', 'ARE', 'ALXN', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'ABC', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'ANTM', 'AON', 'AOS', 'APA', 'AIV', 'AAPL', 'AMAT', 'APTV', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'BKR', 'BLL', 'BAC', 'BK', 'BAX', 'BDX', 'BRK.B', 'BBY', 'BIO', 'BIIB', 'BLK', 'BA', 'BKNG', 'BWA', 'BXP', 'BSX', 'BMY', 'AVGO', 'BR', 'BF.B', 'CHRW', 'COG', 'CDNS', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'CNC', 'CNP', 'CTL', 'CERN', 'CF', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CTXS', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CMA', 'CAG', 'CXO', 'COP', 'ED', 'STZ', 'COO', 'CPRT', 'GLW', 'CTVA', 'COST', 'COTY', 'CCI', 'CSX', 'CMI', 'CVS', 'DHI', 'DHR', 'DRI', 'DVA

In [5]:
### DataFrame
import pandas as pd

# Two-column table: one row per scraped (symbol, company name) pair.
data = pd.DataFrame(data={"Symbols": symbols, "Company Name": companies})
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,Symbols,Company Name
0,MMM,3M Company
1,ABT,Abbott Laboratories
2,ABBV,AbbVie Inc.
3,ABMD,ABIOMED Inc
4,ACN,Accenture plc


In [6]:
### Convert into a Dictionary
# Pair each symbol with the company name at the same position. zip() stops at
# the shorter list and, for a repeated symbol, the later pairing wins.
# `companies` is left untouched, so this cell (and the DataFrame cell above)
# can be re-run safely.
res = dict(zip(symbols, companies))

In [7]:
# Display the symbol -> company name mapping built in the previous cell.
res

{'MMM': '3M Company',
 'ABT': 'Abbott Laboratories',
 'ABBV': 'AbbVie Inc.',
 'ABMD': 'ABIOMED Inc',
 'ACN': 'Accenture plc',
 'ATVI': 'Activision Blizzard',
 'ADBE': 'Adobe Inc.',
 'AMD': 'Advanced Micro Devices Inc',
 'AAP': 'Advance Auto Parts',
 'AES': 'AES Corp',
 'AFL': 'AFLAC Inc',
 'A': 'Agilent Technologies Inc',
 'APD': 'Air Products & Chemicals Inc',
 'AKAM': 'Akamai Technologies Inc',
 'ALK': 'Alaska Air Group Inc',
 'ALB': 'Albemarle Corp',
 'ARE': 'Alexandria Real Estate Equities',
 'ALXN': 'Alexion Pharmaceuticals',
 'ALGN': 'Align Technology',
 'ALLE': 'Allegion',
 'LNT': 'Alliant Energy Corp',
 'ALL': 'Allstate Corp',
 'GOOGL': 'Alphabet Inc. (Class A)',
 'GOOG': 'Alphabet Inc. (Class C)',
 'MO': 'Altria Group Inc',
 'AMZN': 'Amazon.com Inc.',
 'AMCR': 'Amcor plc',
 'AEE': 'Ameren Corp',
 'AAL': 'American Airlines Group',
 'AEP': 'American Electric Power',
 'AXP': 'American Express Co',
 'AIG': 'American International Group',
 'AMT': 'American Tower Corp.',
 'AWK': 'Am

In [8]:
# Invert the mapping so it goes company name -> symbol. Dictionary keys are
# unique, so if two symbols shared one company name only the last would remain.
new_res = {company: symbol for symbol, company in res.items()}

In [9]:
# Display the inverted company name -> symbol mapping.
new_res

{'3M Company': 'MMM',
 'Abbott Laboratories': 'ABT',
 'AbbVie Inc.': 'ABBV',
 'ABIOMED Inc': 'ABMD',
 'Accenture plc': 'ACN',
 'Activision Blizzard': 'ATVI',
 'Adobe Inc.': 'ADBE',
 'Advanced Micro Devices Inc': 'AMD',
 'Advance Auto Parts': 'AAP',
 'AES Corp': 'AES',
 'AFLAC Inc': 'AFL',
 'Agilent Technologies Inc': 'A',
 'Air Products & Chemicals Inc': 'APD',
 'Akamai Technologies Inc': 'AKAM',
 'Alaska Air Group Inc': 'ALK',
 'Albemarle Corp': 'ALB',
 'Alexandria Real Estate Equities': 'ARE',
 'Alexion Pharmaceuticals': 'ALXN',
 'Align Technology': 'ALGN',
 'Allegion': 'ALLE',
 'Alliant Energy Corp': 'LNT',
 'Allstate Corp': 'ALL',
 'Alphabet Inc. (Class A)': 'GOOGL',
 'Alphabet Inc. (Class C)': 'GOOG',
 'Altria Group Inc': 'MO',
 'Amazon.com Inc.': 'AMZN',
 'Amcor plc': 'AMCR',
 'Ameren Corp': 'AEE',
 'American Airlines Group': 'AAL',
 'American Electric Power': 'AEP',
 'American Express Co': 'AXP',
 'American International Group': 'AIG',
 'American Tower Corp.': 'AMT',
 'American 

In [10]:
### Packing the Model
# Serialise the company name -> symbol dictionary to disk for later reuse.
# Binary write mode is required by pickle.
with open("S&P500tickers.pickle", mode="wb") as pickle_file:
    pickle.dump(new_res, pickle_file)