In [1]:
import requests
from bs4 import BeautifulSoup

### Parsing Yahoo quote summaries with BeautifulSoup

In [2]:
# Ticker symbol of the quote page to scrape; change it to fetch another company.
ticker = 'AAPL'
# The quote URL carries the symbol twice: in the path and in the `p` query parameter.
url = 'https://finance.yahoo.com/quote/{0}?p={0}'.format(ticker)

### Creating Chrome User-Agent headers
First, let's change the request headers so that we appear to be
a Chrome browser visiting the web page.

- Go to http://www.whoishostingthis.com/tools/user-agent/ to see your own user agent

- Read this post for background: https://stackoverflow.com/questions/27652543/how-to-use-python-requests-to-fake-a-browser-visit


In [3]:
# Requires the third-party fake-useragent package (install it first):
# https://pypi.python.org/pypi/fake-useragent
# `ua` is used in the following cells to obtain Chrome user-agent strings via `ua.chrome`.
from fake_useragent import UserAgent
ua = UserAgent()

In [4]:
# display a Chrome user-agent string provided by fake-useragent
ua.chrome

'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36'

In [5]:
# wrap a Chrome user-agent string in the headers dict passed to requests
chrome_agent = str(ua.chrome)
header = {'User-Agent': chrome_agent}
header

{'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'}

In [6]:
# Fetch the quote page, presenting the fake Chrome User-Agent header built above.
# requests applies no timeout by default, so set one to keep the cell from hanging forever.
htmlContent = requests.get(url, headers=header, timeout=30)
# Fail here with a clear HTTPError on a 4xx/5xx reply instead of parsing an error page later.
htmlContent.raise_for_status()
# preview the first 1000 characters of the raw HTML
htmlContent.text[:1000]

'<!DOCTYPE html><html id="atomic" class="NoJs chrome desktop" lang="en-US"><head prefix="og: http://ogp.me/ns#"><script>window.performance && window.performance.mark && window.performance.mark(\'PageStart\');</script><meta charset="utf-8"/><title>AAPL : Summary for Apple Inc. - Yahoo Finance</title><meta name="keywords" content="AAPL, Apple Inc., AAPL stock chart, Apple Inc. stock chart, stock chart, stocks, quotes, finance"/><meta http-equiv="x-dns-prefetch-control" content="on"/><meta property="twitter:dnt" content="on"/><meta property="twitter:site" content="@YahooFinance"/><meta property="fb:app_id" content="90376669494"/><meta name="theme-color" content="#400090"/><meta name="viewport" content="width=device-width, initial-scale=1"/><meta name="description" lang="en-US" content="View the basic AAPL stock chart on Yahoo Finance. Change the date range, chart type and compare Apple Inc. against other companies."/><link rel="dns-prefetch" href="//l.yimg.com"/><link rel="dns-prefetch" h

In [7]:
# create a BeautifulSoup object that parses the html
soup = BeautifulSoup(htmlContent.text, 'html.parser')
# The prettified page is very large; print only the first 1000 characters
# (the same preview length used for the raw response text) to keep the output readable.
print(soup.prettify()[:1000])

<!DOCTYPE html>
<html class="NoJs chrome desktop" id="atomic" lang="en-US">
 <head prefix="og: http://ogp.me/ns#">
  <script>
   window.performance && window.performance.mark && window.performance.mark('PageStart');
  </script>
  <meta charset="utf-8"/>
  <title>
   AAPL : Summary for Apple Inc. - Yahoo Finance
  </title>
  <meta content="AAPL, Apple Inc., AAPL stock chart, Apple Inc. stock chart, stock chart, stocks, quotes, finance" name="keywords"/>
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <meta content="on" property="twitter:dnt"/>
  <meta content="@YahooFinance" property="twitter:site"/>
  <meta content="90376669494" property="fb:app_id"/>
  <meta content="#400090" name="theme-color"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="View the basic AAPL stock chart on Yahoo Finance. Change the date range, chart type and compare Apple Inc. against other companies." lang="en-US" name="description"/>
  <link href="//l.yimg.c

In [8]:
# the quote summary statistics live in the div tagged data-test="quote-summary-stats";
# that div contains two tables
summary_divs = soup.find_all('div', attrs={'data-test': 'quote-summary-stats'})
qs = summary_divs[0]
print(qs.prettify())

<div class="Bxz(bb) D(ib) Va(t) Mih(250px)--lgv2 W(100%) Mt(-6px) W(49%)--lgv2 Mend(30px)--lgv2" data-reactid="34" data-test="quote-summary-stats" id="quote-summary">
 <div class="D(ib) W(1/2) Bxz(bb) Pend(12px) Va(t) ie-7_D(i)" data-reactid="35" data-test="left-summary-table">
  <table class="W(100%)" data-reactid="36">
   <tbody data-reactid="37">
    <tr class="Bxz(bb) Bdbw(1px) Bdbs(s) Bdc($c-fuji-grey-c) H(36px) " data-reactid="38">
     <td class="C(black) W(51%)" data-reactid="39">
      <span data-reactid="40">
       Previous Close
      </span>
     </td>
     <td class="Ta(end) Fw(b) Lh(14px)" data-reactid="41" data-test="PREV_CLOSE-value">
      <span class="Trsdu(0.3s) " data-reactid="42">
       <!-- react-text: 43 -->
       161.50
       <!-- /react-text -->
      </span>
     </td>
    </tr>
    <tr class="Bxz(bb) Bdbw(1px) Bdbs(s) Bdc($c-fuji-grey-c) H(36px) " data-reactid="44">
     <td class="C(black) W(51%)" data-reactid="45">
      <span data-reactid="46">
       

In [9]:

# collect every <table> inside the summary div, then show the first one
tables = qs.find_all('table')

first_table = tables[0]
print(first_table.prettify())

<table class="W(100%)" data-reactid="36">
 <tbody data-reactid="37">
  <tr class="Bxz(bb) Bdbw(1px) Bdbs(s) Bdc($c-fuji-grey-c) H(36px) " data-reactid="38">
   <td class="C(black) W(51%)" data-reactid="39">
    <span data-reactid="40">
     Previous Close
    </span>
   </td>
   <td class="Ta(end) Fw(b) Lh(14px)" data-reactid="41" data-test="PREV_CLOSE-value">
    <span class="Trsdu(0.3s) " data-reactid="42">
     <!-- react-text: 43 -->
     161.50
     <!-- /react-text -->
    </span>
   </td>
  </tr>
  <tr class="Bxz(bb) Bdbw(1px) Bdbs(s) Bdc($c-fuji-grey-c) H(36px) " data-reactid="44">
   <td class="C(black) W(51%)" data-reactid="45">
    <span data-reactid="46">
     Open
    </span>
   </td>
   <td class="Ta(end) Fw(b) Lh(14px)" data-reactid="47" data-test="OPEN-value">
    <span class="Trsdu(0.3s) " data-reactid="48">
     <!-- react-text: 49 -->
     162.61
     <!-- /react-text -->
    </span>
   </td>
  </tr>
  <tr class="Bxz(bb) Bdbw(1px) Bdbs(s) Bdc($c-fuji-grey-c) H(36px) 

In [10]:
# flattened text of each summary table (labels and values run together)
[summary_table.get_text() for summary_table in tables]

["Previous Close161.50Open162.61Bid160.85 x 100Ask160.86 x 1000Day's Range158.77 - 163.9652 Week Range104.08 - 164.94Volume70,963,106Avg. Volume26,685,771",
 'Market Cap830.88BBeta1.43PE Ratio (TTM)18.26EPS (TTM)8.81Earnings DateOct 23, 2017 - Oct 27, 2017Dividend & Yield2.52 (1.56%)Ex-Dividend Date2017-08-101y Target Est171.95']

In [11]:
# let's parse the tables into nested lists:
#   out  -> one entry per table
#   tout -> the rows of the current table
#   rout -> the text of each <td> cell in the current row, e.g. [label, value]
out = []
for table in tables:
    tout = []
    for row in table.find_all('tr'):
        rout = []
        for column in row.find_all('td'):
            rout.append(column.get_text())
        # keep the parsed row (previously it was printed and then discarded)
        tout.append(rout)
        print(rout)
    # keep the parsed table so `out` holds the full result after the loop
    out.append(tout)
    print('')

['Previous Close', '161.50']
['Open', '162.61']
['Bid', '160.85 x 100']
['Ask', '160.86 x 1000']
["Day's Range", '158.77 - 163.96']
['52 Week Range', '104.08 - 164.94']
['Volume', '70,963,106']
['Avg. Volume', '26,685,771']

['Market Cap', '830.88B']
['Beta', '1.43']
['PE Ratio (TTM)', '18.26']
['EPS (TTM)', '8.81']
['Earnings Date', 'Oct 23, 2017 - Oct 27, 2017']
['Dividend & Yield', '2.52 (1.56%)']
['Ex-Dividend Date', '2017-08-10']
['1y Target Est', '171.95']

