# Test Notebook for Web Scraping

---

## Book Examples

### Exact Example

In [2]:
from bs4 import BeautifulSoup
from urllib.request import urlopen

In [3]:
# 1. Open the URL
# A timeout (in seconds) keeps this cell from blocking indefinitely if the
# server never answers; without one, urlopen waits with no upper bound.
html_ex = urlopen('http://www.pythonscraping.com/pages/page1.html', timeout=10)

In [4]:
# 2. Read the HTML with bs
# The response body is consumed in full by read(); close the response
# afterwards (even if parsing fails) so the underlying connection is released.
try:
    bs = BeautifulSoup(html_ex.read(), 'html.parser')
finally:
    html_ex.close()

In [5]:
# 3. Retrieve the first header tag
# find() returns the first matching tag, which is the same lookup that the
# attribute shortcut bs.h1 performs.
print(bs.find('h1'))

<h1>An Interesting Title</h1>


---

### Another Book Example

In [6]:
# Fetch and parse the page. The context manager closes the HTTP response as
# soon as parsing is done, and the timeout (in seconds) stops the cell from
# hanging if the server never answers.
with urlopen('http://www.pythonscraping.com/pages/page3.html', timeout=10) as html:
    bs = BeautifulSoup(html, 'html.parser')

# find() returns None when nothing matches, so fail with a clear message
# instead of an AttributeError on `.children`.
gift_table = bs.find('table', {'id': 'giftList'})
if gift_table is None:
    raise ValueError("no <table id='giftList'> element found on the page")

# Print every direct child of the table (rows and the text nodes between them).
for child in gift_table.children:
    print(child)



<tr><th>
Item Title
</th><th>
Description
</th><th>
Cost
</th><th>
Image
</th></tr>


<tr class="gift" id="gift1"><td>
Vegetable Basket
</td><td>
This vegetable basket is the perfect gift for your health conscious (or overweight) friends!
<span class="excitingNote">Now with super-colorful bell peppers!</span>
</td><td>
$15.00
</td><td>
<img src="../img/gifts/img1.jpg"/>
</td></tr>


<tr class="gift" id="gift2"><td>
Russian Nesting Dolls
</td><td>
Hand-painted by trained monkeys, these exquisite dolls are priceless! And by "priceless," we mean "extremely expensive"! <span class="excitingNote">8 entire dolls per set! Octuple the presents!</span>
</td><td>
$10,000.52
</td><td>
<img src="../img/gifts/img2.jpg"/>
</td></tr>


<tr class="gift" id="gift3"><td>
Fish Painting
</td><td>
If something seems fishy about this painting, it's because it's a fish! <span class="excitingNote">Also hand-painted by trained monkeys!</span>
</td><td>
$10,005.00
</td><td>
<img src="../img/gifts/img3.jpg"/>


---

### Modified Example

In [7]:
from urllib.request import Request, urlopen

In [8]:
# Request headers: send a browser-style User-Agent string with the request.
hdr = {
    'User-Agent': (
        'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    ),
}

In [9]:
# 0. Build a Request that carries both the target URL and the custom headers
req = Request(
    url='http://www.pythonscraping.com/pages/page1.html',
    headers=hdr,
)

In [10]:
# 1. Open the request (URL and headers)
# A timeout (in seconds) keeps this cell from blocking indefinitely if the
# server never answers; without one, urlopen waits with no upper bound.
html = urlopen(req, timeout=10)

In [11]:
# 2. Read the HTML with bs
# The response body is consumed in full by read(); close the response
# afterwards (even if parsing fails) so the underlying connection is released.
try:
    bs = BeautifulSoup(html.read(), 'html.parser')
finally:
    html.close()

In [12]:
# 3. Retrieve the first header tag
# find() returns the first matching tag, which is the same lookup that the
# attribute shortcut bs.h1 performs.
print(bs.find('h1'))

<h1>An Interesting Title</h1>


---

## Applied Examples

In [13]:
# Configuration for the applied example: target URL and request headers.

# Page listing Seinfeld quotes
url_quotes_page = "https://www.seinfeldscripts.com/seinfeld-quotes.html"

# Browser-style User-Agent sent with the request
hdr = {
    'User-Agent': (
        'Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    ),
}

In [14]:
# 0. Build a Request that carries both the quotes-page URL and the headers
req = Request(
    url=url_quotes_page,
    headers=hdr,
)

In [15]:
# 1. Get page HTML with urlopen
# A timeout (in seconds) keeps this cell from blocking indefinitely if the
# server never answers; without one, urlopen waits with no upper bound.
html = urlopen(req, timeout=10)

In [16]:
# 2. Read HTML with bs
# The response body is consumed in full by read(); close the response
# afterwards (even if parsing fails) so the underlying connection is released.
try:
    bs = BeautifulSoup(html.read(), 'html.parser')
finally:
    html.close()

In [17]:
# 3. Inspect the parsed page (uncomment the next line to display it)
# bs

---

## Sorting Quotes

In [18]:
quotes = []