# Let's livecode!

First, let's import the libraries we'll need: `requests` to download the page and `BeautifulSoup` to parse its HTML.

In [4]:
import requests
from bs4 import BeautifulSoup

Now let's set the URL for the page we're scraping, make a request to get the HTML, and parse it.

In [5]:
url = "http://books.toscrape.com/index.html"
# A timeout keeps the cell from hanging forever if the site is unreachable.
response = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page.
response.raise_for_status()
# Hand BeautifulSoup the raw bytes so it can work out the encoding itself.
html = response.content
scraped = BeautifulSoup(html, 'html.parser')

In [6]:
# A bare expression on the last line of a cell is shown as the cell's output;
# here that is the whole parsed page.
scraped

<!DOCTYPE html>

<!--[if lt IE 7]>      <html lang="en-us" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html lang="en-us" class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html lang="en-us" class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-us"> <!--<![endif]-->
<head>
<title>
    All products | Books to Scrape - Sandbox
</title>
<meta content="text/html; charset=utf-8" http-equiv="content-type"/>
<meta content="24th Jun 2016 09:29" name="created"/>
<meta content="" name="description"/>
<meta content="width=device-width" name="viewport"/>
<meta content="NOARCHIVE,NOCACHE" name="robots"/>
<!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
<!--[if lt IE 9]>
        <script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>
        <![endif]-->
<link href="static/oscar/favicon.ico" rel="shortcut icon"/>
<link href="static/oscar/css/styles.css" rel="stylesheet" type="text/css"/>
<link href="s

## Finding single elements

In [7]:
# find() returns the first tag that matches, here the page's <title>.
scraped.find("title")

<title>
    All products | Books to Scrape - Sandbox
</title>

In [8]:
# Accessing a tag name as an attribute gives the same <title> tag that find("title") returns.
scraped.title

<title>
    All products | Books to Scrape - Sandbox
</title>

In [9]:
# .text is the tag's text content, including the surrounding newlines and spaces.
scraped.title.text

'\n    All products | Books to Scrape - Sandbox\n'

In [10]:
# strip() trims the leading and trailing whitespace from the text.
scraped.title.text.strip()

'All products | Books to Scrape - Sandbox'

In [11]:
# Keep the cleaned-up page title in a variable.
title = scraped.title.text.strip()

In [13]:
# Show the stored page title.
print(title)

All products | Books to Scrape - Sandbox


In [16]:
# Chained attribute access walks to the first <article>, then its <h3>, then the <a> inside it.
first_link_text = scraped.article.h3.a.text.strip()

In [17]:
# The link's visible text is abbreviated on the page (note the trailing "...").
print(first_link_text)

A Light in the ...


In [18]:
# Tag attributes are read like dictionary keys; the "title" attribute holds the full book name.
# NOTE(review): first_link_text now holds the title attribute rather than the link text.
first_link_text = scraped.article.h3.a["title"]

In [20]:
# Prints the full, untruncated book title.
print(first_link_text)

A Light in the Attic


In [21]:
# Every book on the page sits in an <article class="product_pod">.
items = scraped.find_all("article", class_="product_pod")

for item in items:
    # The link text is abbreviated on the page, so read the full name from
    # the "title" attribute. A dedicated name is used so the page title
    # stored in `title` by an earlier cell is not overwritten.
    book_title = item.h3.a["title"]
    print(book_title)

A Light in the Attic
Tipping the Velvet
Soumission
Sharp Objects
Sapiens: A Brief History of Humankind
The Requiem Red
The Dirty Little Secrets of Getting Your Dream Job
The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull
The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics
The Black Maria
Starving Hearts (Triangular Trade Trilogy, #1)
Shakespeare's Sonnets
Set Me Free
Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)
Rip it Up and Start Again
Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991
Olio
Mesaerion: The Best Science Fiction Stories 1800-1849
Libertarianism for Beginners
It's Only the Himalayas


In [23]:
# title=True matches every <a> tag that has a "title" attribute, whatever its value.
title_links = scraped.find_all("a", title=True)

for link in title_links:
    # A dedicated name is used so the page title stored in `title` by an
    # earlier cell is not overwritten.
    book_title = link["title"]
    print(book_title)

A Light in the Attic
Tipping the Velvet
Soumission
Sharp Objects
Sapiens: A Brief History of Humankind
The Requiem Red
The Dirty Little Secrets of Getting Your Dream Job
The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull
The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics
The Black Maria
Starving Hearts (Triangular Trade Trilogy, #1)
Shakespeare's Sonnets
Set Me Free
Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)
Rip it Up and Start Again
Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991
Olio
Mesaerion: The Best Science Fiction Stories 1800-1849
Libertarianism for Beginners
It's Only the Himalayas


In [27]:
# select() takes a CSS selector: every element carrying the "price_color" class.
prices = scraped.select(".price_color")

for price_tag in prices:
    # Each tag's text is the price as shown on the page, currency sign included.
    print(price_tag.get_text())

£51.77
£53.74
£50.10
£47.82
£54.23
£22.65
£33.34
£17.93
£22.60
£52.15
£13.99
£20.66
£17.46
£52.29
£35.02
£57.25
£23.88
£37.59
£51.33
£45.17


In [32]:
for price in prices:
    # Drop the leading pound sign so the remainder parses as a number.
    # The result gets its own name so the loop variable stays a tag
    # instead of being rebound to a float halfway through the loop body.
    price_value = float(price.text.lstrip("£"))
    print(price_value)

51.77
53.74
50.1
47.82
54.23
22.65
33.34
17.93
22.6
52.15
13.99
20.66
17.46
52.29
35.02
57.25
23.88
37.59
51.33
45.17


In [7]:
# Every book card on the page carries the "product_pod" class.
articles = scraped.select(".product_pod")

# Collects one single-entry dict ({title: price}) per book, in page order.
title_prices = []

for article in articles:
    book_title = article.h3.a["title"]
    price_tag = article.find("p", class_="price_color")
    # Strip the leading pound sign before converting the text to a number.
    amount = float(price_tag.text.lstrip("£"))
    title_prices.append({book_title: amount})

print(title_prices)

[{'A Light in the Attic': 51.77}, {'Tipping the Velvet': 53.74}, {'Soumission': 50.1}, {'Sharp Objects': 47.82}, {'Sapiens: A Brief History of Humankind': 54.23}, {'The Requiem Red': 22.65}, {'The Dirty Little Secrets of Getting Your Dream Job': 33.34}, {'The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull': 17.93}, {'The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics': 22.6}, {'The Black Maria': 52.15}, {'Starving Hearts (Triangular Trade Trilogy, #1)': 13.99}, {"Shakespeare's Sonnets": 20.66}, {'Set Me Free': 17.46}, {"Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)": 52.29}, {'Rip it Up and Start Again': 35.02}, {'Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991': 57.25}, {'Olio': 23.88}, {'Mesaerion: The Best Science Fiction Stories 1800-1849': 37.59}, {'Libertarianism for Beginners': 51.33}, {"It's Only the Himalayas": 45.17}]


## Complete scraper that can navigate links!

In [42]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "http://books.toscrape.com/"
REQUEST_TIMEOUT = 10  # seconds to wait for each HTTP response before giving up


def fetch_soup(session, url):
    """Download `url` through `session` and return the page parsed by BeautifulSoup.

    Raises requests.HTTPError when the server answers with a 4xx/5xx status,
    so an error page is never parsed as if it were real content.
    """
    page = session.get(url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    return BeautifulSoup(page.content, 'html.parser')


def extract_description(product_soup):
    """Return the description text of a product page, or "" if it has none."""
    heading = product_soup.find("div", id="product_description")
    if heading is None:
        return ""
    # The description is the first <p> that follows the heading <div>.
    paragraph = heading.find_next_sibling("p")
    return paragraph.text.strip() if paragraph is not None else ""


index_url = urljoin(BASE_URL, "index.html")

title_descriptions = []

# One session reuses the same connection for the index page and every product page.
with requests.Session() as session:
    scraped = fetch_soup(session, index_url)
    articles = scraped.select(".product_pod")

    for article in articles:
        title = article.h3.a["title"]
        # hrefs are relative to the page they appear on, so resolve them
        # against that page's URL rather than concatenating strings.
        product_url = urljoin(index_url, article.h3.a["href"])
        product_scraped = fetch_soup(session, product_url)
        title_descriptions.append({title: extract_description(product_scraped)})

print(title_descriptions)
    

[{'A Light in the Attic': "It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love th It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love that Silverstein. Need proof of his genius? RockabyeRockabye baby, in the treetopDon't you know a treetopIs no safe place to rock?And who put you up there,And your cradle, too?Baby, I think someone down here'sGot it in for y