# Web Scraping with BeautifulSoup
This notebook demonstrates web scraping using BeautifulSoup and requests on the 'Books to Scrape' website.

In [None]:
import requests
from bs4 import BeautifulSoup


### Get and Parse HTML

Now that we know where the information lives, let's retrieve the HTML and parse it into a form that is useful to us. First, let's retrieve the entire web page.

In [None]:
# URL of the page to scrape
url = "http://books.toscrape.com/"

# Step 1: Send a request to fetch the HTML content.
# A timeout is passed explicitly because requests applies none by default,
# so an unresponsive server would otherwise hang this cell indefinitely.
response = requests.get(url, timeout=10)
if response.status_code == 200:
    print("Successfully fetched the page")
else:
    # Include the status code so the failure can be diagnosed from the output
    print(f"Failed to fetch the page (status code: {response.status_code})")


Successfully fetched the page


In [None]:
# Step 2: Parse the HTML content.
# The raw bytes (response.content) are passed instead of response.text so that
# BeautifulSoup can detect the document encoding itself (the page declares
# charset=utf-8 in a <meta> tag). Decoding via response.text relies on the
# charset requests guesses from the HTTP headers; a wrong guess is the likely
# cause of prices rendering as "Â£12.84" instead of "£12.84".
soup = BeautifulSoup(response.content, 'html.parser')


In [None]:
# Show only the beginning of the pretty-printed document: printing the whole
# page floods the cell output and bloats the saved notebook.
PREVIEW_CHARS = 2000  # number of characters to display; increase to see more
print(soup.prettify()[:PREVIEW_CHARS])

<!DOCTYPE html>
<!--[if lt IE 7]>      <html lang="en-us" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html lang="en-us" class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html lang="en-us" class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html class="no-js" lang="en-us">
 <!--<![endif]-->
 <head>
  <title>
   All products | Books to Scrape - Sandbox
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <meta content="24th Jun 2016 09:29" name="created"/>
  <meta content="" name="description"/>
  <meta content="width=device-width" name="viewport"/>
  <meta content="NOARCHIVE,NOCACHE" name="robots"/>
  <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
  <!--[if lt IE 9]>
        <script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>
        <![endif]-->
  <link href="../static/oscar/favicon.ico" rel="shortcut icon"/>
  <link href="../static/oscar/css/styles.css" rel="stylesheet" typ

In [None]:
# Look up the first <li> whose class attribute is exactly the product-grid
# class string, and print its markup to inspect one product entry.
li = soup.find('li', class_='col-xs-6 col-sm-4 col-md-3 col-lg-3')
print(li)

<li class="col-xs-6 col-sm-4 col-md-3 col-lg-3">
<article class="product_pod">
<div class="image_container">
<a href="in-her-wake_980/index.html"><img alt="In Her Wake" class="thumbnail" src="../media/cache/5d/72/5d72709c6a7a9584a4d1cf07648bfce1.jpg"/></a>
</div>
<p class="star-rating One">
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
</p>
<h3><a href="in-her-wake_980/index.html" title="In Her Wake">In Her Wake</a></h3>
<div class="product_price">
<p class="price_color">Â£12.84</p>
<p class="instock availability">
<i class="icon-ok"></i>
    
        In stock
    
</p>
<form>
<button class="btn btn-primary btn-block" data-loading-text="Adding..." type="submit">Add to basket</button>
</form>
</div>
</article>
</li>


In [None]:
# Collect every <li> carrying the product-grid class string, then print the
# first match. Note: `li` now holds a list of tags rather than a single tag.
li = soup.find_all('li', class_='col-xs-6 col-sm-4 col-md-3 col-lg-3')
print(li[0])

<li class="col-xs-6 col-sm-4 col-md-3 col-lg-3">
<article class="product_pod">
<div class="image_container">
<a href="frankenstein_20/index.html"><img alt="Frankenstein" class="thumbnail" src="../media/cache/00/25/0025515e987a1ebd648773f9ac70bfe6.jpg"/></a>
</div>
<p class="star-rating Two">
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
</p>
<h3><a href="frankenstein_20/index.html" title="Frankenstein">Frankenstein</a></h3>
<div class="product_price">
<p class="price_color">£38.00</p>
<p class="instock availability">
<i class="icon-ok"></i>
    
        In stock
    
</p>
<form>
<button class="btn btn-primary btn-block" data-loading-text="Adding..." type="submit">Add to basket</button>
</form>
</div>
</article>
</li>


In [None]:
# Step 3: Print the title and price of every book found on the parsed page
books = soup.find_all('article', class_='product_pod')
for book in books:
    # The full title is stored in the `title` attribute of the link inside <h3>
    title = book.find('h3').find('a')['title']
    price = book.find('p', class_='price_color').get_text()
    # One print call; sep="\n" puts each field and the divider on its own line
    print(f"Title: {title}", f"Price: {price}", "-" * 30, sep="\n")

Title: Frankenstein
Price: £38.00
------------------------------
Title: Forever Rockers (The Rocker #12)
Price: £28.80
------------------------------
Title: Fighting Fate (Fighting #6)
Price: £39.24
------------------------------
Title: Emma
Price: £32.93
------------------------------
Title: Eat, Pray, Love
Price: £51.32
------------------------------
Title: Deep Under (Walker Security #1)
Price: £47.09
------------------------------
Title: Choosing Our Religion: The Spiritual Lives of America's Nones
Price: £28.42
------------------------------
Title: Charlie and the Chocolate Factory (Charlie Bucket #1)
Price: £22.85
------------------------------
Title: Charity's Cross (Charles Towne Belles #4)
Price: £41.24
------------------------------
Title: Bright Lines
Price: £39.07
------------------------------
Title: Bridget Jones's Diary (Bridget Jones #1)
Price: £29.82
------------------------------
Title: Bounty (Colorado Mountain #7)
Price: £37.26
------------------------------
Title: 

In [None]:
# Step 3 (continued): Extract title, price, and stock availability for every
# book found on the parsed page
books = soup.find_all('article', class_='product_pod')
for book in books:
    title = book.h3.a['title']
    price = book.find('p', class_='price_color').text
    # Match on the single "availability" class rather than the exact string
    # "instock availability", so an entry with a different stock-state class
    # is still found instead of find() returning None.
    availability_tag = book.find('p', class_='availability')
    # strip=True removes the newlines and indentation surrounding the text in
    # the markup, so the value prints as "In stock" on a single line.
    inStock = availability_tag.get_text(strip=True) if availability_tag else "Unknown"
    print(f"Title: {title}")
    print(f"Price: {price}")
    print(f"Availability: {inStock}")
    print("-" * 30)

Title: Frankenstein
Price: £38.00
Availability: 

    
        In stock
    

------------------------------
Title: Forever Rockers (The Rocker #12)
Price: £28.80
Availability: 

    
        In stock
    

------------------------------
Title: Fighting Fate (Fighting #6)
Price: £39.24
Availability: 

    
        In stock
    

------------------------------
Title: Emma
Price: £32.93
Availability: 

    
        In stock
    

------------------------------
Title: Eat, Pray, Love
Price: £51.32
Availability: 

    
        In stock
    

------------------------------
Title: Deep Under (Walker Security #1)
Price: £47.09
Availability: 

    
        In stock
    

------------------------------
Title: Choosing Our Religion: The Spiritual Lives of America's Nones
Price: £28.42
Availability: 

    
        In stock
    

------------------------------
Title: Charlie and the Chocolate Factory (Charlie Bucket #1)
Price: £22.85
Availability: 

    
        In stock
    

--------------------

In [None]:
# Step 4: Extract book details for multiple pages.
# NOTE: this loop reassigns `url`, `response`, `soup` and `books`; after it
# finishes they refer to the last page scraped, not the home page.
for page in range(1, 3):  # Scraping the first 2 pages as an example
    url = f"http://books.toscrape.com/catalogue/page-{page}.html"
    # Explicit timeout: requests applies none by default and could hang the cell
    response = requests.get(url, timeout=10)
    # Stop with an HTTPError on a 4xx/5xx reply instead of silently parsing an
    # error page and printing nothing
    response.raise_for_status()
    # Parse the raw bytes so BeautifulSoup detects the page encoding itself;
    # decoding through response.text is the likely cause of prices showing up
    # as "Â£51.77" instead of "£51.77"
    soup = BeautifulSoup(response.content, 'html.parser')
    books = soup.find_all('article', class_='product_pod')
    for book in books:
        title = book.h3.a['title']
        price = book.find('p', class_='price_color').text
        print(f"Title: {title}")
        print(f"Price: {price}")
        print("-" * 30)


Title: A Light in the Attic
Price: Â£51.77
------------------------------
Title: Tipping the Velvet
Price: Â£53.74
------------------------------
Title: Soumission
Price: Â£50.10
------------------------------
Title: Sharp Objects
Price: Â£47.82
------------------------------
Title: Sapiens: A Brief History of Humankind
Price: Â£54.23
------------------------------
Title: The Requiem Red
Price: Â£22.65
------------------------------
Title: The Dirty Little Secrets of Getting Your Dream Job
Price: Â£33.34
------------------------------
Title: The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull
Price: Â£17.93
------------------------------
Title: The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics
Price: Â£22.60
------------------------------
Title: The Black Maria
Price: Â£52.15
------------------------------
Title: Starving Hearts (Triangular Trade Trilogy, #1)
Price: Â£13.99
------------------------------
T