# Web Scraping with Beautiful Soup

**Author:** Shinin Varongchayakul

**Date:** 24 Jan 2026

In [None]:
# Import packages
from urllib.parse import urljoin

import requests
import bs4

In [18]:
# Get the web page

# Set the URL
url = "https://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html"

# Get the response; requests has no default timeout, so set one to keep the
# cell from hanging indefinitely if the server never answers
response = requests.get(url, timeout=10)

# Decode the body as UTF-8 when building response.text
response.encoding = "utf-8"

# Print the status code
print(response.status_code)

# Raise an HTTPError on a 4xx/5xx status so later cells never parse an error page
response.raise_for_status()

200


In [19]:
# Parse the HTML with bs4

# Build the parse tree from the decoded page text with Python's built-in parser
soup = bs4.BeautifulSoup(markup=response.text, features="html.parser")

# Preview only the first 1,000 characters of the pretty-printed document
pretty_html = soup.prettify()
print(pretty_html[:1000])

<!DOCTYPE html>
<!--[if lt IE 7]>      <html lang="en-us" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html lang="en-us" class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html lang="en-us" class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html class="no-js" lang="en-us">
 <!--<![endif]-->
 <head>
  <title>
   Sapiens: A Brief History of Humankind | Books to Scrape - Sandbox
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <meta content="24th Jun 2016 09:29" name="created"/>
  <meta content="
    From a renowned historian comes a groundbreaking narrative of humanity’s creation and evolution—a #1 international bestseller—that explores the ways in which biology and history have defined us and enhanced our understanding of what it means to be “human.”One hundred thousand years ago, at least six different species of humans inhabited Earth. Yet today there is only one—h From a renowned historian comes a g

In [20]:
# Locate the first <h1> element on the page
title_tag = soup.find("h1")

# Its text content is the book title
title = title_tag.get_text()

# Show the title
print(title)

Sapiens: A Brief History of Humankind


In [21]:
# Get the img tag (the first <img> in the document)
img_tag = soup.find("img")

# Extract the src attribute (a path relative to the current page)
image_relative_url = img_tag.get("src")

# Set base URL (site root; the join below resolves against the page URL instead)
base_url = "https://books.toscrape.com/"

# Resolve the relative path against the page URL; urljoin collapses any
# "../" segments properly instead of stripping them by hand
image_full_url = urljoin(url, image_relative_url)

# Print the image URL
print(image_full_url)

https://books.toscrape.com/media/cache/ce/5f/ce5f052c65cc963cf4422be096e915c9.jpg


In [22]:
# Find the first <p> element carrying the "price_color" class
price_tag = soup.find("p", class_="price_color")

# Pull out its text content
price = price_tag.get_text()

# Show the price
print(price)

£54.23


In [23]:
# Locate the <div id="product_description"> that heads the description section
description_header = soup.find("div", attrs={"id": "product_description"})

# The description text is taken from the next <p> sibling after that header
description_tag = description_header.find_next_sibling("p")
description = description_tag.get_text()

# Show the description
print(description)

From a renowned historian comes a groundbreaking narrative of humanity’s creation and evolution—a #1 international bestseller—that explores the ways in which biology and history have defined us and enhanced our understanding of what it means to be “human.”One hundred thousand years ago, at least six different species of humans inhabited Earth. Yet today there is only one—h From a renowned historian comes a groundbreaking narrative of humanity’s creation and evolution—a #1 international bestseller—that explores the ways in which biology and history have defined us and enhanced our understanding of what it means to be “human.”One hundred thousand years ago, at least six different species of humans inhabited Earth. Yet today there is only one—homo sapiens. What happened to the others? And what may happen to us?Most books about the history of humanity pursue either a historical or a biological approach, but Dr. Yuval Noah Harari breaks the mold with this highly original book that begins ab

In [24]:
# Get the product information

# Get the product info table. A CSS selector matches the table by its classes
# regardless of their order or any extra classes, whereas
# find(class_="table table-striped") only matches that exact attribute string.
product_info_table = soup.select_one("table.table.table-striped")

# Print the table
print(product_info_table)

# Build a {label: value} dict from the rows: <th> holds the label, <td> the value
product_info = {
    row.find("th").get_text(): row.find("td").get_text()
    for row in product_info_table.find_all("tr")
}

# Print the product info
print(product_info)

<table class="table table-striped">
<tr>
<th>UPC</th><td>4165285e1663650f</td>
</tr>
<tr>
<th>Product Type</th><td>Books</td>
</tr>
<tr>
<th>Price (excl. tax)</th><td>£54.23</td>
</tr>
<tr>
<th>Price (incl. tax)</th><td>£54.23</td>
</tr>
<tr>
<th>Tax</th><td>£0.00</td>
</tr>
<tr>
<th>Availability</th>
<td>In stock (20 available)</td>
</tr>
<tr>
<th>Number of reviews</th>
<td>0</td>
</tr>
</table>
{'UPC': '4165285e1663650f', 'Product Type': 'Books', 'Price (excl. tax)': '£54.23', 'Price (incl. tax)': '£54.23', 'Tax': '£0.00', 'Availability': 'In stock (20 available)', 'Number of reviews': '0'}
