In [31]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import json
import pandas as pd

In [24]:
url = 'https://books.toscrape.com/'

In [25]:
response = requests.get(url)

In [26]:
soup = BeautifulSoup(response.text, 'html.parser')

In [27]:
books = soup.find_all('article', class_='product_pod')[:10]

In [30]:
book_list = []
for book in books:
    # Book title
    name = book.h3.a['title']

    # Book price, remove weird characters like 'Â'
    price_text = book.find('p', class_='price_color').text
    price = float(price_text.replace('£', '').replace('Â', '').strip())

    # Full URL
    url = urljoin(url, book.h3.a['href'])

    # Append to list
    book_list.append({
        'name': name,
        'price': price,
        'url': url
    })

# Print in beautiful JSON format
print(json.dumps(book_list, indent=4, ensure_ascii=False))

# Optional: Convert to DataFrame
df = pd.DataFrame(book_list)
print(df)

[
    {
        "name": "A Light in the Attic",
        "price": 51.77,
        "url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/catalogue/tipping-the-velvet_999/catalogue/soumission_998/catalogue/sharp-objects_997/catalogue/sapiens-a-brief-history-of-humankind_996/catalogue/the-requiem-red_995/catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/catalogue/the-coming-woman-a-novel-based-on-the-life-of-the-infamous-feminist-victoria-woodhull_993/catalogue/the-boys-in-the-boat-nine-americans-and-their-epic-quest-for-gold-at-the-1936-berlin-olympics_992/catalogue/the-black-maria_991/catalogue/a-light-in-the-attic_1000/catalogue/tipping-the-velvet_999/catalogue/soumission_998/catalogue/sharp-objects_997/catalogue/sapiens-a-brief-history-of-humankind_996/catalogue/the-requiem-red_995/catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/catalogue/the-coming-woman-a-novel-based-on-the-life-of-the-infamous-feminist-victoria-woodhull_993/catalogue/t

In [32]:
import pandas as pd

In [33]:
pd.DataFrame(book_list)

Unnamed: 0,name,price,url
0,A Light in the Attic,51.77,https://books.toscrape.com/catalogue/a-light-i...
1,Tipping the Velvet,53.74,https://books.toscrape.com/catalogue/a-light-i...
2,Soumission,50.1,https://books.toscrape.com/catalogue/a-light-i...
3,Sharp Objects,47.82,https://books.toscrape.com/catalogue/a-light-i...
4,Sapiens: A Brief History of Humankind,54.23,https://books.toscrape.com/catalogue/a-light-i...
5,The Requiem Red,22.65,https://books.toscrape.com/catalogue/a-light-i...
6,The Dirty Little Secrets of Getting Your Dream...,33.34,https://books.toscrape.com/catalogue/a-light-i...
7,The Coming Woman: A Novel Based on the Life of...,17.93,https://books.toscrape.com/catalogue/a-light-i...
8,The Boys in the Boat: Nine Americans and Their...,22.6,https://books.toscrape.com/catalogue/a-light-i...
9,The Black Maria,52.15,https://books.toscrape.com/catalogue/a-light-i...


In [34]:
soup.find_all('a')

[<a href="index.html">Books to Scrape</a>,
 <a href="index.html">Home</a>,
 <a href="catalogue/category/books_1/index.html">
                             
                                 Books
                             
                         </a>,
 <a href="catalogue/category/books/travel_2/index.html">
                             
                                 Travel
                             
                         </a>,
 <a href="catalogue/category/books/mystery_3/index.html">
                             
                                 Mystery
                             
                         </a>,
 <a href="catalogue/category/books/historical-fiction_4/index.html">
                             
                                 Historical Fiction
                             
                         </a>,
 <a href="catalogue/category/books/sequential-art_5/index.html">
                             
                                 Sequential Art
            

In [35]:
import requests
from bs4 import BeautifulSoup

url = 'https://books.toscrape.com/catalogue/page-1.html'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

books = soup.find_all('article', class_='product_pod')[:10]  # only first 10
book_list = []

for book in books:
    name = book.h3.a['title']
    price_text = book.find('p', class_='price_color').text
    price = float(price_text.replace('£', '').replace('Â', '').strip())
    url_book = 'https://books.toscrape.com/catalogue/' + book.h3.a['href']

    book_list.append({
        'name': name,
        'price': price,
        'url': url_book
    })

for b in book_list:
    print(b)


{'name': 'A Light in the Attic', 'price': 51.77, 'url': 'https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html'}
{'name': 'Tipping the Velvet', 'price': 53.74, 'url': 'https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html'}
{'name': 'Soumission', 'price': 50.1, 'url': 'https://books.toscrape.com/catalogue/soumission_998/index.html'}
{'name': 'Sharp Objects', 'price': 47.82, 'url': 'https://books.toscrape.com/catalogue/sharp-objects_997/index.html'}
{'name': 'Sapiens: A Brief History of Humankind', 'price': 54.23, 'url': 'https://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html'}
{'name': 'The Requiem Red', 'price': 22.65, 'url': 'https://books.toscrape.com/catalogue/the-requiem-red_995/index.html'}
{'name': 'The Dirty Little Secrets of Getting Your Dream Job', 'price': 33.34, 'url': 'https://books.toscrape.com/catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/index.html'}
{'name': 'The Coming Woman: A N