In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [4]:
# Step 1: Target website
url = "https://books.toscrape.com/catalogue/page-1.html"

In [5]:
# Lists to store data
book_titles = []
book_prices = []
book_ratings = []

In [8]:
for page in range(1, 6):  # Scrape first 5 pages
    url = f"https://books.toscrape.com/catalogue/page-{page}.html"
    response = requests.get(url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, "html.parser")

    books = soup.find_all("article", class_="product_pod")

    for book in books:
        try:
            # Extract title
            title = book.h3.a["title"]

            # Extract price (handle encoding)
            price_text = book.find("p", class_="price_color").text
            price_text = price_text.encode('ascii', 'ignore').decode()  # remove special chars
            price = price_text.replace("£", "").strip()

            # Extract rating
            rating = book.p["class"][1]

            # Append only if all fields exist
            if title and price and rating:
                book_titles.append(title)
                book_prices.append(float(price))
                book_ratings.append(rating)

        except Exception as e:
            # Skip book if any field missing or parsing fails
            print(f"Skipped one book due to error: {e}")
            continue

# Ensure all lists have equal length
min_len = min(len(book_titles), len(book_prices), len(book_ratings))
data = pd.DataFrame({
    "Title": book_titles[:min_len],
    "Price (£)": book_prices[:min_len],
    "Rating": book_ratings[:min_len]
})

# Save CSV
data.to_csv("books_data.csv", index=False, encoding='utf-8-sig')

print(f"✅ Web scraping completed successfully! {len(data)} books saved as 'books_data.csv'")
data.head()

✅ Web scraping completed successfully! 200 books saved as 'books_data.csv'


Unnamed: 0,Title,Price (£),Rating
0,A Light in the Attic,51.77,Three
1,A Light in the Attic,53.74,One
2,Tipping the Velvet,50.1,One
3,Soumission,47.82,Four
4,Sharp Objects,54.23,Five
