In [1]:
# Project 2: Top Books Scraper

!pip install requests beautifulsoup4 pandas lxml

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://books.toscrape.com/"

# Fetch the catalogue page. Without a timeout requests will wait forever on a
# stalled connection; 10 seconds is an arbitrary but generous upper bound.
response = requests.get(url, timeout=10)
# Abort on a 4xx/5xx reply instead of parsing an error page, which would
# silently produce an empty book list further down.
response.raise_for_status()

# Parse the raw response bytes with the built-in HTML parser.
soup = BeautifulSoup(response.content, "html.parser")

# Every book on the page is an <article class="product_pod"> element.
books = soup.find_all("article", class_="product_pod")

# Rows of [title, price, rating], one per book found on the page.
book_data = []
# The rating is carried as a word among the CSS classes of the
# "star-rating" paragraph; map that word to its numeric value.
rating_map = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}

for book in books:
    # The title is read from the "title" attribute of the heading link.
    title = book.h3.a["title"]

    # Drop everything except digits and the decimal point (currency symbol,
    # stray whitespace) before converting the price to a float.
    price_text = book.find("p", class_="price_color").text
    price = float(re.sub(r"[^0-9.]", "", price_text))

    # "class" is a list of class names; pick the one that is a rating word.
    rating_class = book.find("p", class_="star-rating")["class"]
    rating_word = [r for r in rating_class if r in rating_map][0]
    rating = rating_map[rating_word]

    book_data.append([title, price, rating])

# Build a table from the scraped rows; the column names double as CSV headers.
df = pd.DataFrame(book_data, columns=["Title", "Price (£)", "Rating (1-5)"])

# Save to CSV (index=False keeps the pandas row index out of the file).
df.to_csv("books_data.csv", index=False)

print("📚 Scraped Books Data (first 5):")
print(df.head(), "\n")

# Analysis
if df.empty:
    # idxmax() raises ValueError on an empty column and mean() would be NaN,
    # so report the situation plainly instead of crashing.
    print("No books were scraped; skipping analysis.")
else:
    avg_price = df["Price (£)"].mean()
    # idxmax() returns the label of the first row holding the highest rating,
    # so ties resolve to the earliest book in page order.
    best_book = df.loc[df["Rating (1-5)"].idxmax()]

    print(f"💰 Average Price: £{avg_price:.2f}")
    print(f"⭐ Best Rated Book: {best_book['Title']} (Rating: {best_book['Rating (1-5)']})")


📚 Scraped Books Data (first 5):
                                   Title  Price (£)  Rating (1-5)
0                   A Light in the Attic      51.77             3
1                     Tipping the Velvet      53.74             1
2                             Soumission      50.10             1
3                          Sharp Objects      47.82             4
4  Sapiens: A Brief History of Humankind      54.23             5 

💰 Average Price: £38.05
⭐ Best Rated Book: Sapiens: A Brief History of Humankind (Rating: 5)
