In [1]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Paginated catalogue URL; "{}" is filled with the page number via str.format.
BASE_URL = "http://books.toscrape.com/catalogue/page-{}.html"

# One accumulator list per output column (each a distinct list object).
titles, authors, prices, availability, ratings = [], [], [], [], []

# Translates the spelled-out star rating ("One" .. "Five") to its integer value.
rating_map = dict(zip(("One", "Two", "Three", "Four", "Five"), range(1, 6)))

# Seconds to wait on any single request so a stalled connection cannot hang the run.
REQUEST_TIMEOUT = 10


def _fetch_soup(session, url):
    """Download *url* with *session* and return it parsed as BeautifulSoup.

    Raises:
        requests.HTTPError: on a non-2xx response, so an error page is never
            parsed as if it were catalogue data.
        requests.RequestException: on connection failures or timeouts.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Parse the raw bytes rather than response.text: if the server omits the
    # charset, requests decodes text/* as ISO-8859-1, which turns "£" into
    # "Â£". Given bytes, BeautifulSoup detects the encoding from the document.
    return BeautifulSoup(response.content, "html.parser")


def _clean_text(tag):
    """Return *tag*'s whitespace-stripped text, or None when the tag is absent."""
    return tag.text.strip() if tag is not None else None


# Loop through multiple pages (first 5 pages as an example).
# A single Session reuses the connection for every listing and detail request.
with requests.Session() as session:
    for page in range(1, 6):
        url = BASE_URL.format(page)
        soup = _fetch_soup(session, url)

        # Each book on a listing page lives in its own <article class="product_pod">.
        for book in soup.find_all("article", class_="product_pod"):
            link = book.h3.a

            # The visible link text may be truncated; the "title" attribute is used.
            titles.append(link["title"])

            # Resolve the relative href against the listing page it came from,
            # the same way a browser would.
            book_url = urljoin(url, link["href"])
            book_soup = _fetch_soup(session, book_url)

            # NOTE(review): the value stored as "author" is the first cell of the
            # SECOND row of the detail page's striped table. The original code
            # marks this as an assumption; that row may hold a different field
            # (e.g. product type) -- verify against a live page.
            info_table = book_soup.find("table", class_="table table-striped")
            rows = info_table.find_all("tr") if info_table is not None else []
            authors.append(_clean_text(rows[1].td) if len(rows) > 1 else None)

            # Price is kept as the displayed string (currency symbol included).
            prices.append(_clean_text(book.find("p", class_="price_color")))

            # Match on the "availability" class alone so the lookup does not
            # depend on the exact, full class-attribute string.
            availability.append(_clean_text(book.find("p", class_="availability")))

            # The rating is encoded as a CSS class, e.g. class="star-rating Three".
            # Look the word up by membership instead of by position; 0 = unknown.
            rating_tag = book.find("p", class_="star-rating")
            rating_words = rating_tag.get("class", []) if rating_tag is not None else []
            ratings.append(
                next((rating_map[w] for w in rating_words if w in rating_map), 0)
            )

# Build the result table: one column per scraped field, in this column order.
df = pd.DataFrame(
    dict(
        Title=titles,
        Author=authors,
        Price=prices,
        Availability=availability,
        Rating=ratings,
    )
)

# Write the table to CSV, omitting the positional row index.
df.to_csv(path_or_buf="books_data.csv", index=False)

print("Scraping complete! Data saved as books_data.csv")


Scraping complete! Data saved as books_data.csv
