In [12]:
import requests
from bs4 import BeautifulSoup
import csv
import logging

In [13]:
# URL template for the paginated catalogue listing; the "{}" placeholder is
# filled with the page number via str.format() in scrape_page().
BASE_URL = "https://books.toscrape.com/catalogue/page-{}.html"

In [14]:
# Route log records of level INFO and above to "scraper.log", each line
# formatted as "<timestamp> - <level> - <message>".
# NOTE: logging.basicConfig() does nothing if the root logger already has
# handlers configured, so re-running this cell (or running it in an
# environment that pre-configures logging) will not change the setup.
logging.basicConfig(
    filename="scraper.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

In [15]:
def get_rating(tag):
    """Translate a star-rating element's CSS classes into a numeric string.

    Args:
        tag: An element supporting ``tag["class"]`` lookup that yields an
            iterable of class names (e.g. the ``<p class="star-rating Three">``
            element found by BeautifulSoup), or ``None`` when no such element
            was found.

    Returns:
        ``"1"`` to ``"5"`` for the first class name that spells a rating word
        (``"One"`` .. ``"Five"``), or ``"NA"`` when the tag is missing, has no
        ``class`` attribute, or carries no recognised rating word.
    """
    ratings = {
        "One": "1",
        "Two": "2",
        "Three": "3",
        "Four": "4",
        "Five": "5"
    }

    # A failed lookup upstream hands us None; treat it like "no rating"
    # instead of crashing with a TypeError on the subscript below.
    if tag is None:
        return "NA"

    try:
        classes = tag["class"]
    except KeyError:
        # Element exists but has no class attribute at all.
        return "NA"

    for cls in classes:
        if cls in ratings:
            return ratings[cls]
    return "NA"

In [16]:
def scrape_page(page_no, timeout=10):
    """Download one catalogue page and extract its book listings.

    Args:
        page_no: Page number substituted into ``BASE_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        A list of dicts with the keys ``"Title"``, ``"Price"``, ``"Rating"``,
        ``"Availability"`` and ``"Link"`` -- one per book that could be
        parsed. An empty list is returned when the request fails or the
        server answers with a status other than 200; the reason is logged.
    """
    url = BASE_URL.format(page_no)
    logging.info(f"Scraping page {page_no}")

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc. -- report and let the caller
        # treat it the same way as an unavailable page.
        logging.error(f"Request for page {page_no} failed: {exc}")
        return []

    if response.status_code != 200:
        logging.warning(
            f"Page {page_no} returned HTTP {response.status_code}; no books scraped"
        )
        return []

    # Parse the raw bytes rather than response.text: requests derives the
    # text encoding from the HTTP headers only and falls back to ISO-8859-1
    # for text/* responses without a charset, which can corrupt characters
    # such as the pound sign in prices. Given bytes, BeautifulSoup detects
    # the encoding from the document itself.
    soup = BeautifulSoup(response.content, "html.parser")

    page_data = []
    for book in soup.find_all("article", class_="product_pod"):
        try:
            page_data.append(_parse_book(book))
        except (AttributeError, KeyError, TypeError) as exc:
            # A card missing an expected element/attribute should not discard
            # the rest of the page; skip it and leave a trace in the log.
            logging.warning(f"Skipping malformed book entry on page {page_no}: {exc!r}")

    return page_data


def _parse_book(book):
    """Build the record dict for a single ``product_pod`` article element."""
    anchor = book.h3.a
    return {
        "Title": anchor["title"],
        "Price": book.find("p", class_="price_color").text.strip(),
        "Rating": get_rating(book.find("p", class_="star-rating")),
        "Availability": book.find("p", class_="instock availability").text.strip(),
        "Link": "https://books.toscrape.com/catalogue/" + anchor["href"]
    }

In [17]:
def save_csv(data, filename="books.csv"):
    """Write a list of row dicts to a UTF-8 CSV file with a header row.

    The column order is taken from the keys of the first row.

    Args:
        data: Sequence of dicts sharing the same keys.
        filename: Destination path; an existing file is overwritten.

    Returns:
        None. When ``data`` is empty nothing is written and the destination
        file is neither created nor truncated; a warning is logged instead.
    """
    # Guard before opening: with no rows there is no header to derive, and
    # opening in "w" mode first would wipe a previously saved file.
    if not data:
        logging.warning(f"No rows to write; {filename} was left untouched")
        return

    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(data[0].keys()))
        writer.writeheader()
        writer.writerows(data)

In [20]:
def main(max_pages=5):
    """Scrape catalogue pages 1..max_pages and save the results to CSV.

    Scraping stops at the first page that yields no books (failed request,
    non-200 response, or an empty listing).

    Args:
        max_pages: Number of catalogue pages to attempt, starting at page 1.
    """
    all_books = []

    for page in range(1, max_pages + 1):
        books = scrape_page(page)
        if not books:
            logging.warning(f"No books returned for page {page}; stopping early")
            break
        all_books.extend(books)

    logging.info(f"Total books scraped: {len(all_books)}")

    # Nothing collected (e.g. the very first request failed): do not call
    # save_csv with an empty list and do not claim success.
    if not all_books:
        print("No books were scraped; nothing was saved. Check the log for details.")
        return

    save_csv(all_books)
    print("Scraping completed successfully!")

In [21]:
# Run the scraper only when this code executes as the top-level program
# (this is also the case when the cell is run inside a notebook kernel).
if __name__ == "__main__":
    main()

Scraping completed successfully!
