In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tabulate import tabulate

# 📄 CONFIG
n_pages = 5
base_url = "https://books.toscrape.com/catalogue/page-{}.html"
site_base = "https://books.toscrape.com/"
headers = {
    "User-Agent": "Mozilla/5.0"
}

books = []

def extract_rating(article):
    rating_map = {
        "One": 1,
        "Two": 2,
        "Three": 3,
        "Four": 4,
        "Five": 5
    }
    rating_tag = article.find("p", class_="star-rating")
    rating_classes = rating_tag.get("class", [])
    for cls in rating_classes:
        if cls != "star-rating":
            return rating_map.get(cls, None)
    return None

for page in range(1, n_pages + 1):
    url = base_url.format(page)
    print(f"Scraping: {url}")
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        print(f"❌ Failed to load page {page}")
        continue

    # Try lxml first, fallback to html.parser
    try:
        soup = BeautifulSoup(response.text, "lxml")
    except Exception:
        soup = BeautifulSoup(response.text, "html.parser")

    articles = soup.find_all("article", class_="product_pod")

    for article in articles:
        title = article.h3.a["title"]
        price_text = article.find("p", class_="price_color").text.strip()
        price = float(price_text.replace("Â", "").replace("£", ""))
        availability = article.find("p", class_="instock availability").text.strip()
        rating = extract_rating(article)

        img_src = article.find("img")["src"].replace("../", "")
        img_url = site_base + img_src

        books.append({
            "Title": title,
            "Price (£)": price,
            "Availability": availability,
            "Rating": rating,
            "Image URL": img_url
        })

df = pd.DataFrame(books)

print(df.head())

df.to_csv("books_dynamic.csv", index=False)
print("\n✅ Data saved to books_dynamic.csv")


Scraping: https://books.toscrape.com/catalogue/page-1.html
Scraping: https://books.toscrape.com/catalogue/page-2.html
Scraping: https://books.toscrape.com/catalogue/page-3.html
Scraping: https://books.toscrape.com/catalogue/page-4.html
Scraping: https://books.toscrape.com/catalogue/page-5.html
                                   Title  Price (£) Availability  Rating  \
0                   A Light in the Attic      51.77     In stock       3   
1                     Tipping the Velvet      53.74     In stock       1   
2                             Soumission      50.10     In stock       1   
3                          Sharp Objects      47.82     In stock       4   
4  Sapiens: A Brief History of Humankind      54.23     In stock       5   

                                           Image URL  
0  https://books.toscrape.com/media/cache/2c/da/2...  
1  https://books.toscrape.com/media/cache/26/0c/2...  
2  https://books.toscrape.com/media/cache/3e/ef/3...  
3  https://books.toscrape.co

In [16]:
df = pd.read_csv("books_dynamic.csv")["Image URL"]
df.head()

0    https://books.toscrape.com/media/cache/2c/da/2...
1    https://books.toscrape.com/media/cache/26/0c/2...
2    https://books.toscrape.com/media/cache/3e/ef/3...
3    https://books.toscrape.com/media/cache/32/51/3...
4    https://books.toscrape.com/media/cache/be/a5/b...
Name: Image URL, dtype: object

In [12]:
df

Unnamed: 0,Title,Price (£),Availability,Rating,Image URL
0,A Light in the Attic,51.77,In stock,3,https://books.toscrape.com/media/cache/2c/da/2...
1,Tipping the Velvet,53.74,In stock,1,https://books.toscrape.com/media/cache/26/0c/2...
2,Soumission,50.10,In stock,1,https://books.toscrape.com/media/cache/3e/ef/3...
3,Sharp Objects,47.82,In stock,4,https://books.toscrape.com/media/cache/32/51/3...
4,Sapiens: A Brief History of Humankind,54.23,In stock,5,https://books.toscrape.com/media/cache/be/a5/b...
...,...,...,...,...,...
95,Lumberjanes Vol. 3: A Terrible Plan (Lumberjan...,19.92,In stock,2,https://books.toscrape.com/media/cache/5f/b1/5...
96,"Layered: Baking, Building, and Styling Spectac...",40.11,In stock,1,https://books.toscrape.com/media/cache/98/d1/9...
97,Judo: Seven Steps to Black Belt (an Introducto...,53.90,In stock,2,https://books.toscrape.com/media/cache/5f/52/5...
98,Join,35.67,In stock,5,https://books.toscrape.com/media/cache/93/63/9...


In [21]:
from tabulate import tabulate

# Pretty table of first few rows
print(tabulate(df.head(), headers='keys', tablefmt='pretty'))


+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+-----------------------------------------------------------------------------------+-----------------------------------------------------------------------------------+-----------------------------------------------------------------------------------+-----------------------------------------------------------------------------------+-----------------------------------------------------------------------------------+
|   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   | 