In [1]:
# Use Python libraries like BeautifulSoup or Scrapy to extract data from websites
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://books.toscrape.com/"
response = requests.get(url)

soup = BeautifulSoup(response.text, "html.parser")
books = soup.find_all("article", class_="product_pod")

data = []

for book in books:
    title = book.h3.a["title"]
    price = book.find("p", class_="price_color").text
    availability = book.find("p", class_="instock availability").text.strip()
    rating = book.p["class"][1]

    data.append([title, price, rating, availability])

df = pd.DataFrame(data, columns=["Title", "Price", "Rating", "Availability"])
df.to_csv("books_dataset.csv", index=False)

print(df.head())


                                   Title    Price Rating Availability
0                   A Light in the Attic  Â£51.77  Three     In stock
1                     Tipping the Velvet  Â£53.74    One     In stock
2                             Soumission  Â£50.10    One     In stock
3                          Sharp Objects  Â£47.82   Four     In stock
4  Sapiens: A Brief History of Humankind  Â£54.23   Five     In stock


In [3]:
# Handling Navigation (Multiple Pages)
import requests
from bs4 import BeautifulSoup
import pandas as pd

base_url = "https://books.toscrape.com/catalogue/page-{}.html"

data = []

for page in range(1, 6):
    url = base_url.format(page)
    soup = BeautifulSoup(requests.get(url).text, "html.parser")

    books = soup.find_all("article", class_="product_pod")

    for book in books:
        title = book.h3.a["title"]
        price = book.find("p", class_="price_color").text
        availability = book.find("p", class_="instock availability").text.strip()
        rating = book.p["class"][1]

        data.append([title, price, rating, availability])

df = pd.DataFrame(data, columns=["Title","Price","Rating","Availability"])
df.to_csv("books_full_dataset.csv", index=False)
