In [44]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# URL template for a catalogue listing page; the `{}` placeholder is filled
# with the page number via str.format() in scrape_page().
BASE_URL = "http://books.toscrape.com/catalogue/page-{}.html"

def scrape_page(page_number):
    """Scrape one catalogue listing page and return the books found on it.

    Args:
        page_number: Page index substituted into ``BASE_URL``.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'Price'`` and
        ``'Availability'``, one per ``article.product_pod`` element that has
        a title. ``'Price'`` / ``'Availability'`` are ``None`` when the
        corresponding element is absent. An empty list is returned when the
        page cannot be retrieved (network error, timeout, or non-200 status).
    """
    url = BASE_URL.format(page_number)

    try:
        # A timeout keeps a stalled server from hanging the whole scrape.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and move on so
        # one unreachable page does not abort the remaining pages.
        print(f"Failed to retrieve page {page_number}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve page {page_number}")
        return []

    # Passing bytes (response.content) lets BeautifulSoup detect the encoding.
    soup = BeautifulSoup(response.content, 'html.parser')
    scraped_data = []

    for book in soup.find_all('article', class_='product_pod'):
        link = book.h3.a if book.h3 is not None else None
        title = link.get('title') if link is not None else None
        if not title:
            # A record without a title is unusable; skip it instead of
            # crashing on unexpected markup.
            continue

        price_tag = book.find('p', class_='price_color')
        # Match on the single 'availability' class so the lookup does not
        # depend on the exact class string 'instock availability'.
        availability_tag = book.find('p', class_='availability')

        scraped_data.append({
            'Title': title,
            'Price': (
                price_tag.text.strip() if price_tag is not None else None
            ),
            'Availability': (
                availability_tag.text.strip()
                if availability_tag is not None
                else None
            ),
        })

    return scraped_data

def scrape_multiple_pages(num_pages, delay=2):
    """Scrape catalogue pages 1..num_pages and return all books found.

    Args:
        num_pages: Number of listing pages to scrape, starting at page 1.
            Zero or a negative value scrapes nothing.
        delay: Seconds to pause between consecutive page requests.
            Defaults to 2.

    Returns:
        A single list containing the per-page results of ``scrape_page``
        concatenated in page order.
    """
    all_books = []

    for page in range(1, num_pages + 1):
        if page > 1:
            # Pause only *between* requests; sleeping after the final page
            # would just delay the caller for no benefit.
            time.sleep(delay)
        print(f"Scraping page {page}...")
        all_books.extend(scrape_page(page))

    return all_books

def main():
    """Scrape the first five catalogue pages and save them as a CSV file.

    The collected records are loaded into a pandas DataFrame and written to
    ``scraped_books.csv`` in the current working directory, without the
    index column. A confirmation message is printed afterwards.
    """
    page_count = 5
    records = scrape_multiple_pages(page_count)

    # Build the table and write it out in one step.
    pd.DataFrame(records).to_csv('scraped_books.csv', index=False)

    print("Scraping completed! Data saved to scraped_books.csv.")

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping completed! Data saved to scraped_books.csv.
