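# Web Scraping

Scrape the name, price, and star rating of every book on the Fiction category page of books.toscrape.com and save the results to `products.csv`.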

In [1]:
pip install requests beautifulsoup4 pandas


Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_books(page_url, timeout=10):
    """Scrape name, price and star rating for every book listed on a page.

    Parameters
    ----------
    page_url : str
        URL of a listing page whose books are marked up as
        ``<article class="product_pod">`` elements.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get``. Defaults to 10 so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    list of dict
        One dict per book with the keys ``'Name'``, ``'Price'`` and
        ``'Rating'``. ``'Price'`` is the text of the ``price_color``
        paragraph and ``'Rating'`` is the class token that accompanies
        ``star-rating`` (e.g. ``'Three'``). Either value is ``None`` when the
        corresponding element is missing from the markup.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on error pages instead of silently returning an empty list.
    response.raise_for_status()

    # Parse the raw bytes so BeautifulSoup can detect the charset from the
    # document itself. `response.text` falls back to ISO-8859-1 when the
    # Content-Type header carries no charset, which can garble characters
    # such as the pound sign in prices.
    soup = BeautifulSoup(response.content, 'html.parser')

    books = []
    for book in soup.find_all('article', class_='product_pod'):
        # Extract product name
        name = book.h3.a['title']

        # Extract product price
        price_tag = book.find('p', class_='price_color')
        price = price_tag.text.strip() if price_tag is not None else None

        # Extract product rating: the tag looks like
        # <p class="star-rating Three">, so the rating is whichever class
        # token is not "star-rating".
        rating_tag = book.find('p', class_='star-rating')
        rating = None
        if rating_tag is not None:
            rating = next(
                (token for token in rating_tag.get('class', [])
                 if token != 'star-rating'),
                None,
            )

        # Store the information in a dictionary
        books.append({
            'Name': name,
            'Price': price,
            'Rating': rating
        })

    return books

def save_to_csv(data, filename='products.csv'):
    """Write ``data`` to ``filename`` as CSV and print a confirmation.

    ``data`` is passed straight to ``pd.DataFrame``; the resulting frame is
    written without its index column. Returns ``None``.
    """
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Data saved to {filename}")

def main(url='http://books.toscrape.com/catalogue/category/books/fiction_10/index.html',
         filename='products.csv'):
    """Scrape one listing page and save the result as a CSV file.

    Parameters
    ----------
    url : str, optional
        Page to scrape; passed to ``scrape_books``. Defaults to the Fiction
        category page this notebook was written for.
    filename : str, optional
        Destination passed to ``save_to_csv``. Defaults to ``'products.csv'``.
    """
    # Scrape the data
    products = scrape_books(url)

    # Save the data to a CSV file
    save_to_csv(products, filename)

# Run with the default URL and filename when executed as a script or cell.
if __name__ == "__main__":
    main()


Data saved to products.csv
