In [3]:
import requests
from bs4 import BeautifulSoup
import csv

# Function to scrape product information
def scrape_product_info(url):
    if not url.startswith('http'):
        url = 'https://www.amazon.in' + url

    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Initialize variables with default values
    product_name = ''
    product_price = ''
    rating = ''
    review_count = ''
    description = ''
    asin = ''
    product_description = ''
    manufacturer = ''

    # Extract the product name if available
    product_name_elem = soup.find('span', class_='a-size-medium')
    if product_name_elem:
        product_name = product_name_elem.text.strip()

    # Extract the product price if available
    product_price_elem = soup.find('span', class_='a-offscreen')
    if product_price_elem:
        product_price = product_price_elem.text.strip()

    # Extract the rating if available
    rating_elem = soup.find('span', class_='a-icon-alt')
    if rating_elem:
        rating = rating_elem.text.strip()

    # Extract the number of reviews if available
    review_count_elem = soup.find('span', class_='a-size-base')
    if review_count_elem:
        review_count = review_count_elem.text.strip()

    # Extract additional product information if available
    description_elem = soup.select_one('#productDescription p')
    if description_elem:
        description = description_elem.text.strip()

    asin_elem = soup.select_one('#prodDetails th:-soup-contains("ASIN") + td')
    if asin_elem:
        asin = asin_elem.text.strip()

    product_description_elem = soup.select_one('#productDescription .product-description-content')
    if product_description_elem:
        product_description = product_description_elem.text.strip()

    manufacturer_elem = soup.select_one('#bylineInfo a')
    if manufacturer_elem:
        manufacturer = manufacturer_elem.text.strip()

    # Create a dictionary with the scraped data
    data = {
        'Product URL': url,
        'Product Name': product_name,
        'Product Price': product_price,
        'Rating': rating,
        'Number of Reviews': review_count,
        'Description': description,
        'ASIN': asin,
        'Product Description': product_description,
        'Manufacturer': manufacturer
    }

    return data

    

# Function to scrape product listing pages
def scrape_product_listing_pages(start_url, num_pages):
    all_data = []

    for page in range(1, num_pages + 1):
        url = f'{start_url}&page={page}'
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract product URLs
        product_urls = [item['href'] for item in soup.select('a.a-link-normal')]

        for product_url in product_urls:
            # Scrape product information
            data = scrape_product_info(product_url)
            all_data.append(data)

    return all_data

# Define the start URL and number of pages to scrape
start_url = 'https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283&ref=sr_pg_1'
num_pages = 20

# Scrape product listing pages
scraped_data = scrape_product_listing_pages(start_url, num_pages)
scraped_data
# Write the data to a CSV file
csv_file = 'scraped_data.csv'
fieldnames = ['Product URL', 'Product Name', 'Product Price', 'Rating', 'Number of Reviews',
              'Description', 'ASIN', 'Product Description', 'Manufacturer']

with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(scraped_data)

print(f'Scraped data has been saved to {csv_file}')


PermissionError: [Errno 13] Permission denied: 'scraped_data.csv'