# Amazon Product Scraper - Step by Step Guide

This notebook demonstrates how to scrape Amazon product information step by step. We'll extract:
- Product Names
- Star Ratings  
- Number of Ratings
- Prices

Let's start by importing the necessary libraries.

In [None]:
# Import Required Libraries
import requests
import json
import csv
from bs4 import BeautifulSoup
import re

Here we set up the Amazon search URL and headers to mimic a real browser request.

In [None]:
# Search configuration used by the request cell below.

# Placeholder: replace with the full Amazon search-results URL you want to scrape.
url = "https://www.amazon.com/s/........"

# Intentionally empty. Fill this in with the actual headers captured in Postman
# so the request mimics one sent by a real browser.
headers = {}

# Empty payload passed along with the request.
payload = {}

In [None]:
# Make the GET Request
# Without a timeout, requests waits indefinitely for a server that never
# answers; bound the wait so the cell fails fast instead of hanging the kernel.
REQUEST_TIMEOUT_SECONDS = 30

response = requests.request(
    "GET",
    url,
    headers=headers,
    data=payload,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Report the HTTP status and the size of the returned HTML.
print(f"Status Code: {response.status_code}")
print(f"Response Length: {len(response.text)} characters")

if response.status_code == 200:
    print("Request successful!")
else:
    print("Request failed!")


Status Code: 200
Response Length: 1734724 characters
Request successful!


In [4]:
# Build a navigable tree from the downloaded HTML
soup = BeautifulSoup(response.text, features='html.parser')

# Each search result is a <div> tagged with data-component-type="s-search-result"
product_containers = soup.find_all(
    'div',
    attrs={'data-component-type': 's-search-result'},
)

print(f"Found {len(product_containers)} product containers")


Found 48 product containers


In [5]:
# Use the first search result as the worked example for the following cells
container = product_containers[0]

# The product name is the text of the container's first <h2> element
title_tag = container.find('h2')
product_name = title_tag.get_text(strip=True)
print("Product Name:", product_name)

Product Name: Folgers Classic Roast Medium Roast Ground Coffee, 33.7 Ounces (Pack of 6)


In [6]:
# Locate the reviews block; the rating and ratings count are read from it next
review_block = container.find('div', attrs={'data-cy': 'reviews-block'})

In [17]:
# Star rating: text of the first 'a-size-small' span inside the reviews block
rating_span = review_block.find('span', attrs={'class': 'a-size-small'})
star_rating = rating_span.get_text(strip=True)

# Ratings count: aria-label of the link inside the ratings-count component
ratings_component = review_block.find(
    'div',
    attrs={'data-csa-c-content-id': 'alf-customer-ratings-count-component'},
)
ratings_link = ratings_component.find('a')
number_of_reviews = ratings_link.get('aria-label')

print("Star Rating:", star_rating, "stars")
print("Number of Ratings:", number_of_reviews)

Star Rating: 4.8 stars
Number of Ratings: 2,020 ratings


In [9]:
# The first 'a-offscreen' span holds the price text. That text already starts
# with the currency symbol, so print it as-is rather than adding another "$".
price = container.find('span', {'class': 'a-offscreen'}).get_text(strip=True)
print("Price:", price)

Price: $ $77.84


In [18]:
# Placeholder stored for any field that cannot be found in a container.
_MISSING = 'N/A'


def _extract_name(container):
    """Return the stripped text of the container's first <h2>, or 'N/A' if absent."""
    name_element = container.find('h2')
    if name_element is None:
        return _MISSING
    return name_element.get_text(strip=True)


def _extract_reviews(container):
    """Return ``(star_rating, num_rating)`` as strings, using 'N/A' for missing parts."""
    review_block = container.find('div', {'data-cy': 'reviews-block'})
    if review_block is None:
        return _MISSING, _MISSING

    # Star rating: first decimal number in the 'a-size-small' span text.
    star_rating = _MISSING
    star_element = review_block.find('span', {'class': 'a-size-small'})
    if star_element is not None:
        rating_match = re.search(r'(\d+\.?\d*)', star_element.get_text(strip=True))
        if rating_match:
            star_rating = rating_match.group(1)

    # Ratings count: first run of digits/commas in the link's aria-label.
    num_rating = _MISSING
    count_element = review_block.find(
        'div', {'data-csa-c-content-id': 'alf-customer-ratings-count-component'}
    )
    reviews_link = count_element.find('a') if count_element is not None else None
    if reviews_link is not None:
        reviews_match = re.search(r'([\d,]+)', reviews_link.get('aria-label', ''))
        if reviews_match:
            num_rating = reviews_match.group(1)

    return star_rating, num_rating


def _extract_price(container):
    """Return the first 'a-offscreen' price text with '$' removed, or 'N/A' if empty/absent."""
    price_element = container.find('span', {'class': 'a-offscreen'})
    if price_element is None:
        return _MISSING
    price_clean = price_element.get_text(strip=True).replace('$', '').strip()
    return price_clean or _MISSING


def extract_all_products(containers=None):
    """Extract name, star rating, ratings count and price from search-result containers.

    Args:
        containers: Iterable of parsed search-result elements (objects exposing
            ``find``/``get_text``/``get`` like BeautifulSoup tags). When omitted,
            the notebook-level ``product_containers`` list is used, which keeps
            the original zero-argument call working.

    Returns:
        A list of dicts with the keys ``product_name``, ``star_rating``,
        ``num_rating`` and ``price``. Every value is a string; fields that
        cannot be found are set to ``'N/A'``. Containers without a product
        name are skipped entirely.
    """
    if containers is None:
        # Fall back to the containers found by the parsing cell.
        containers = product_containers

    products = []
    for container in containers:
        product_name = _extract_name(container)
        # Only keep products that have at least a name.
        if product_name == _MISSING:
            continue

        star_rating, num_rating = _extract_reviews(container)
        products.append({
            'product_name': product_name,
            'star_rating': star_rating,
            'num_rating': num_rating,
            'price': _extract_price(container),
        })

    return products

def save_to_csv(products, filename='amazon_products.csv'):
    """Write product records to a CSV file.

    Args:
        products: Sequence of dicts keyed by ``product_name``, ``star_rating``,
            ``num_rating`` and ``price``.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        None. Prints a confirmation after writing, or a notice (and writes
        nothing) when ``products`` is empty.
    """
    if products:
        # Column order of the output file
        columns = ['product_name', 'star_rating', 'num_rating', 'price']

        # newline='' lets the csv module control line endings itself
        with open(filename, 'w', newline='', encoding='utf-8') as out_file:
            csv_writer = csv.DictWriter(out_file, fieldnames=columns)
            csv_writer.writeheader()
            for record in products:
                csv_writer.writerow(record)

        print(f"Data saved to {filename}")
    else:
        print("No products to save!")

# Run the extraction over every search result found on the page
all_products = extract_all_products()

# Write the results to the default CSV file
save_to_csv(all_products)

# Print a numbered, human-readable summary of each product
print(f"Found {len(all_products)} products:")
print("=" * 80)
for position, item in enumerate(all_products, start=1):
    print(f"{position}. {item['product_name']}")
    print(f"   Star Rating: {item['star_rating']} stars")
    print(f"   Number of Ratings: {item['num_rating']}")
    print(f"   Price: ${item['price']}")
    print("-" * 80)


Data saved to amazon_products.csv
Found 48 products:
1. Folgers Classic Roast Medium Roast Ground Coffee, 33.7 Ounces (Pack of 6)
   Star Rating: 4.8 stars
   Number of Ratings: 2,020
   Price: $77.84
--------------------------------------------------------------------------------
2. Nespresso Capsules Vertuo, Variety Pack, Medium and Dark Roast Coffee, 30 Count Coffee Pods, Brews 7.8 oz.
   Star Rating: 4.8 stars
   Number of Ratings: 122,801
   Price: $43.08
--------------------------------------------------------------------------------
3. Peet's Coffee, Dark Roast Single Serve Coffee Pods - Major Dickason's Blend 75 Count (1 Box of 75 Pods)
   Star Rating: 4.7 stars
   Number of Ratings: 57,331
   Price: $42.98
--------------------------------------------------------------------------------
4. Maxwell House 27.5oz Ground Coffee Medium Original Roast
   Star Rating: 4.7 stars
   Number of Ratings: 20,923
   Price: $12.97
--------------------------------------------------------------