In [2]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# Set up the Selenium WebDriver. Chrome runs headless (no visible window);
# comment out the "--headless" line below to watch the browser while debugging.
options = Options()
options.add_argument("--headless")  # To run in headless mode
# options.add_argument("--disable-gpu")
# Chrome documents this switch as "width,height". The "1920x1080" spelling is
# not understood by every Chrome mode, in which case the size is ignored.
options.add_argument("--window-size=1920,1080")
# options.add_argument("--log-level=3")  # Suppress warnings

# Initialize WebDriver: the value returned by ChromeDriverManager().install()
# is handed to Service, and one shared `driver` is used by all functions below.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# Amazon Search Results URL
URL = "https://www.amazon.in/s?rh=n%3A6612025031&fs=true&ref=lp_6612025031_sar"

def get_product_links(url):
    """
    Collect product page links from an Amazon search results page.

    Loads ``url`` in the module-level ``driver``, scrolls to the bottom of the
    page once, and reads the ``href`` of every anchor matching the CSS selector
    ``a.a-link-normal.s-no-outline``.

    Args:
        url: Address of the search results page to open.

    Returns:
        A list of unique, non-empty href strings in the order they appear on
        the page. An empty list is returned when nothing matched or when any
        step failed; the error is printed rather than raised.
    """
    product_links = []

    try:
        driver.get(url)
        time.sleep(3)  # Allow time for page to load

        # Scroll to the bottom to load more products
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Give some time for elements to load

        anchors = driver.find_elements(By.CSS_SELECTOR, "a.a-link-normal.s-no-outline")
        # Read each href exactly once: every get_attribute call is a separate
        # request to the browser.
        hrefs = (anchor.get_attribute("href") for anchor in anchors)
        # dict.fromkeys drops repeated links while keeping first-seen order, so
        # a product that is linked twice is not scraped twice.
        product_links = list(dict.fromkeys(href for href in hrefs if href))

        print(f"🔍 Found {len(product_links)} product links.")

    except Exception as e:
        # Deliberately best-effort: report the failure and fall through to
        # return whatever was collected (an empty list if nothing was).
        print(f"❌ Error fetching product links: {e}")

    return product_links

def _first_text(by, value, default="N/A"):
    """Return the stripped text of the first element matching the locator, or ``default``."""
    matches = driver.find_elements(by, value)
    return matches[0].text.strip() if matches else default


def get_product_details(url):
    """
    Extract product details from an individual product page.

    Loads ``url`` in the module-level ``driver`` and reads each field from the
    page. A field whose element is missing is reported as ``"N/A"`` instead of
    aborting the whole product.

    Args:
        url: Address of the product page to open.

    Returns:
        A dict with the keys ``"Title"``, ``"Price"``, ``"Rating (Out of 5)"``,
        ``"Seller"``, ``"Availability"`` and ``"Total Ratings"`` (all string
        values), or ``None`` if loading or reading the page raised; the error
        is printed rather than raised.
    """
    try:
        driver.get(url)
        time.sleep(2)  # Allow time for page to load fully

        title = _first_text(By.ID, "productTitle")
        price = _first_text(By.CLASS_NAME, "a-price-whole")

        # The rating is the first word of the "title" attribute on #acrPopover.
        # Use find_elements (plural) so a page without that element yields
        # "N/A" like every other field, rather than raising and discarding the
        # product.
        rating = "N/A"
        rating_popovers = driver.find_elements(By.ID, "acrPopover")
        if rating_popovers:
            rating_title = rating_popovers[0].get_attribute("title")
            if rating_title:  # attribute may be absent (None) or empty
                rating = rating_title.split(" ")[0]

        seller = _first_text(By.ID, "sellerProfileTriggerId")
        availability = _first_text(By.ID, "availability")

        # Keep only the count: strip the plural suffix first, then the
        # singular one used when there is exactly one rating.
        total_ratings = (
            _first_text(By.ID, "acrCustomerReviewText")
            .replace(" ratings", "")
            .replace(" rating", "")
        )

        return {
            "Title": title,
            "Price": price,
            "Rating (Out of 5)": rating,
            "Seller": seller,
            "Availability": availability,
            "Total Ratings": total_ratings,
        }

    except Exception as e:
        # Best-effort per product: report the failure and let the caller skip it.
        print(f"❌ Error fetching product details for {url}: {e}")
        return None


# Run the scrape inside try/finally so the browser is shut down even when a
# step raises or the run is interrupted; otherwise the Chrome process started
# for `driver` would be left running.
try:
    # Step 1: Get all product links
    print("🔍 Fetching product links...")
    product_links = get_product_links(URL)
    print(f"✅ Found {len(product_links)} products.")

    # Step 2: Scrape product details
    product_data = []
    for index, link in enumerate(product_links):
        print(f"📦 Scraping Product: ({index + 1}/{len(product_links)})")
        details = get_product_details(link)
        # get_product_details returns None on failure; skip those products.
        if details:
            product_data.append(details)

    # Step 3: Store the data in a Pandas DataFrame and save as CSV
    if product_data:
        df = pd.DataFrame(product_data)
        df.to_csv("amazon_products_selenium.csv", index=False)
        print("✅ Data saved to 'amazon_products_selenium.csv' successfully!")
    else:
        print("⚠️ No data scraped. Please check your script or Amazon's page structure.")
finally:
    # Close the Selenium WebDriver
    driver.quit()


🔍 Fetching product links...
🔍 Found 24 product links.
✅ Found 24 products.
📦 Scraping Product: (1/24)
📦 Scraping Product: (2/24)
📦 Scraping Product: (3/24)
📦 Scraping Product: (4/24)
📦 Scraping Product: (5/24)
📦 Scraping Product: (6/24)
📦 Scraping Product: (7/24)
📦 Scraping Product: (8/24)
📦 Scraping Product: (9/24)
📦 Scraping Product: (10/24)
📦 Scraping Product: (11/24)
📦 Scraping Product: (12/24)
📦 Scraping Product: (13/24)
📦 Scraping Product: (14/24)
📦 Scraping Product: (15/24)
📦 Scraping Product: (16/24)
📦 Scraping Product: (17/24)
📦 Scraping Product: (18/24)
📦 Scraping Product: (19/24)
📦 Scraping Product: (20/24)
📦 Scraping Product: (21/24)
📦 Scraping Product: (22/24)
📦 Scraping Product: (23/24)
📦 Scraping Product: (24/24)
✅ Data saved to 'amazon_products_selenium.csv' successfully!
