In [1]:
def get_title(soup):
    """Return the product name found on a parsed product page.

    Looks up the ``<span id="productTitle">`` element via ``soup.find`` and
    returns its text with leading/trailing whitespace removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped title text, or ``""`` when the lookup raises
        ``AttributeError`` (for example because ``find`` returned ``None``).
    """
    try:
        heading = soup.find("span", attrs={"id": 'productTitle'})
        return heading.text.strip()
    except AttributeError:
        # Nothing to read the title from on this page.
        return ""

# Function to extract Product Price
def get_price(soup):
    """Return the price text found on a parsed product page.

    Reads the first ``<span>`` whose class is ``a-price-whole`` and returns
    its text with surrounding whitespace removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped price text, or ``""`` when the element is missing
        (``find`` returned ``None``) or ``soup`` itself has no ``find``.
    """
    try:
        price_tag = soup.find("span", attrs={'class': 'a-price-whole'})
        return price_tag.text.strip()
    except AttributeError:
        # Only the "element not found" case is treated as "no price".
        # A bare ``except`` here would also swallow KeyboardInterrupt and
        # SystemExit and hide genuine bugs, so anything else propagates.
        return ""

# Function to extract Product Rating
def get_rating(soup):
    """Return the leading three characters of the product rating text.

    Reads the ``.string`` of the first ``<span>`` whose class is
    ``a-icon-alt``, strips surrounding whitespace, and keeps the first three
    characters (e.g. ``"4.2 out of 5 stars"`` becomes ``"4.2"``).

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        Up to three characters of rating text, or ``""`` when the element is
        missing or its ``.string`` is ``None``.
    """
    try:
        rating = soup.find("span", attrs={'class': 'a-icon-alt'}).string.strip()
    except AttributeError:
        # Missing element (find -> None) or element without a single string
        # child (.string -> None). A bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit, so only this case is handled.
        rating = ""

    return rating[:3]

# Function to extract Availability Status
def get_availability(soup):
    """Return the availability status shown on a parsed product page.

    Reads the ``.string`` of the first ``<span>`` whose class attribute is
    ``a-size-medium a-color-success`` and strips surrounding whitespace.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped status text, or ``"Not Available"`` when the lookup
        raises ``AttributeError`` (element missing or ``.string`` is ``None``).
    """
    try:
        status_tag = soup.find("span", attrs={'class': 'a-size-medium a-color-success'})
        return status_tag.string.strip()
    except AttributeError:
        # No success-coloured status element to read from.
        return "Not Available"

In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

# Set up the browser
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')  # To run the browser in headless mode
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')

# Site root used to turn relative product links into full URLs
base_url = "https://www.amazon.in"

# Listing page whose product links are followed
url = "https://www.amazon.in/s?rh=n%3A6612025031&fs=true&ref=lp_6612025031_sar"

# Column-oriented store for the scraped fields (turned into a DataFrame later)
df = {"title": [], "price": [], "rating": [], "availability": []}

# A single browser session serves the listing page and every product page.
# Starting a new Chrome per link and quitting only once after the loop left
# all but the last browser process running.
driver = webdriver.Chrome(options=options)
try:
    # NOTE: Selenium's implicit wait only applies to element lookups
    # (find_element/find_elements). No lookups are made through the driver
    # here, so this does not delay reading page_source; it is set once, up
    # front, so it would cover any lookup added later.
    driver.implicitly_wait(1)

    # Open the listing page and parse its HTML
    driver.get(url)
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')

    # Find all links for products
    links = soup.find_all("a", class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal")

    # Keep only anchors that actually carry an href; a missing href would
    # otherwise make the URL concatenation below raise TypeError.
    links_list = [link.get('href') for link in links if link.get('href')]

    # Loop for extracting product details from each link
    for link in links_list:
        # Hrefs that are already absolute are used as-is; prefixing them with
        # the site root would produce an invalid URL.
        if link.startswith(("http://", "https://")):
            product_url = link
        else:
            product_url = base_url + link

        driver.get(product_url)

        # Get page source and create a BeautifulSoup object for the new page
        new_page_source = driver.page_source
        new_soup = BeautifulSoup(new_page_source, 'html.parser')

        # Extract product details and append one value per column
        df['title'].append(get_title(new_soup))
        df['price'].append(get_price(new_soup))
        df['rating'].append(get_rating(new_soup))
        df['availability'].append(get_availability(new_soup))
finally:
    # Always release the browser, even if a page load or parse step fails
    driver.quit()


In [5]:
# Build the results table from the scraped columns, write it to CSV
# (without the index column), and display it as the cell output.
data = pd.DataFrame(data=df)
data.to_csv(path_or_buf="amazon_data.csv", index=False)
data

Unnamed: 0,title,price,rating,availability
0,"Ambrane 50000mAh Power Bank, 20W Fast Charging...",4499.0,4.0,In stock
1,Oboe Silicon Soft Cover Case for Mi Power Bank...,349.0,4.0,In stock
2,Ambrane 10000mAh Magnetic Wireless Power Bank ...,1899.0,2.9,In stock
3,MI Power Bank 3i 20000mAh Lithium Polymer 18W ...,2149.0,4.2,In stock
4,"Mi 10000mAH Li-Polymer, Micro-USB and Type C I...",1299.0,4.2,In stock
5,"MI 10000mAh Lithium Ion, Lithium Polymer Power...",1699.0,4.3,In stock
6,Ambrane 20000mAh Power Bank with 20W Fast Char...,1599.0,4.0,In stock
7,"Ambrane 20000mAh Power Bank, 20W Fast Charging...",1599.0,4.0,In stock
8,Duracell 20000 MAH Slimmest Power Bank with 1 ...,3099.0,4.2,In stock
9,"Ambrane 10000mAh Slim Power Bank, 20W Fast Cha...",999.0,3.9,In stock
