In [191]:
import requests
from bs4 import BeautifulSoup

# Set up the necessary variables
base_url = "https://www.amazon.in/s"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Accept-Language': 'en-US,en;q=0.9',
}
params = {
    'k': 'mobiles',
    'i': 'electronics',
    'rh': 'p_36:1318507031',
    'dc': '',
    'rnid': '1318502031',
    'ref': 'sr_nr_p_36_5',
    'ds': 'v1:WM7Vc8+shQNXBAzmi7Dzfrqb0wIzklzAEYWHRakNN/I',
}

# Parallel lists: index i of each list describes the same search result.
product_titles = []
ratings = []
prices = []

# Specify the range of pages to scrape (both ends inclusive)
start_page = 1
end_page = 20

# Seconds to wait for a response. Without a timeout a stalled connection
# would block the cell indefinitely.
request_timeout = 30


def _span_text(card, css_class, default="N/A"):
    """Return the text of the first <span> with `css_class` inside `card`.

    Parameters
    ----------
    card : bs4 element for one search result.
    css_class : value passed to BeautifulSoup's `class_` filter.
    default : value returned when no matching <span> exists.
    """
    span = card.find('span', class_=css_class)
    return default if span is None else span.text


# Defined up front so later cells that inspect `products` still work even if
# every request fails.
products = []

# Iterate through the pages
for page in range(start_page, end_page + 1):
    params['page'] = page  # Add the page parameter to the params dictionary

    # Send an HTTP GET request; report and skip pages that fail instead of
    # parsing an error page as if it were a result listing.
    try:
        response = requests.get(
            base_url, headers=headers, params=params, timeout=request_timeout
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Page {page}: request failed ({exc}); skipping")
        continue

    soup = BeautifulSoup(response.content, 'html.parser')

    # Each search result is a <div data-component-type="s-search-result">
    products = soup.find_all('div', {'data-component-type': 's-search-result'})
    if not products:
        # A successful response with no result cards usually means the page
        # layout differs from what the selectors expect.
        print(f"Page {page}: no search results found in response")

    for product in products:
        # Missing fields are recorded as "N/A" so the three lists stay aligned.
        product_titles.append(
            _span_text(product, 'a-size-medium a-color-base a-text-normal')
        )
        ratings.append(_span_text(product, 'a-icon-alt'))
        prices.append(_span_text(product, 'a-price-whole'))

# Print or process the extracted data as required
for title, rating, price  in zip(product_titles, ratings, prices):
    print(f"Title: {title}")
    print(f"Rating: {rating}")
    print(f"Price: {price}")
    
    print()

# Perform any necessary post-processing on the data

# Save or use the extracted data as required


Title: Apple iPhone 14 Pro Max (128 GB) - Space Black
Rating: 4.5 out of 5 stars
Price: 1,27,999

Title: Redmi K50i 5G (Quick Silver, 6GB RAM, 128GB Storage) | Flagship Mediatek Dimensity 8100 Processor | 144Hz Liquid FFS Display
Rating: 4.0 out of 5 stars
Price: 20,999

Title: Tecno Camon 20 Pro 5G (Dark Welkin, 8GB RAM,256GB Storage)| India's 1st MediaTek Dimensity 8050 Processor | 16GB Expandable RAM | 64MP RGBW(G+P) OIS Rear Camera|6.67 FHD+ Big AMOLED Screen
Rating: 4.1 out of 5 stars
Price: 21,999

Title: OnePlus 11 5G (Eternal Green, 16GB RAM, 256GB Storage)
Rating: 4.3 out of 5 stars
Price: 61,999

Title: Samsung Galaxy A14 5G (Dark Red, 8GB, 128GB Storage) | Triple Rear Camera (50 MP Main) | Upto 16 GB RAM with RAM Plus | Without Charger
Rating: 3.8 out of 5 stars
Price: 20,999

Title: Samsung Galaxy A9 (Caviar Black, 8GB RAM, 128GB Storage)
Rating: 4.0 out of 5 stars
Price: 20,990

Title: Samsung Galaxy A34 5G (Awesome Silver, 8GB, 128GB Storage) | 48 MP No Shake Cam (OIS) | 

In [180]:
# Map each output column header to the list of values scraped for it
column_names = ('Product Title', 'Rating', 'Price')
data = dict(zip(column_names, (product_titles, ratings, prices)))


In [181]:
import pandas as pd
# Build the DataFrame from the column-name -> values mapping
# (each dict key becomes a column).
df = pd.DataFrame.from_dict(data)



In [182]:
# Display the DataFrame: a bare last expression in a cell is rendered by the
# notebook, so no print() call is needed.
df

Unnamed: 0,Product Title,Rating,Price
0,"Samsung Galaxy A34 5G (Awesome Silver, 8GB, 12...",4.0 out of 5 stars,30999
1,Apple iPhone 14 (128 GB) - Purple,4.5 out of 5 stars,67499
2,"OnePlus 11R 5G (Sonic Black, 16GB RAM, 256GB S...",4.4 out of 5 stars,44999
3,"Redmi Note 12 5G (Matte Black,8GB RAM, 256GB S...",3.8 out of 5 stars,20999
4,"Redmi Note 12 5G (Mystique Blue,8GB RAM, 256GB...",3.8 out of 5 stars,20999
...,...,...,...
643,"Vivo V20 Pro (Sunset Melody, 8GB RAM, 128GB St...",4.3 out of 5 stars,27000
644,"OUKITEL WP9 Rugged Smartphone, 8000mAh Battery...",3.8 out of 5 stars,20669
645,India Gadgets - BV8800 Rugged Mobile Phone: 8G...,,39500
646,Apple iPhone 14 Plus (128 GB) - Blue,4.5 out of 5 stars,76499


In [183]:
# Show column dtypes and non-null counts for the DataFrame as it stands
# at this point in the notebook.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 648 entries, 0 to 647
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Product Title  648 non-null    object
 1   Rating         648 non-null    object
 2   Price          648 non-null    object
dtypes: object(3)
memory usage: 15.3+ KB


In [184]:
import numpy as np
import pandas as pd

# --- Price -> float -------------------------------------------------------
# Work on a string view of the column so this cell can be re-run safely:
# once Price is already float, the `.str` accessor on the raw column would
# raise, whereas `astype(str)` round-trips numeric values unchanged.
price_raw = df['Price']
price_text = price_raw.astype(str).str.replace(r'[,₹]', '', regex=True)  # drop commas and the currency symbol
# Treat both genuinely missing values and the 'N/A' placeholder as NaN.
price_missing = price_raw.isna() | price_text.eq('N/A')
df['Price'] = price_text.mask(price_missing).astype(float)

# --- Rating -> float ------------------------------------------------------
# Pull the leading number out of strings like "4.5 out of 5 stars". The
# optional fractional part also accepts integer ratings such as "4".
rating_value = (
    df['Rating']
    .astype(str)
    .str.extract(r'(\d+(?:\.\d+)?)', expand=False)
    .astype(float)
)
# Missing ratings are stored as 0. Assign the result back instead of calling
# fillna(inplace=True) on the column selection, which is a chained in-place
# operation that pandas deprecates.
# NOTE(review): the 0 placeholders pull down aggregate statistics of Rating.
df['Rating'] = rating_value.fillna(0)



In [185]:
# Re-check column dtypes and non-null counts after the Price/Rating conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 648 entries, 0 to 647
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Product Title  648 non-null    object 
 1   Rating         648 non-null    float64
 2   Price          587 non-null    float64
dtypes: float64(2), object(1)
memory usage: 15.3+ KB


In [186]:
# Display the DataFrame after the Price/Rating conversion.
df

Unnamed: 0,Product Title,Rating,Price
0,"Samsung Galaxy A34 5G (Awesome Silver, 8GB, 12...",4.0,30999.0
1,Apple iPhone 14 (128 GB) - Purple,4.5,67499.0
2,"OnePlus 11R 5G (Sonic Black, 16GB RAM, 256GB S...",4.4,44999.0
3,"Redmi Note 12 5G (Matte Black,8GB RAM, 256GB S...",3.8,20999.0
4,"Redmi Note 12 5G (Mystique Blue,8GB RAM, 256GB...",3.8,20999.0
...,...,...,...
643,"Vivo V20 Pro (Sunset Melody, 8GB RAM, 128GB St...",4.3,27000.0
644,"OUKITEL WP9 Rugged Smartphone, 8000mAh Battery...",3.8,20669.0
645,India Gadgets - BV8800 Rugged Mobile Phone: 8G...,0.0,39500.0
646,Apple iPhone 14 Plus (128 GB) - Blue,4.5,76499.0


In [188]:
# Summary statistics for the DataFrame's columns.
# Rating includes the 0 values written by fillna(0) for missing ratings, so
# its mean/min/quartiles reflect those placeholders as well.
df.describe()

Unnamed: 0,Rating,Price
count,648.0,587.0
mean,3.725772,55228.936968
std,1.249358,35521.413702
min,0.0,13990.0
25%,3.6,26173.5
50%,4.2,41500.0
75%,4.5,76499.0
max,5.0,159900.0


In [189]:
# Inspect the raw result tags. `products` is reassigned on every iteration of
# the scraping loop, so this shows only the most recently assigned result list.
products

[<div class="sg-col-20-of-24 s-result-item s-asin sg-col-0-of-12 sg-col-16-of-20 AdHolder sg-col s-widget-spacing-small sg-col-12-of-16" data-asin="B0BDK62PDX" data-component-type="s-search-result" data-index="3" data-uuid="54a784d6-0ed8-45cc-a780-df23fecbf3ee"><div class="sg-col-inner"><div cel_widget_id="MAIN-SEARCH_RESULTS-3" class="s-widget-container s-spacing-small s-widget-container-height-small celwidget slot=MAIN template=SEARCH_RESULTS widgetId=search-results_457" data-csa-c-item-id="amzn1.asin.1.B0BDK62PDX" data-csa-c-pos="457" data-csa-c-type="item" data-csa-op-log-render="">
 <div class="rush-component" data-component-props='{"percentageShownToFire":"50","batchable":true,"requiredElementSelector":".s-image:visible","url":"https://unagi-eu.amazon.com/1/events/com.amazon.eel.SponsoredProductsEventTracking.prod?qualifier=1687771586&amp;id=5637086126794898&amp;widgetName=sp_atf_next&amp;adId=20111211247698&amp;eventType=1&amp;adIndex=0"}' data-component-type="s-impression-logge