In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random


headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/119 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Referer": "https://www.google.com/"
}

base_url = "https://www.amazon.com/s?k=sport+sheos&page={}"
products = []

for page in range(1, 40):
    print(f"Scraping Amazon page {page}...")
    url = base_url.format(page)
    
    try:
        response = requests.get(url, headers=headers)
        
        if response.status_code != 200:
            print(f"  -> Error: Failed to load (Status Code: {response.status_code})")
            continue

        soup = BeautifulSoup(response.text, "html.parser")
        page_title = soup.title.text.strip() if soup.title else "No Title"
        
        if "Robot Check" in page_title or "CAPTCHA" in page_title:
            print("  -> BLOCKED: Amazon sent a CAPTCHA page. Stopping script.")
            break

        items = soup.select("div.s-main-slot div[data-component-type='s-search-result']")
        
        if not items:
            print(f"  -> Warning: No products found on page {page}.")
            continue

        print(f"  -> Found {len(items)} products.")

        for item in items:
           
            try:
                name = item.h2.text.strip()
            except:
                name = None

            
            try:
                price = item.select_one(".a-price-whole").text.replace(",", "")
            except:
                price = None

            
            try:
                rating_str = item.select_one(".a-icon-alt").text.split()[0]
            except:
                rating_str = None

            
            sentiment = "Not Rated"
            if rating_str:
                try:
                    rating_val = float(rating_str)
                    if rating_val > 4.5:
                        sentiment = "Positive"
                    elif rating_val >= 4.0:  
                        sentiment = "Neutral"
                    else:
                        sentiment = "Negative"
                except ValueError:
                    sentiment = "Unknown"

            
            brand = name.split()[0] if name else None

            
            products.append({
                "Product Name": name,
                "Brand": brand,
                "Price (INR)": price,
                "Rating": rating_str,
                "Sentiment": sentiment,  
                "Availability": "In Stock"
            })

        time.sleep(random.uniform(3,6)) 

    except Exception as e:
        print(f"  -> Error on page {page}: {e}")

if len(products) > 0:
    df_amazon = pd.DataFrame(products)
    df_amazon.to_excel("amazon_beauty_sentiment.xlsx", index=False)
    print(f"\nSuccess! Saved {len(products)} items to amazon_beauty_sentiment.xlsx")
else:
    print("\nFailed: No data scraped.")

Scraping Amazon page 1...
  -> Found 48 products.
Scraping Amazon page 2...
  -> Found 48 products.
Scraping Amazon page 3...
  -> Found 60 products.
Scraping Amazon page 4...
  -> Found 48 products.
Scraping Amazon page 5...
  -> Found 48 products.
Scraping Amazon page 6...
  -> Found 60 products.
Scraping Amazon page 7...
  -> Found 30 products.
Scraping Amazon page 8...
Scraping Amazon page 9...
  -> Found 12 products.
Scraping Amazon page 10...
Scraping Amazon page 11...
  -> Found 12 products.
Scraping Amazon page 12...
Scraping Amazon page 13...
Scraping Amazon page 14...
Scraping Amazon page 15...
Scraping Amazon page 16...
Scraping Amazon page 17...
Scraping Amazon page 18...
Scraping Amazon page 19...
Scraping Amazon page 20...
Scraping Amazon page 21...
  -> Found 12 products.
Scraping Amazon page 22...
Scraping Amazon page 23...
  -> Found 12 products.
Scraping Amazon page 24...
  -> Found 12 products.
Scraping Amazon page 25...
Scraping Amazon page 26...
  -> Found 12 produ

In [19]:
import pandas as pd


df = pd.read_csv('amazon_sentiment_analysis.csv')


df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')


def get_sentiment_from_rating(rating):
    if pd.isna(rating):
        return 'Unknown'
    if rating >= 4.0:
        return 'Positive' 
    elif rating >= 3.0:
        return 'Neutral'  
    else:
        return 'Negative'  


df['Sentiment'] = df['Rating'].apply(get_sentiment_from_rating)


df.to_csv('amazon_products_with_sentiment.csv', index=False)
df


Unnamed: 0,Product Name,Brand,Price (INR),Rating,Source,Product Type,Unnamed: 6,Sentiment
0,Noise,Noise,1499.0,4.0,Amazon,Watches,,Positive
1,Noise,Noise,2799.0,4.0,Amazon,Watches,,Positive
2,Noise,Noise,3199.0,4.0,Amazon,Watches,,Positive
3,Noise,Noise,2499.0,4.1,Amazon,Watches,,Positive
4,Casio,Casio,1894.0,4.4,Amazon,Watches,,Positive
...,...,...,...,...,...,...,...,...
1614,Skin Biome Serum 50ml | Hydrating & Nourishing...,Skin,2037.0,4.5,Amazon,beauty,,Positive
1615,SIX:AM 3-in-1 Korean Sun Serum SPF 50 PA++++ w...,SIX:AM,999.0,,Amazon,beauty,,Unknown
1616,Ghar Soaps Magic Glow SPF 50+ PA++++ Mineral S...,Ghar,399.0,3.8,Amazon,beauty,,Neutral
1617,Asaya Sheer Milk Sunscreen SPF 50 PA++++ UVA +...,Asaya,424.0,4.7,Amazon,beauty,,Positive


In [20]:
df = pd.read_csv('amazon_products_with_sentiment.csv')
print(df.head())

  Product Name  Brand  Price (INR)  Rating  Source Product Type  Unnamed: 6  \
0        Noise  Noise       1499.0     4.0  Amazon      Watches         NaN   
1        Noise  Noise       2799.0     4.0  Amazon      Watches         NaN   
2        Noise  Noise       3199.0     4.0  Amazon      Watches         NaN   
3        Noise  Noise       2499.0     4.1  Amazon      Watches         NaN   
4        Casio  Casio       1894.0     4.4  Amazon      Watches         NaN   

  Sentiment  
0  Positive  
1  Positive  
2  Positive  
3  Positive  
4  Positive  


In [21]:
df.to_csv('amazon_products_with_sentiment.csv', index=False)

print("File saved successfully!")

File saved successfully!


In [4]:
import pandas as pd

df = pd.read_excel("Merged_File.xlsx")
df


Unnamed: 0,Product Name,Brand,Price (INR),Rating,Sentiment,Availability
0,"Olay Total Effects Day Cream |with Vitamin B5,...",Olay,719,4.3,Neutral,In Stock
1,Dabur Gulabari Premium Rose Water - 400ml (Pac...,Dabur,283,4.4,Neutral,In Stock
2,Olay Cream Regenerist Super Collagen Peptides ...,Olay,1349,4.3,Neutral,In Stock
3,Ponds Bright Beauty Anti-Dullness & Brightenin...,Ponds,214,4.3,Neutral,In Stock
4,"Blue Heaven Festive MakeUp Kit For Women, Medi...",Blue,640,4.1,Neutral,In Stock
...,...,...,...,...,...,...
2801,Pull Reducer Natural Rubber Pull Reducer Train...,Pull,299,3.8,Negative,In Stock
2802,Cult Half Balance Ball Trainer for Full Body W...,Cult,2999,4.6,Not Rated,In Stock
2803,"SLG Plastic Agility Speed 18"" Ring Ladder Set ...",SLG,735,5.0,Positive,In Stock
2804,"2025 New Upgraded Pilates Reformer Machine, Mu...",2025,3299,4.0,Neutral,In Stock


In [5]:
summary = pd.DataFrame({
    "Total Products":[len(df)],
    "Avg Price": [df['Price (INR)'].mean()],
    "Avg Rating":[df['Rating'].mean()],
    "Positive Reviews":[df['Sentiment'].value_counts().get('Positive',0)],
    "Neutral Reviews":[df['Sentiment'].value_counts().get('Neutral',0)],
    "Negative Reviews":[df['Sentiment'].value_counts().get('Negative',0)]
})

summary


Unnamed: 0,Total Products,Avg Price,Avg Rating,Positive Reviews,Neutral Reviews,Negative Reviews
0,2806,5943.825731,4.058517,320,1309,994


In [6]:
df.groupby('Sentiment')['Rating'].mean()
df

Unnamed: 0,Product Name,Brand,Price (INR),Rating,Sentiment,Availability
0,"Olay Total Effects Day Cream |with Vitamin B5,...",Olay,719,4.3,Neutral,In Stock
1,Dabur Gulabari Premium Rose Water - 400ml (Pac...,Dabur,283,4.4,Neutral,In Stock
2,Olay Cream Regenerist Super Collagen Peptides ...,Olay,1349,4.3,Neutral,In Stock
3,Ponds Bright Beauty Anti-Dullness & Brightenin...,Ponds,214,4.3,Neutral,In Stock
4,"Blue Heaven Festive MakeUp Kit For Women, Medi...",Blue,640,4.1,Neutral,In Stock
...,...,...,...,...,...,...
2801,Pull Reducer Natural Rubber Pull Reducer Train...,Pull,299,3.8,Negative,In Stock
2802,Cult Half Balance Ball Trainer for Full Body W...,Cult,2999,4.6,Not Rated,In Stock
2803,"SLG Plastic Agility Speed 18"" Ring Ladder Set ...",SLG,735,5.0,Positive,In Stock
2804,"2025 New Upgraded Pilates Reformer Machine, Mu...",2025,3299,4.0,Neutral,In Stock


In [7]:
df.groupby('Brand')['Price (INR)'].mean().sort_values(ascending=False).head(10)


Brand
iPhone     134233.333333
Google      55866.750000
VIVO        54135.400000
OnePlus     45813.773585
iQOO        40551.777778
Vivo        37381.100000
Samsung     34343.921788
Tecno       32499.000000
Oppo        29036.500000
Nothing     27734.900000
Name: Price (INR), dtype: float64