## Importing Packages for Web Scraping

In [2]:
import time
import re 
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

## Scraping AirPods


In [3]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=airpods&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "AirPods"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append its products to ``data``.

    Opens ``BASE_URL`` with ``&page=<page_no>`` in the shared ``driver``,
    waits a fixed five seconds, then parses the page source. Each
    ``div[data-id]`` element for which ``get_product_name`` finds a name
    adds one dict to the module-level ``data`` list; elements without a
    name are skipped. A missing price, rating or count element is stored
    as an empty string (and the count as "0"). Returns None.
    """
    print(f"Scraping page {page_no}")
    driver.get(f"{BASE_URL}&page={page_no}")
    time.sleep(5)

    def text_or_blank(node, css):
        # Stripped text of the first match for ``css`` under ``node``, or "".
        hit = node.select_one(css)
        return hit.text.strip() if hit else ""

    page_html = BeautifulSoup(driver.page_source, "html.parser")
    for card in page_html.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            continue

        count_text = text_or_blank(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": text_or_blank(card, "div.hZ3P6w"),
            "Rating": text_or_blank(card, "div.MKiFS6"),
            "Reviews_Text": count_text,
            "No_of_Reviews": get_only_review_count(count_text),
        })

try:
    # Result pages 1 through 5, with a short pause after each request.
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Close the browser even when a page raises part-way through.
    driver.quit()

# Columns are named explicitly so a run that collected nothing still writes
# the header row; for non-empty data they match the dict keys built in
# scrape_page, in the same order.
df_airpods = pd.DataFrame(
    data,
    columns=["Product Name", "Category", "Price", "Rating", "Reviews_Text", "No_of_Reviews"],
)
df_airpods.to_csv("flipkart_airpods.csv", index=False)
print("\nData saved to flipkart_airpods.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_airpods.csv

Scraping completed successfully! Total: 200 products


In [4]:
# Reload the rows from the CSV saved by the scraping cell, so the frame
# displayed below reflects what was written to disk.
df_airpods = pd.read_csv("flipkart_airpods.csv")
df_airpods

Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,Apple AirPods Pro (2nd generation) with MagSaf...,AirPods,"₹16,990",4.6,"(35,836)",35836
1,"Apple AirPods 4 Wireless Earbuds, Personalised...",AirPods,"₹10,999",4.4,"(2,949)",2949
2,Apple AirPods(2nd gen) with Charging Case Blue...,AirPods,"₹7,999",4.5,"(1,55,790)",155790
3,Apple AirPods Pro 3 Bluetooth,AirPods,"₹25,900",4.6,(610),610
4,Apple AirPods 4 Wireless Earbuds with Active N...,AirPods,"₹16,900",4.4,"(1,703)",1703
...,...,...,...,...,...,...
195,"XEWISS Superpods Immersio, 60Hrs, Dolby Audio,...",AirPods,₹867,,,0
196,ULTADOR Wireless Headphone - 100Hours Playtime...,AirPods,₹497,4.1,(13),13
197,BVEXO Bluetooth Wireless Neckband | Magnetic O...,AirPods,₹472,,,0
198,YEZZY BEZZY Plastic Press Stud Headphone Case ...,AirPods,₹295,3.6,(29),29


## Scraping Smart Watches

In [5]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=smart+watches&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "Smart Watches"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append its products to ``data``.

    Opens ``BASE_URL`` with ``&page=<page_no>`` in the shared ``driver``,
    waits a fixed five seconds, then parses the page source. Each
    ``div[data-id]`` element for which ``get_product_name`` finds a name
    adds one dict to the module-level ``data`` list; elements without a
    name are skipped. A missing price, rating or count element is stored
    as an empty string (and the count as "0"). Returns None.
    """
    # NOTE(review): the sample rows displayed for this category show empty
    # Rating / Reviews_Text values - the rating selectors may not match this
    # results layout; confirm against the live page.
    print(f"Scraping page {page_no}")
    driver.get(f"{BASE_URL}&page={page_no}")
    time.sleep(5)

    def text_or_blank(node, css):
        # Stripped text of the first match for ``css`` under ``node``, or "".
        hit = node.select_one(css)
        return hit.text.strip() if hit else ""

    page_html = BeautifulSoup(driver.page_source, "html.parser")
    for card in page_html.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            continue

        count_text = text_or_blank(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": text_or_blank(card, "div.hZ3P6w"),
            "Rating": text_or_blank(card, "div.MKiFS6"),
            "Reviews_Text": count_text,
            "No_of_Reviews": get_only_review_count(count_text),
        })

try:
    # Result pages 1 through 5, with a short pause after each request.
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Close the browser even when a page raises part-way through.
    driver.quit()

# Columns are named explicitly so a run that collected nothing still writes
# the header row; for non-empty data they match the dict keys built in
# scrape_page, in the same order.
df_smartwatches = pd.DataFrame(
    data,
    columns=["Product Name", "Category", "Price", "Rating", "Reviews_Text", "No_of_Reviews"],
)
df_smartwatches.to_csv("flipkart_smartwatches.csv", index=False)
print("\nData saved to flipkart_smartwatches.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_smartwatches.csv

Scraping completed successfully! Total: 200 products


In [6]:
# Reload the rows from the CSV saved by the scraping cell, so the frame
# displayed below reflects what was written to disk.
df_smartwatches = pd.read_csv("flipkart_smartwatches.csv")
df_smartwatches

Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,Fire-Boltt Hurricane 33.02mm (1.3) Curved Glas...,Smart Watches,"₹1,199",,,0
1,"PunnkFunnk Edge-Lite | 1.99 "" Amoled Display |...",Smart Watches,₹849,,,0
2,TECHIO H9 PRO MAX SMART WATCH Smartwatch,Smart Watches,₹773,,,0
3,"Cellecor STEW 1.83"" BT Calling, 500 NITS, AI V...",Smart Watches,₹899,,,0
4,"Noise Icon Arc 2.01"" Immersive Curved Display,...",Smart Watches,"₹1,199",,,0
...,...,...,...,...,...,...
195,Fastrack FS1-1.85''|Advanced Blazing Fast UI|W...,Smart Watches,"₹1,299",,,0
196,Fire-Boltt Rise Luxe Bluetooth Calling 47mm (1...,Smart Watches,"₹1,399",,,0
197,"GOBOULT Drift+ 1.85'' HD Display ,BT Calling, ...",Smart Watches,"₹1,399",,,0
198,"boAt Storm Call w/ 4.29 cm(1.69""), BT Calling ...",Smart Watches,"₹1,349",,,0


## Scraping Laptops

In [7]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "Laptops"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append its products to ``data``.

    Opens ``BASE_URL`` with ``&page=<page_no>`` in the shared ``driver``,
    waits a fixed five seconds, then parses the page source. Each
    ``div[data-id]`` element for which ``get_product_name`` finds a name
    adds one dict to the module-level ``data`` list; elements without a
    name are skipped. A missing price, rating or count element is stored
    as an empty string (and the count as "0"). Returns None.
    """
    # NOTE(review): the displayed output for this category starts with two
    # identical rows; nothing here de-duplicates - confirm whether that is wanted.
    print(f"Scraping page {page_no}")
    driver.get(f"{BASE_URL}&page={page_no}")
    time.sleep(5)

    def text_or_blank(node, css):
        # Stripped text of the first match for ``css`` under ``node``, or "".
        hit = node.select_one(css)
        return hit.text.strip() if hit else ""

    page_html = BeautifulSoup(driver.page_source, "html.parser")
    for card in page_html.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            continue

        count_text = text_or_blank(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": text_or_blank(card, "div.hZ3P6w"),
            "Rating": text_or_blank(card, "div.MKiFS6"),
            "Reviews_Text": count_text,
            "No_of_Reviews": get_only_review_count(count_text),
        })

try:
    # Result pages 1 through 5, with a short pause after each request.
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Close the browser even when a page raises part-way through.
    driver.quit()

# Columns are named explicitly so a run that collected nothing still writes
# the header row; for non-empty data they match the dict keys built in
# scrape_page, in the same order.
df_laptops = pd.DataFrame(
    data,
    columns=["Product Name", "Category", "Price", "Rating", "Reviews_Text", "No_of_Reviews"],
)
df_laptops.to_csv("flipkart_laptops.csv", index=False)
print("\nData saved to flipkart_laptops.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_laptops.csv

Scraping completed successfully! Total: 120 products


In [8]:
# Reload the rows from the CSV saved by the scraping cell, so the frame
# displayed below reflects what was written to disk.
df_laptops = pd.read_csv("flipkart_laptops.csv")
df_laptops

Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,MOTOROLA Motobook 60 Pro Full Metal OLED AI PC...,Laptops,"₹61,990",4.5,297 Ratings & 48 Reviews,297
1,MOTOROLA Motobook 60 Pro Full Metal OLED AI PC...,Laptops,"₹61,990",4.5,297 Ratings & 48 Reviews,297
2,Acer Aspire 15 AMD Ryzen 5 Hexa Core 7430U - (...,Laptops,"₹37,990",4.1,"3,623 Ratings & 345 Reviews",3623
3,HP Intel Core i3 13th Gen 1315U - (8 GB/512 GB...,Laptops,"₹39,990",4.2,99 Ratings & 4 Reviews,99
4,HP 15s AMD Ryzen 5 Quad Core 7520U - (16 GB/51...,Laptops,"₹46,990",4.3,79 Ratings & 1 Reviews,79
...,...,...,...,...,...,...
115,ASUS Vivobook Go 15 (2025) with Office 2024 + ...,Laptops,"₹37,990",4.2,30 Ratings & 2 Reviews,30
116,ASUS ExpertBook P1 (i7 14th Gen) Intel Core 7 ...,Laptops,"₹79,990",,,0
117,ASUS ExpertBook P1 (i5 14th Gen) Intel Core 5 ...,Laptops,"₹67,990",,,0
118,Acer NITRO LITE 16 Intel Core i5 13th Gen 1342...,Laptops,"₹74,999",4.4,502 Ratings & 46 Reviews,502


## Scraping Smart TVs

In [9]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=smart+tv&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "Smart TV"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append its products to ``data``.

    Opens ``BASE_URL`` with ``&page=<page_no>`` in the shared ``driver``,
    waits a fixed five seconds, then parses the page source. Each
    ``div[data-id]`` element for which ``get_product_name`` finds a name
    adds one dict to the module-level ``data`` list; elements without a
    name are skipped. A missing price, rating or count element is stored
    as an empty string (and the count as "0"). Returns None.
    """
    print(f"Scraping page {page_no}")
    driver.get(f"{BASE_URL}&page={page_no}")
    time.sleep(5)

    def text_or_blank(node, css):
        # Stripped text of the first match for ``css`` under ``node``, or "".
        hit = node.select_one(css)
        return hit.text.strip() if hit else ""

    page_html = BeautifulSoup(driver.page_source, "html.parser")
    for card in page_html.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            continue

        count_text = text_or_blank(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": text_or_blank(card, "div.hZ3P6w"),
            "Rating": text_or_blank(card, "div.MKiFS6"),
            "Reviews_Text": count_text,
            "No_of_Reviews": get_only_review_count(count_text),
        })

try:
    # Result pages 1 through 5, with a short pause after each request.
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Close the browser even when a page raises part-way through.
    driver.quit()

# Columns are named explicitly so a run that collected nothing still writes
# the header row; for non-empty data they match the dict keys built in
# scrape_page, in the same order.
df_smarttv = pd.DataFrame(
    data,
    columns=["Product Name", "Category", "Price", "Rating", "Reviews_Text", "No_of_Reviews"],
)
df_smarttv.to_csv("flipkart_smarttv.csv", index=False)
print("\nData saved to flipkart_smarttv.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_smarttv.csv

Scraping completed successfully! Total: 120 products


In [10]:
# Reload the rows from the CSV saved by the scraping cell, so the frame
# displayed below reflects what was written to disk.
df_smarttv = pd.read_csv("flipkart_smarttv.csv")
df_smarttv

Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,PHILIPS 138 cm (55 inch) QLED Ultra HD (4K) Sm...,Smart TV,"₹33,999",4.1,"2,489 Ratings & 352 Reviews",2489
1,PHILIPS 164 cm (65 inch) QLED Ultra HD (4K) Sm...,Smart TV,"₹46,999",4.1,"2,489 Ratings & 352 Reviews",2489
2,Infinix 80 cm (32 inch) HD Ready LED Smart Lin...,Smart TV,"₹7,799",4.2,"66,751 Ratings & 6,204 Reviews",66751
3,realme TechLife 80 cm (32 inch) QLED HD Ready ...,Smart TV,"₹9,199",4.3,"22,932 Ratings & 1,775 Reviews",22932
4,realme TechLife 80 cm (32 inch) QLED HD Ready ...,Smart TV,"₹9,299",4.3,"22,932 Ratings & 1,775 Reviews",22932
...,...,...,...,...,...,...
115,realme TechLife 191 cm (75 inch) QLED Ultra HD...,Smart TV,"₹58,099",4.3,"22,932 Ratings & 1,775 Reviews",22932
116,TOSHIBA C350NP 139 cm (55 inch) Ultra HD (4K) ...,Smart TV,"₹31,999",4.3,"7,369 Ratings & 1,184 Reviews",7369
117,Thomson 139 cm (55 inch) QLED Ultra HD (4K) Sm...,Smart TV,"₹25,499",4.3,"37,424 Ratings & 5,922 Reviews",37424
118,TCL 108 cm (43 inch) Ultra HD (4K) LED Smart G...,Smart TV,"₹21,990",4.1,"5,020 Ratings & 336 Reviews",5020


## Scraping Mixer Grinders

In [11]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=mixer+grinder&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "Mixer Grinders"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append its products to ``data``.

    Opens ``BASE_URL`` with ``&page=<page_no>`` in the shared ``driver``,
    waits a fixed five seconds, then parses the page source. Each
    ``div[data-id]`` element for which ``get_product_name`` finds a name
    adds one dict to the module-level ``data`` list; elements without a
    name are skipped. A missing price, rating or count element is stored
    as an empty string (and the count as "0"). Returns None.
    """
    print(f"Scraping page {page_no}")
    driver.get(f"{BASE_URL}&page={page_no}")
    time.sleep(5)

    def text_or_blank(node, css):
        # Stripped text of the first match for ``css`` under ``node``, or "".
        hit = node.select_one(css)
        return hit.text.strip() if hit else ""

    page_html = BeautifulSoup(driver.page_source, "html.parser")
    for card in page_html.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            continue

        count_text = text_or_blank(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": text_or_blank(card, "div.hZ3P6w"),
            "Rating": text_or_blank(card, "div.MKiFS6"),
            "Reviews_Text": count_text,
            "No_of_Reviews": get_only_review_count(count_text),
        })

try:
    # Result pages 1 through 5, with a short pause after each request.
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Close the browser even when a page raises part-way through.
    driver.quit()

# Columns are named explicitly so a run that collected nothing still writes
# the header row; for non-empty data they match the dict keys built in
# scrape_page, in the same order.
df_mixergrinders = pd.DataFrame(
    data,
    columns=["Product Name", "Category", "Price", "Rating", "Reviews_Text", "No_of_Reviews"],
)
df_mixergrinders.to_csv("flipkart_mixergrinders.csv", index=False)
print("\nData saved to flipkart_mixergrinders.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_mixergrinders.csv

Scraping completed successfully! Total: 120 products


In [12]:
# Reload the rows from the CSV saved by the scraping cell, so the frame
# displayed below reflects what was written to disk.
df_mixergrinders = pd.read_csv("flipkart_mixergrinders.csv")
df_mixergrinders

Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,Flipkart SmartBuy Pento Blue 500 W Juicer Mixe...,Mixer Grinders,"₹1,085",4.2,"7,793 Ratings & 1,138 Reviews",7793
1,Sansui Plus 500 W Juicer Mixer Grinder,Mixer Grinders,"₹1,299",4.2,"62,310 Ratings & 3,914 Reviews",62310
2,Sansui Pro 500 W Juicer Mixer Grinder,Mixer Grinders,"₹1,899",4.1,"8,504 Ratings & 680 Reviews",8504
3,Longway Super Dlx 750 W Juicer Mixer Grinder,Mixer Grinders,"₹1,499",4.1,"2,25,575 Ratings & 47,492 Reviews",225575
4,Moonstruck Juicer Mixer Grinder 750 W Juicer M...,Mixer Grinders,"₹1,144",4.1,"12,544 Ratings & 1,960 Reviews",12544
...,...,...,...,...,...,...
115,Growsmart Powerful HydroGrind Heavy Duty Coppe...,Mixer Grinders,"₹1,367",3.9,"7,059 Ratings & 853 Reviews",7059
116,"Alibaba Super Combo 750 W Dry Iron (White, Gre...",Mixer Grinders,"₹1,697",3.9,"3,906 Ratings & 305 Reviews",3906
117,Skystar PowerMixx Pro 750 W Juicer Mixer Grinder,Mixer Grinders,"₹1,046",3.9,"6,099 Ratings & 1,046 Reviews",6099
118,BOSCH Mixer Grinder 1000 W Mixer Grinder,Mixer Grinders,"₹5,599",4.5,"25,439 Ratings & 2,286 Reviews",25439


## Scraping Air Coolers

In [13]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=air+cooler&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "Air Coolers"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append its products to ``data``.

    Opens ``BASE_URL`` with ``&page=<page_no>`` in the shared ``driver``,
    waits a fixed five seconds, then parses the page source. Each
    ``div[data-id]`` element for which ``get_product_name`` finds a name
    adds one dict to the module-level ``data`` list; elements without a
    name are skipped. A missing price, rating or count element is stored
    as an empty string (and the count as "0"). Returns None.
    """
    print(f"Scraping page {page_no}")
    driver.get(f"{BASE_URL}&page={page_no}")
    time.sleep(5)

    def text_or_blank(node, css):
        # Stripped text of the first match for ``css`` under ``node``, or "".
        hit = node.select_one(css)
        return hit.text.strip() if hit else ""

    page_html = BeautifulSoup(driver.page_source, "html.parser")
    for card in page_html.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            continue

        count_text = text_or_blank(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": text_or_blank(card, "div.hZ3P6w"),
            "Rating": text_or_blank(card, "div.MKiFS6"),
            "Reviews_Text": count_text,
            "No_of_Reviews": get_only_review_count(count_text),
        })

try:
    # Result pages 1 through 5, with a short pause after each request.
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Close the browser even when a page raises part-way through.
    driver.quit()

# Columns are named explicitly so a run that collected nothing still writes
# the header row; for non-empty data they match the dict keys built in
# scrape_page, in the same order.
df_aircoolers = pd.DataFrame(
    data,
    columns=["Product Name", "Category", "Price", "Rating", "Reviews_Text", "No_of_Reviews"],
)
df_aircoolers.to_csv("flipkart_aircoolers.csv", index=False)
print("\nData saved to flipkart_aircoolers.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_aircoolers.csv

Scraping completed successfully! Total: 200 products


## Scraping Refrigerators


In [14]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=refrigerator&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "Refrigerators"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append its products to ``data``.

    Opens ``BASE_URL`` with ``&page=<page_no>`` in the shared ``driver``,
    waits a fixed five seconds, then parses the page source. Each
    ``div[data-id]`` element for which ``get_product_name`` finds a name
    adds one dict to the module-level ``data`` list; elements without a
    name are skipped. A missing price, rating or count element is stored
    as an empty string (and the count as "0"). Returns None.
    """
    print(f"Scraping page {page_no}")
    driver.get(f"{BASE_URL}&page={page_no}")
    time.sleep(5)

    def text_or_blank(node, css):
        # Stripped text of the first match for ``css`` under ``node``, or "".
        hit = node.select_one(css)
        return hit.text.strip() if hit else ""

    page_html = BeautifulSoup(driver.page_source, "html.parser")
    for card in page_html.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            continue

        count_text = text_or_blank(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": text_or_blank(card, "div.hZ3P6w"),
            "Rating": text_or_blank(card, "div.MKiFS6"),
            "Reviews_Text": count_text,
            "No_of_Reviews": get_only_review_count(count_text),
        })

try:
    # Result pages 1 through 5, with a short pause after each request.
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Close the browser even when a page raises part-way through.
    driver.quit()

# Columns are named explicitly so a run that collected nothing still writes
# the header row; for non-empty data they match the dict keys built in
# scrape_page, in the same order.
df_refrigerators = pd.DataFrame(
    data,
    columns=["Product Name", "Category", "Price", "Rating", "Reviews_Text", "No_of_Reviews"],
)
df_refrigerators.to_csv("flipkart_refrigerators.csv", index=False)
print("\nData saved to flipkart_refrigerators.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_refrigerators.csv

Scraping completed successfully! Total: 120 products


In [15]:
# Reload the rows from the CSV saved by the scraping cell, so the frame
# displayed below reflects what was written to disk.
df_refrigerators = pd.read_csv("flipkart_refrigerators.csv")
df_refrigerators


Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,Samsung 183 L Direct Cool Single Door 2 Star R...,Refrigerators,"₹13,490",4.5,"1,34,631 Ratings & 8,737 Reviews",134631
1,Godrej 183 L Direct Cool Single Door 3 Star Re...,Refrigerators,"₹12,990",4.3,"9,960 Ratings & 547 Reviews",9960
2,Haier 185 L Direct Cool Single Door 2 Star Ref...,Refrigerators,"₹11,990",4.3,"17,002 Ratings & 1,046 Reviews",17002
3,Samsung 183 L Direct Cool Single Door 4 Star R...,Refrigerators,"₹14,990",4.4,"16,961 Ratings & 1,003 Reviews",16961
4,Voltas Beko 183 L Direct Cool Single Door 2 St...,Refrigerators,"₹11,990",4.3,"12,119 Ratings & 741 Reviews",12119
...,...,...,...,...,...,...
115,Samsung 385 L Frost Free Double Door 2 Star Co...,Refrigerators,"₹42,990",4.2,"16,007 Ratings & 1,059 Reviews",16007
116,Whirlpool 235 L Frost Free Double Door 2 Star ...,Refrigerators,"₹20,990",4.2,"9,738 Ratings & 727 Reviews",9738
117,Samsung 183 L Direct Cool Single Door 5 Star R...,Refrigerators,"₹16,790",4.5,"1,34,631 Ratings & 8,737 Reviews",134631
118,BOSCH 207 L Direct Cool Single Door 5 Star Ref...,Refrigerators,"₹18,990",4.6,409 Ratings & 35 Reviews,409


## Scraping Washing Machines

In [16]:
# Search-results URL for this category; scrape_page() appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=washing+machine&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
# Value stored in the "Category" column of every row collected by this cell.
CATEGORY = "Washing Machines"

# Chrome command-line switches, added in this order.
options = Options()
for _flag in (
    "--headless",
    "--disable-blink-features=AutomationControlled",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
):
    options.add_argument(_flag)

# Browser session shared by scrape_page(); it is closed by the run loop's finally.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Rows gathered by scrape_page(); reset each time this cell runs.
data = []

def get_only_review_count(text):
    """Return the ratings count contained in ``text`` as a string of digits.

    Two formats are recognised: '573 Ratings & 40 Reviews' -> '573' (the
    number in front of "Ratings" wins, not the "Reviews" figure) and a bare
    or parenthesised number such as '(35,184)' -> '35184'. Thousands
    separators are removed. Returns "0" when ``text`` is None, empty, or
    holds no digits.
    """
    if not text:
        return "0"
    # The capture must start with a digit: a looser [\d,]+ would accept a
    # lone "," and yield "" instead of "0".
    number = r'(\d[\d,]*)'
    match = re.search(number + r'\s*Ratings?', text, re.IGNORECASE)
    if match is None:
        # No "N Ratings" phrase: take the first number once parentheses are dropped.
        match = re.search(number, text.replace('(', '').replace(')', ''))
    return match.group(1).replace(',', '') if match else "0"

def get_product_name(container):
    """Return the first non-empty product name found inside ``container``.

    The selectors are tried in the listed order. A 'title' entry reads the
    matched element's ``title`` attribute; a 'text' entry reads its stripped
    text. Returns None when no selector yields a non-empty value.
    """
    candidates = (
        ('a.pIpigb', 'title'),
        ('a.atJtCj', 'title'),
        ('div.RG5Slk', 'text'),
        ('a.wjcEIp', 'title'),
        ('div.KzDlHZ', 'text'),
        ('a.CGtC98', 'title'),
    )
    for css, source in candidates:
        node = container.select_one(css)
        if not node:
            continue
        if source == 'title':
            label = node.get('title', '')
        else:
            label = node.get_text(strip=True)
        if label:
            return label
    return None

def scrape_page(page_no):
    """Load one search-results page and append a row per named product to ``data``.

    Args:
        page_no: Value placed in the ``page`` query parameter appended to
            ``BASE_URL``.

    Returns:
        None. Each product becomes a dict appended to the module-level
        ``data`` list, with keys ``Product Name``, ``Category``, ``Price``,
        ``Rating``, ``Reviews_Text`` and ``No_of_Reviews``.
    """
    url = f"{BASE_URL}&page={page_no}"
    print(f"Scraping page {page_no}")
    driver.get(url)
    # Fixed pause before reading page_source -- presumably to let the page
    # finish rendering; confirm before shortening.
    time.sleep(5)

    soup = BeautifulSoup(driver.page_source, "html.parser")

    def _stripped_text(card, css):
        """Stripped text of the first ``css`` match in ``card``, or "" if none."""
        hit = card.select_one(css)
        return hit.text.strip() if hit else ""

    for card in soup.select("div[data-id]"):
        title = get_product_name(card)
        if not title:
            # No usable name found for this container: skip it.
            continue

        reviews_label = _stripped_text(card, "span.PvbNMB")
        data.append({
            "Product Name": title,
            "Category": CATEGORY,
            "Price": _stripped_text(card, "div.hZ3P6w"),
            "Rating": _stripped_text(card, "div.MKiFS6"),
            "Reviews_Text": reviews_label,
            "No_of_Reviews": get_only_review_count(reviews_label),
        })

# Scrape result pages 1 through 5, pausing 2 seconds after each page.
try:
    for page in range(1, 6):
        scrape_page(page)
        time.sleep(2)
finally:
    # Runs whether or not scraping raised, so the browser session is always
    # closed. If an exception did occur it propagates after this and the
    # save step below is not reached.
    driver.quit()

# Build the frame from the accumulated rows and write it without the index column.
df_washingmachines = pd.DataFrame(data)
df_washingmachines.to_csv("flipkart_washingmachines.csv", index=False)
print("\nData saved to flipkart_washingmachines.csv")
print(f"\nScraping completed successfully! Total: {len(data)} products")

Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5

Data saved to flipkart_washingmachines.csv

Scraping completed successfully! Total: 120 products


In [17]:
# Reload the saved CSV (rebinding df_washingmachines to the on-disk copy);
# the bare expression on the last line is what the cell displays.
df_washingmachines = pd.read_csv("flipkart_washingmachines.csv")
df_washingmachines

Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,Voltas Beko 7 kg Fully Automatic Top Load Wash...,Washing Machines,"₹13,990",4.1,"2,724 Ratings & 206 Reviews",2724
1,Whirlpool 7 kg Magic Clean 5 Star Fully Automa...,Washing Machines,"₹15,140",4.1,"45,161 Ratings & 2,804 Reviews",45161
2,"Haier 6.5 kg Balance Clean Pulsator, Custom Wa...",Washing Machines,"₹11,790",4.3,"76,605 Ratings & 8,341 Reviews",76605
3,"LG 7 kg 5 Star with Wind Jet Dry, Collar Scrub...",Washing Machines,"₹10,990",4.5,"55,685 Ratings & 3,351 Reviews",55685
4,realme TechLife 7 kg 5 Star rating Semi Automa...,Washing Machines,"₹7,590",4.2,"1,02,791 Ratings & 8,718 Reviews",102791
...,...,...,...,...,...,...
115,Haier 11 kg Toughened Glass with Digital Kingf...,Washing Machines,"₹14,850",4.3,"7,401 Ratings & 432 Reviews",7401
116,"LG 9 kg 5 Star with AI Direct Drive, 6 Motion,...",Washing Machines,"₹38,990",4.4,"14,787 Ratings & 946 Reviews",14787
117,ONIDA 9 kg Washer only Black,Washing Machines,"₹6,489",4.2,"31,564 Ratings & 3,284 Reviews",31564
118,Voltas Beko 8 kg Fully Automatic Top Load Wash...,Washing Machines,"₹17,490",4.3,"1,100 Ratings & 90 Reviews",1100


## Combining All Data

In [18]:
# Reload every per-category CSV from disk, so this cell works from the saved
# files rather than from frames left in memory by the scraping cells.
df_airpods = pd.read_csv("flipkart_airpods.csv")
df_smartwatches = pd.read_csv("flipkart_smartwatches.csv")
df_laptops = pd.read_csv("flipkart_laptops.csv")
df_smarttv = pd.read_csv("flipkart_smarttv.csv")
df_mixergrinders = pd.read_csv("flipkart_mixergrinders.csv")
df_aircoolers = pd.read_csv("flipkart_aircoolers.csv")
df_refrigerators = pd.read_csv("flipkart_refrigerators.csv")
df_washingmachines = pd.read_csv("flipkart_washingmachines.csv")

# Stack the frames in the order listed; ignore_index gives the combined
# frame a fresh 0..N-1 row index.
category_frames = [
    df_airpods,
    df_smartwatches,
    df_laptops,
    df_smarttv,
    df_mixergrinders,
    df_aircoolers,
    df_refrigerators,
    df_washingmachines,
]
df_all_products = pd.concat(category_frames, ignore_index=True)

df_all_products.to_csv("flipkart_all_products.csv", index=False)

# Summary report: total row count plus rows per category.
banner = "=" * 60
print(banner)
print("FINAL SUMMARY")
print(banner)
print(f"\nTotal products scraped: {len(df_all_products)}")
print(f"\nCombined data saved to: flipkart_all_products.csv")
print("\nProducts per category:")
print(df_all_products['Category'].value_counts())

FINAL SUMMARY

Total products scraped: 1200

Combined data saved to: flipkart_all_products.csv

Products per category:
Category
AirPods             200
Smart Watches       200
Air Coolers         200
Laptops             120
Smart TV            120
Mixer Grinders      120
Refrigerators       120
Washing Machines    120
Name: count, dtype: int64


In [20]:
# Display the combined frame as this cell's output.
df_all_products

Unnamed: 0,Product Name,Category,Price,Rating,Reviews_Text,No_of_Reviews
0,Apple AirPods Pro (2nd generation) with MagSaf...,AirPods,"₹16,990",4.6,"(35,836)",35836
1,"Apple AirPods 4 Wireless Earbuds, Personalised...",AirPods,"₹10,999",4.4,"(2,949)",2949
2,Apple AirPods(2nd gen) with Charging Case Blue...,AirPods,"₹7,999",4.5,"(1,55,790)",155790
3,Apple AirPods Pro 3 Bluetooth,AirPods,"₹25,900",4.6,(610),610
4,Apple AirPods 4 Wireless Earbuds with Active N...,AirPods,"₹16,900",4.4,"(1,703)",1703
...,...,...,...,...,...,...
1195,Haier 11 kg Toughened Glass with Digital Kingf...,Washing Machines,"₹14,850",4.3,"7,401 Ratings & 432 Reviews",7401
1196,"LG 9 kg 5 Star with AI Direct Drive, 6 Motion,...",Washing Machines,"₹38,990",4.4,"14,787 Ratings & 946 Reviews",14787
1197,ONIDA 9 kg Washer only Black,Washing Machines,"₹6,489",4.2,"31,564 Ratings & 3,284 Reviews",31564
1198,Voltas Beko 8 kg Fully Automatic Top Load Wash...,Washing Machines,"₹17,490",4.3,"1,100 Ratings & 90 Reviews",1100
