## SHOPSY WOMENS CLOTHING

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stamped on every scraped row, and the listing page to open.
CATEGORY = "Women's Clothing"
URL = "https://www.shopsy.in/womens-clothing-online/pr?p[]=facets.price_range.from=500&p[]=facets.price_range.to=Max"

# Chrome launch flags.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() supplies the driver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Shared explicit wait with a 15 s timeout, used by the main loop.
wait = WebDriverWait(driver, timeout=15)

# Open the listing, then pause a fixed 6 s before any scraping starts.
driver.get(URL)
time.sleep(6)

# Accumulates one dict per scraped product across all pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six rounds of bottom -> up 400px -> bottom scrolling on the global ``driver``.

    Each round pauses 2 s, 1 s and 2 s after its three scroll commands
    (presumably so newly revealed content can load; not verifiable from here).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs making up a single round.
    one_round = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in one_round:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the currently loaded listing page and append its products to ``data``.

    Reads ``driver.page_source``, collects four independent element lists
    (names, prices, ratings, review counts) by CSS class and pairs them up
    by position. One dict per product is appended to the module-level
    ``data`` list, tagged with ``CATEGORY``.

    Returns:
        int: number of rows appended for this page.

    NOTE(review): pairing is purely positional, so a product lacking a rating
    or a review count shifts every later value onto the wrong product.
    Extracting all fields per product card would remove that risk but needs
    the card container selector -- TODO confirm against the live DOM.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only purely numeric texts such as "4.1"; the text is
    # extracted once per element instead of twice.
    rating_texts = (
        r.get_text(strip=True)
        for r in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn")
    )
    ratings = [t for t in rating_texts if t.replace(".", "").isdigit()]

    # Reviews: divs whose whole text is "(<count>)". Thousands separators are
    # stripped so "(1,234)" is kept as "1234"; previously such counts failed
    # isdigit() and were dropped, misaligning every later review.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, the same cap min(len(...)) applied before.
    page_total = 0
    for name_el, price_el, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_el.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_el.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
LAST_PAGE = 3  # listing pages to visit; pagination is skipped after this one

try:
    for page_no in range(1, LAST_PAGE + 1):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating-style classes.
        # Catch Exception (not a bare except) so a kernel interrupt still
        # propagates, and report the failure instead of hiding it.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"Wait for rating elements failed ({type(e).__name__}); scraping anyway")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == LAST_PAGE:
            break

        # Pagination click: locate the div wrapping the next page number and
        # click it through JavaScript after centring it in the viewport.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break

    # -------- SAVE CSV --------
    df_womens_clothing = pd.DataFrame(data).drop_duplicates()
    df_womens_clothing.to_csv("shopsy_womens_clothing.csv", index=False)

    print(f"\n Total products scraped: {len(df_womens_clothing)}")
finally:
    # Close the browser even when scraping or saving raised, so no Chrome
    # process is left running.
    driver.quit()



 Scraping page 1
Page 1 scraped 30 products

 Scraping page 2
Page 2 scraped 33 products

 Scraping page 3
Page 3 scraped 38 products

 Total products scraped: 97


In [2]:
import pandas as pd

# Reload the CSV written by the scraping cell and display the resulting frame.
df_womens_clothing = pd.read_csv("shopsy_womens_clothing.csv")
df_womens_clothing

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"KOTHOTYA Women Kurta, Pyjama & Dupatta Set",Women's Clothing,₹614,3.9,830
1,"BLACK PATTI Self Design Anarkali Kurta, Bottom...",Women's Clothing,₹587,3.8,694
2,MISS CLOTHING Women Embroidered Kurta,Women's Clothing,₹485,3.7,41
3,"TIGERSNAKE Solid, Plain Bollywood Lycra Blend ...",Women's Clothing,₹485,3.9,694
4,Bairaj Women Kurta and Pant Set,Women's Clothing,₹567,3.8,383
...,...,...,...,...,...
92,NaziyaSarees Banarasi Silk Blend Saree with Un...,Women's Clothing,₹572,4.0,695
93,KAYRAAH Women Chikan Embroidery Kurta,Women's Clothing,₹535,3.8,41
94,pluze fashion studio Self Design Kanjivaram Pu...,Women's Clothing,₹485,3.7,65
95,MISS CLOTHING Women Embroidered Kurta,Women's Clothing,₹485,4.2,15


In [3]:
# Count rows that are exact duplicates of an earlier row in the reloaded frame.
df_womens_clothing.duplicated().sum()

np.int64(0)

## SHOPSY MENS CLOTHING

In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stamped on every scraped row, and the listing page to open.
CATEGORY = "Men's Clothing"
URL = "https://www.shopsy.in/mens-clothing-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"

# Chrome launch flags.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() supplies the driver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Shared explicit wait with a 15 s timeout, used by the main loop.
wait = WebDriverWait(driver, timeout=15)

# Open the listing, then pause a fixed 6 s before any scraping starts.
driver.get(URL)
time.sleep(6)

# Accumulates one dict per scraped product across all pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six rounds of bottom -> up 400px -> bottom scrolling on the global ``driver``.

    Each round pauses 2 s, 1 s and 2 s after its three scroll commands
    (presumably so newly revealed content can load; not verifiable from here).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs making up a single round.
    one_round = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in one_round:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the currently loaded listing page and append its products to ``data``.

    Reads ``driver.page_source``, collects four independent element lists
    (names, prices, ratings, review counts) by CSS class and pairs them up
    by position. One dict per product is appended to the module-level
    ``data`` list, tagged with ``CATEGORY``.

    Returns:
        int: number of rows appended for this page.

    NOTE(review): pairing is purely positional, so a product lacking a rating
    or a review count shifts every later value onto the wrong product.
    Extracting all fields per product card would remove that risk but needs
    the card container selector -- TODO confirm against the live DOM.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only purely numeric texts such as "4.1"; the text is
    # extracted once per element instead of twice.
    rating_texts = (
        r.get_text(strip=True)
        for r in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn")
    )
    ratings = [t for t in rating_texts if t.replace(".", "").isdigit()]

    # Reviews: divs whose whole text is "(<count>)". Thousands separators are
    # stripped so "(1,234)" is kept as "1234"; previously such counts failed
    # isdigit() and were dropped, misaligning every later review.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, the same cap min(len(...)) applied before.
    page_total = 0
    for name_el, price_el, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_el.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_el.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
LAST_PAGE = 3  # listing pages to visit; pagination is skipped after this one

try:
    for page_no in range(1, LAST_PAGE + 1):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating-style classes.
        # Catch Exception (not a bare except) so a kernel interrupt still
        # propagates, and report the failure instead of hiding it.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"Wait for rating elements failed ({type(e).__name__}); scraping anyway")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == LAST_PAGE:
            break

        # Pagination click: locate the div wrapping the next page number and
        # click it through JavaScript after centring it in the viewport.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break

    # -------- SAVE CSV --------
    df_mens_clothing = pd.DataFrame(data).drop_duplicates()
    df_mens_clothing.to_csv("shopsy_mens_clothing.csv", index=False)

    print(f"\n Total products scraped: {len(df_mens_clothing)}")
finally:
    # Close the browser even when scraping or saving raised, so no Chrome
    # process is left running.
    driver.quit()



 Scraping page 1
Page 1 scraped 23 products

 Scraping page 2
Page 2 scraped 26 products

 Scraping page 3
Page 3 scraped 27 products

 Total products scraped: 70


In [5]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_mens_clothing = pd.read_csv("shopsy_mens_clothing.csv")
df_mens_clothing

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"MACK JONNEY Solid Men White, Grey Track Pants",Men's Clothing,₹556,3.9,33
1,ZAYSH Loose Fit Men Light Blue Jeans,Men's Clothing,₹512,3.7,219
2,ZAYSH Loose Fit Men Black Jeans,Men's Clothing,₹512,3.7,219
3,"TIGERSNAKE Printed Men White, Black T-Shirt",Men's Clothing,₹532,3.6,442
4,huda studio Extra soft microfiber hand & face ...,Men's Clothing,₹485,3.8,453
...,...,...,...,...,...
65,Shafin Fashion Men Denim Cargos,Men's Clothing,₹734,4.0,226
66,ZYRONA Men Casual Jacket,Men's Clothing,₹485,3.8,29
67,VROJASS Men Padded Jacket,Men's Clothing,₹638,3.8,68
68,SQUARERETAILS Printed Men Black T-Shirt,Men's Clothing,₹574,3.5,313


## SHOPSY KIDS CLOTHING

In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stamped on every scraped row, and the listing page to open.
CATEGORY = "Kid's Clothing"
URL = "https://www.shopsy.in/kids-clothing-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"

# Chrome launch flags.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() supplies the driver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Shared explicit wait with a 15 s timeout, used by the main loop.
wait = WebDriverWait(driver, timeout=15)

# Open the listing, then pause a fixed 6 s before any scraping starts.
driver.get(URL)
time.sleep(6)

# Accumulates one dict per scraped product across all pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six rounds of bottom -> up 400px -> bottom scrolling on the global ``driver``.

    Each round pauses 2 s, 1 s and 2 s after its three scroll commands
    (presumably so newly revealed content can load; not verifiable from here).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs making up a single round.
    one_round = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in one_round:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the currently loaded listing page and append its products to ``data``.

    Reads ``driver.page_source``, collects four independent element lists
    (names, prices, ratings, review counts) by CSS class and pairs them up
    by position. One dict per product is appended to the module-level
    ``data`` list, tagged with ``CATEGORY``.

    Returns:
        int: number of rows appended for this page.

    NOTE(review): pairing is purely positional, so a product lacking a rating
    or a review count shifts every later value onto the wrong product.
    Extracting all fields per product card would remove that risk but needs
    the card container selector -- TODO confirm against the live DOM.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only purely numeric texts such as "4.1"; the text is
    # extracted once per element instead of twice.
    rating_texts = (
        r.get_text(strip=True)
        for r in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn")
    )
    ratings = [t for t in rating_texts if t.replace(".", "").isdigit()]

    # Reviews: divs whose whole text is "(<count>)". Thousands separators are
    # stripped so "(1,234)" is kept as "1234"; previously such counts failed
    # isdigit() and were dropped, misaligning every later review.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, the same cap min(len(...)) applied before.
    page_total = 0
    for name_el, price_el, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_el.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_el.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
LAST_PAGE = 3  # listing pages to visit; pagination is skipped after this one

try:
    for page_no in range(1, LAST_PAGE + 1):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating-style classes.
        # Catch Exception (not a bare except) so a kernel interrupt still
        # propagates, and report the failure instead of hiding it.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"Wait for rating elements failed ({type(e).__name__}); scraping anyway")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == LAST_PAGE:
            break

        # Pagination click: locate the div wrapping the next page number and
        # click it through JavaScript after centring it in the viewport.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break

    # -------- SAVE CSV --------
    df_kids_clothing = pd.DataFrame(data).drop_duplicates()
    df_kids_clothing.to_csv("shopsy_kids_clothing.csv", index=False)

    print(f"\n Total products scraped: {len(df_kids_clothing)}")
finally:
    # Close the browser even when scraping or saving raised, so no Chrome
    # process is left running.
    driver.quit()



 Scraping page 1
Page 1 scraped 25 products

 Scraping page 2
Page 2 scraped 26 products

 Scraping page 3
Page 3 scraped 11 products

 Total products scraped: 58


In [7]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_kids_clothing = pd.read_csv("shopsy_kids_clothing.csv")
df_kids_clothing

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Fashionate World Legging For Girls,Kid's Clothing,₹485,3.7,119
1,CHHOTE NAWAB Track Pant For Girls,Kid's Clothing,₹526,3.9,104
2,AYUB FASHION Indi Girls Maxi/Full Length Party...,Kid's Clothing,₹485,4.0,26
3,PLATINUMCARTZ Girls Casual Jacket,Kid's Clothing,₹465,3.9,8
4,HELLCAT Boys Printed Cotton Blend T Shirt,Kid's Clothing,₹985,4.3,37
5,Fashionate World Legging For Girls,Kid's Clothing,₹485,3.8,33
6,"Shirazi Kids Boys Casual Jacket Jeans, T-shirt",Kid's Clothing,₹485,3.7,23
7,Shozuj Boys & Girls Ethnic Jacket,Kid's Clothing,₹485,3.7,47
8,Pramukh Suppliers Bollywood Lycra Blend Saree ...,Kid's Clothing,₹520,3.6,23
9,MEHRAJ DRESSES N/A Girls Party(Festive) Top Pant,Kid's Clothing,₹508,4.1,257


## SHOPSY FOOTWEAR

In [8]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stamped on every scraped row, and the listing page to open.
CATEGORY = "Footwear"
URL = "https://www.shopsy.in/footwear-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"

# Chrome launch flags.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() supplies the driver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Shared explicit wait with a 15 s timeout, used by the main loop.
wait = WebDriverWait(driver, timeout=15)

# Open the listing, then pause a fixed 6 s before any scraping starts.
driver.get(URL)
time.sleep(6)

# Accumulates one dict per scraped product across all pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six rounds of bottom -> up 400px -> bottom scrolling on the global ``driver``.

    Each round pauses 2 s, 1 s and 2 s after its three scroll commands
    (presumably so newly revealed content can load; not verifiable from here).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs making up a single round.
    one_round = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in one_round:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the currently loaded listing page and append its products to ``data``.

    Reads ``driver.page_source``, collects four independent element lists
    (names, prices, ratings, review counts) by CSS class and pairs them up
    by position. One dict per product is appended to the module-level
    ``data`` list, tagged with ``CATEGORY``.

    Returns:
        int: number of rows appended for this page.

    NOTE(review): pairing is purely positional, so a product lacking a rating
    or a review count shifts every later value onto the wrong product.
    Extracting all fields per product card would remove that risk but needs
    the card container selector -- TODO confirm against the live DOM.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only purely numeric texts such as "4.1"; the text is
    # extracted once per element instead of twice.
    rating_texts = (
        r.get_text(strip=True)
        for r in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn")
    )
    ratings = [t for t in rating_texts if t.replace(".", "").isdigit()]

    # Reviews: divs whose whole text is "(<count>)". Thousands separators are
    # stripped so "(1,234)" is kept as "1234"; previously such counts failed
    # isdigit() and were dropped, misaligning every later review.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, the same cap min(len(...)) applied before.
    page_total = 0
    for name_el, price_el, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_el.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_el.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
LAST_PAGE = 3  # listing pages to visit; pagination is skipped after this one

try:
    for page_no in range(1, LAST_PAGE + 1):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating-style classes.
        # Catch Exception (not a bare except) so a kernel interrupt still
        # propagates, and report the failure instead of hiding it.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"Wait for rating elements failed ({type(e).__name__}); scraping anyway")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == LAST_PAGE:
            break

        # Pagination click: locate the div wrapping the next page number and
        # click it through JavaScript after centring it in the viewport.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break

    # -------- SAVE CSV --------
    df_footwear = pd.DataFrame(data).drop_duplicates()
    df_footwear.to_csv("shopsy_footwear.csv", index=False)

    print(f"\n Total products scraped: {len(df_footwear)}")
finally:
    # Close the browser even when scraping or saving raised, so no Chrome
    # process is left running.
    driver.quit()



 Scraping page 1
Page 1 scraped 29 products

 Scraping page 2
Page 2 scraped 31 products

 Scraping page 3
Page 3 scraped 35 products

 Total products scraped: 88


In [9]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_footwear = pd.read_csv("shopsy_footwear.csv")
df_footwear

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Shoe Island Player-X New Lightweight Running B...,Footwear,₹519,3.8,824
1,RAVDI Party Wear For Men,Footwear,₹509,3.9,43
2,RAVDI Boots For Men,Footwear,₹536,3.8,41
3,jootiyapa JOOTIYAPA TRENDING PREMIUM SHOES FOR...,Footwear,₹886,3.5,141
4,RAVDI Party Wear For Men,Footwear,₹485,3.8,332
...,...,...,...,...,...
83,CLUB STYLE RETRO Sneakers For Men,Footwear,₹557,3.5,7
84,"Bersache Sneaker, Loafers ,Casual with extra c...",Footwear,₹668,4.3,303
85,Vask Training & Gym Shoes For Men,Footwear,₹485,3.7,824
86,PATASHA Men’s Premium Black Leather Chelsea Bo...,Footwear,₹485,3.8,7


## SHOPSY HOME DECOR

In [40]:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stamped on every scraped row, and the listing page to open.
CATEGORY = "Home Decor"
URL = "https://www.shopsy.in/home-decor-online"

# Chrome launch flags.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() supplies the driver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Shared explicit wait with a 15 s timeout, used by the main loop.
wait = WebDriverWait(driver, timeout=15)

# Open the listing, then pause a fixed 6 s before any scraping starts.
driver.get(URL)
time.sleep(6)

# Accumulates one dict per scraped product across all pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six rounds of bottom -> up 400px -> bottom scrolling on the global ``driver``.

    Each round pauses 2 s, 1 s and 2 s after its three scroll commands
    (presumably so newly revealed content can load; not verifiable from here).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs making up a single round.
    one_round = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in one_round:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the currently loaded listing page and append its products to ``data``.

    Reads ``driver.page_source``, collects four independent element lists
    (names, prices, ratings, review counts) by CSS class and pairs them up
    by position. One dict per product is appended to the module-level
    ``data`` list, tagged with ``CATEGORY``.

    Returns:
        int: number of rows appended for this page.

    NOTE(review): pairing is purely positional, so a product lacking a rating
    or a review count shifts every later value onto the wrong product.
    Extracting all fields per product card would remove that risk but needs
    the card container selector -- TODO confirm against the live DOM.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only purely numeric texts such as "4.1"; the text is
    # extracted once per element instead of twice.
    rating_texts = (
        r.get_text(strip=True)
        for r in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn")
    )
    ratings = [t for t in rating_texts if t.replace(".", "").isdigit()]

    # Reviews: divs whose whole text is "(<count>)". Thousands separators are
    # stripped so "(1,234)" is kept as "1234"; previously such counts failed
    # isdigit() and were dropped, misaligning every later review.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, the same cap min(len(...)) applied before.
    page_total = 0
    for name_el, price_el, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_el.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_el.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
LAST_PAGE = 3  # listing pages to visit; pagination is skipped after this one

try:
    for page_no in range(1, LAST_PAGE + 1):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating-style classes.
        # Catch Exception (not a bare except) so a kernel interrupt still
        # propagates, and report the failure instead of hiding it.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"Wait for rating elements failed ({type(e).__name__}); scraping anyway")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == LAST_PAGE:
            break

        # Pagination click: locate the div wrapping the next page number and
        # click it through JavaScript after centring it in the viewport.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break

    # -------- SAVE CSV --------
    df_home_decor = pd.DataFrame(data).drop_duplicates()
    df_home_decor.to_csv("shopsy_home_decor.csv", index=False)

    print(f"\n Total products scraped: {len(df_home_decor)}")
finally:
    # Close the browser even when scraping or saving raised, so no Chrome
    # process is left running.
    driver.quit()




 Scraping page 1
Page 1 scraped 19 products

 Scraping page 2
Page 2 scraped 27 products

 Scraping page 3
Page 3 scraped 26 products

 Total products scraped: 68


In [41]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_home_decor = pd.read_csv("shopsy_home_decor.csv")
df_home_decor

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,SREP WALL HANGING CT,Home Decor,₹46,3.8,34
1,SNDArt Vastu 7 Running Horses Painting With Fr...,Home Decor,₹130,4.1,135
2,SK PARO Radha Krishna wood wall hanging and wa...,Home Decor,₹84,4.2,212
3,ShopGlobal Analog Wall Clock,Home Decor,₹158,4.1,65
4,iru creation Analog Wall Clock,Home Decor,₹148,3.9,30
...,...,...,...,...,...
63,VDCrafts Decorative Wooden Wall art For Home D...,Home Decor,₹112,4.0,578
64,Aashish.enterprises Analog Wall Clock,Home Decor,₹205,3.8,31
65,AMBEY DÉCOR Decorative Wallpaper,Home Decor,₹62,3.8,410
66,ZEN TREK Self Adhesive 45 cm Wall Sticker,Home Decor,₹89,3.6,267


## FACE WASH ONLINE

In [12]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stamped on every scraped row, and the listing page to open.
CATEGORY = "Beauty"
URL = "https://www.shopsy.in/face-wash-online/pr?p%5B%5D=facets.price_range.from%3D200&p%5B%5D=facets.price_range.to%3DMax"

# Chrome launch flags.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() supplies the driver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Shared explicit wait with a 15 s timeout, used by the main loop.
wait = WebDriverWait(driver, timeout=15)

# Open the listing, then pause a fixed 6 s before any scraping starts.
driver.get(URL)
time.sleep(6)

# Accumulates one dict per scraped product across all pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six rounds of bottom -> up 400px -> bottom scrolling on the global ``driver``.

    Each round pauses 2 s, 1 s and 2 s after its three scroll commands
    (presumably so newly revealed content can load; not verifiable from here).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs making up a single round.
    one_round = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in one_round:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the currently loaded listing page and append its products to ``data``.

    Reads ``driver.page_source``, collects four independent element lists
    (names, prices, ratings, review counts) by CSS class and pairs them up
    by position. One dict per product is appended to the module-level
    ``data`` list, tagged with ``CATEGORY``.

    Returns:
        int: number of rows appended for this page.

    NOTE(review): pairing is purely positional, so a product lacking a rating
    or a review count shifts every later value onto the wrong product.
    Extracting all fields per product card would remove that risk but needs
    the card container selector -- TODO confirm against the live DOM.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only purely numeric texts such as "4.1"; the text is
    # extracted once per element instead of twice.
    rating_texts = (
        r.get_text(strip=True)
        for r in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn")
    )
    ratings = [t for t in rating_texts if t.replace(".", "").isdigit()]

    # Reviews: divs whose whole text is "(<count>)". Thousands separators are
    # stripped so "(1,234)" is kept as "1234"; previously such counts failed
    # isdigit() and were dropped, misaligning every later review.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, the same cap min(len(...)) applied before.
    page_total = 0
    for name_el, price_el, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_el.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_el.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
LAST_PAGE = 3  # listing pages to visit; pagination is skipped after this one

try:
    for page_no in range(1, LAST_PAGE + 1):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating-style classes.
        # Catch Exception (not a bare except) so a kernel interrupt still
        # propagates, and report the failure instead of hiding it.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"Wait for rating elements failed ({type(e).__name__}); scraping anyway")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == LAST_PAGE:
            break

        # Pagination click: locate the div wrapping the next page number and
        # click it through JavaScript after centring it in the viewport.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break

    # -------- SAVE CSV --------
    df_beauty1 = pd.DataFrame(data).drop_duplicates()
    df_beauty1.to_csv("shopsy_fashwash_online.csv", index=False)

    print(f"\n Total products scraped: {len(df_beauty1)}")
finally:
    # Close the browser even when scraping or saving raised, so no Chrome
    # process is left running.
    driver.quit()



 Scraping page 1
Page 1 scraped 26 products

 Scraping page 2
Page 2 scraped 26 products

 Scraping page 3
Page 3 scraped 31 products

 Total products scraped: 78


In [13]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_beauty1 = pd.read_csv("shopsy_fashwash_online.csv")
df_beauty1

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"GHAR SOAPS Magic | Tan Removal, Skin Brighteni...",Beauty,₹284,4.0,113
1,"HIMALAYA Himalaya Purifying Neem , 200 Ml Men ...",Beauty,₹227,4.3,39
2,GHAR SOAPS Aloe-Ceramide Hydrating for Dry to...,Beauty,₹284,3.6,5
3,"Peezons For Men, Anti Acne & Pimple, Skin Brig...",Beauty,₹215,3.6,391
4,Mylie Mulberry & Rose Buy One Get One - Fairne...,Beauty,₹185,3.8,368
...,...,...,...,...,...
73,Globus Naturals Anti Pollution & Anti Acne Cha...,Beauty,₹212,3.6,38
74,BELLAVITA Vitamin C Pack of 2 for Oil Control...,Beauty,₹237,4.3,3
75,Mamaearth Tea Tree Natural for Acne & Pimples...,Beauty,₹227,4.2,7
76,Joy Skin Fruits Spots & Tan Clear Papaya For ...,Beauty,₹202,4.3,4


## BEAUTY AND GROOMING

In [14]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stamped on every scraped row, and the listing page to open.
CATEGORY = "Beauty"
URL = "https://www.shopsy.in/beauty-and-grooming/pr?sid=g9b"

# Chrome launch flags.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() supplies the driver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Shared explicit wait with a 15 s timeout, used by the main loop.
wait = WebDriverWait(driver, timeout=15)

# Open the listing, then pause a fixed 6 s before any scraping starts.
driver.get(URL)
time.sleep(6)

# Accumulates one dict per scraped product across all pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six rounds of bottom -> up 400px -> bottom scrolling on the global ``driver``.

    Each round pauses 2 s, 1 s and 2 s after its three scroll commands
    (presumably so newly revealed content can load; not verifiable from here).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs making up a single round.
    one_round = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in one_round:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are collected as four separate
    lists in document order and paired up by position; only the first
    ``min(len(...))`` entries are used. Each appended row is a dict with the
    keys "Product Name", "Category" (the module-level ``CATEGORY``), "Price",
    "Rating" and "No_of_Reviews", all holding strings.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is digits with optional dots ("4", "4.3").
    ratings = []
    for node in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        text = node.get_text(strip=True)
        if text.replace(".", "").isdigit():
            ratings.append(text)

    # Reviews: text of the form "(N)". Thousands separators are stripped before
    # the digit check; otherwise a count such as "(1,234)" would be dropped and
    # every later review count would shift onto the wrong product.
    reviews = []
    for node in soup.select("div.css-146c3p1"):
        text = node.get_text(strip=True)
        if text.startswith("(") and text.endswith(")"):
            num = text.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # NOTE(review): pairing is still positional, so a product card without a
    # rating or review count would misalign the rows after it - TODO confirm
    # against the live markup and consider parsing per product card.
    count = min(len(names), len(prices), len(ratings), len(reviews))

    for i in range(count):
        data.append({
            "Product Name": names[i].get_text(strip=True),
            "Category": CATEGORY,
            "Price": prices[i].get_text(strip=True),
            "Rating": ratings[i],
            "No_of_Reviews": reviews[i],
        })

    return count

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape pages 1-3. The browser is closed in `finally` so that a failure
# mid-scrape does not leave a Chrome process running.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating classes. Scraping
        # proceeds either way, but the reason is reported instead of being
        # discarded, and Ctrl-C is no longer swallowed as it was by a bare except.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"No rating element found on page {page_no}: {type(e).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: find the div wrapping the next page number, centre
        # it in the viewport and click it through JavaScript.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    driver.quit()

# -------- BUILD DATAFRAME (this cell does not write a CSV) --------
df_beauty2  = pd.DataFrame(data).drop_duplicates()

print(f"\n Total products scraped: {len(df_beauty2)}")

# Append this run to df_beauty1, which must already exist from an earlier cell.
# NOTE(review): rows duplicated across the two frames are not removed here, and
# the column dtypes of the two frames may differ (this run holds strings) - verify.
df_beauty2 = pd.concat([df_beauty1,df_beauty2],ignore_index = True)
df_beauty2


 Scraping page 1
Page 1 scraped 19 products

 Scraping page 2
Page 2 scraped 20 products

 Scraping page 3
Page 3 scraped 28 products

 Total products scraped: 67


Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"GHAR SOAPS Magic | Tan Removal, Skin Brighteni...",Beauty,₹284,4.0,113
1,"HIMALAYA Himalaya Purifying Neem , 200 Ml Men ...",Beauty,₹227,4.3,39
2,GHAR SOAPS Aloe-Ceramide Hydrating for Dry to...,Beauty,₹284,3.6,5
3,"Peezons For Men, Anti Acne & Pimple, Skin Brig...",Beauty,₹215,3.6,391
4,Mylie Mulberry & Rose Buy One Get One - Fairne...,Beauty,₹185,3.8,368
...,...,...,...,...,...
140,NEXOST ENTERPRISE AlwayS Clear Glyserin Soap Base,Beauty,₹88,3.9,51
141,"INTIMIFY Rosemary Essential Oil, Rosemary Hair...",Beauty,₹125,4.2,162
142,"Everwey Nourishing Almond Honey, Cocoa Butter,...",Beauty,₹127,4,837
143,Kathiyawad Hair Fall Control Range with Onion ...,Beauty,₹208,3.6,749


## Jewellery

In [15]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/jewellery/pr?sid=mcr"   # listing page opened below
CATEGORY = "Women Accessories"                       # written to every row's "Category" field

# Chrome flags: start maximized and disable the Blink "AutomationControlled" feature.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# The Service receives whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 15)   # explicit-wait helper with a 15-second timeout

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

data = []   # filled by scrape_page(): one dict per product

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll passes over the page currently open in ``driver``.

    Each pass jumps to the bottom, backs up 400px, then jumps to the bottom
    again, sleeping after every step (2s, 1s, 2s) - about 30 seconds in total.
    Presumably this gives lazily rendered product cards time to load; the
    function itself only scrolls and sleeps.
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    # (script, seconds to sleep afterwards) for one pass.
    one_pass = (
        (to_bottom, 2),
        ("window.scrollBy(0, -400)", 1),
        (to_bottom, 2),
    )
    passes_left = 6
    while passes_left > 0:
        for script, pause in one_pass:
            driver.execute_script(script)
            time.sleep(pause)
        passes_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are collected as four separate
    lists in document order and paired up by position; only the first
    ``min(len(...))`` entries are used. Each appended row is a dict with the
    keys "Product Name", "Category" (the module-level ``CATEGORY``), "Price",
    "Rating" and "No_of_Reviews", all holding strings.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is digits with optional dots ("4", "4.3").
    ratings = []
    for node in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        text = node.get_text(strip=True)
        if text.replace(".", "").isdigit():
            ratings.append(text)

    # Reviews: text of the form "(N)". Thousands separators are stripped before
    # the digit check; otherwise a count such as "(1,234)" would be dropped and
    # every later review count would shift onto the wrong product.
    reviews = []
    for node in soup.select("div.css-146c3p1"):
        text = node.get_text(strip=True)
        if text.startswith("(") and text.endswith(")"):
            num = text.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # NOTE(review): pairing is still positional, so a product card without a
    # rating or review count would misalign the rows after it - TODO confirm
    # against the live markup and consider parsing per product card.
    count = min(len(names), len(prices), len(ratings), len(reviews))

    for i in range(count):
        data.append({
            "Product Name": names[i].get_text(strip=True),
            "Category": CATEGORY,
            "Price": prices[i].get_text(strip=True),
            "Rating": ratings[i],
            "No_of_Reviews": reviews[i],
        })

    return count

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape pages 1-3. The browser is closed in `finally` so that a failure
# mid-scrape does not leave a Chrome process running.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating classes. Scraping
        # proceeds either way, but the reason is reported instead of being
        # discarded, and Ctrl-C is no longer swallowed as it was by a bare except.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"No rating element found on page {page_no}: {type(e).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: find the div wrapping the next page number, centre
        # it in the viewport and click it through JavaScript.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    driver.quit()

# -------- SAVE CSV --------
# Exact duplicate rows (all five fields equal) are dropped before writing.
df_women_accessories = pd.DataFrame(data).drop_duplicates()
df_women_accessories.to_csv("shopsy_women_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_women_accessories)}")



 Scraping page 1
Page 1 scraped 20 products

 Scraping page 2
Page 2 scraped 25 products

 Scraping page 3
Page 3 scraped 32 products

 Total products scraped: 74


In [16]:
# Reload the CSV written by the scraping cell above, replacing the in-memory
# frame with the on-disk copy, and display it.
df_women_accessories = pd.read_csv("shopsy_women_accessories.csv")
df_women_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,RnK Creation ANKLE008 Copper Anklet,Women Accessories,₹177,3.9,345
1,RV JEWELLS Stainless Steel Silver Bracelet,Women Accessories,₹138,3.9,157
2,Mad Club Alloy Jhumki Earring,Women Accessories,₹38,3.6,290
3,TINALIVA Alloy Jewel Set,Women Accessories,₹124,3.9,258
4,ADMIER ACPC0133 Gold-plated Brass,Women Accessories,₹140,4.1,27
...,...,...,...,...,...
69,DHARM JEWELS Gold Plated Amrecian Diamond A Wo...,Women Accessories,₹65,3.9,69
70,Faizam collection Stainless Steel Bracelet,Women Accessories,₹63,4.0,39
71,"Atulenterprises Silver, Steel Jewel Set",Women Accessories,₹130,3.9,92
72,"PRASUB Beads, Pearl Alloy Jhumki Earring, Stud...",Women Accessories,₹36,3.7,918


## HOME FURNISHING

In [17]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Listing page opened below; the query string carries the price_range facets (from 500 to Max).
URL = "https://www.shopsy.in/home-furnishing-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"
CATEGORY = "Home Furnishing"   # written to every row's "Category" field

# Chrome flags: start maximized and disable the Blink "AutomationControlled" feature.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# The Service receives whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 15)   # explicit-wait helper with a 15-second timeout

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

data = []   # filled by scrape_page(): one dict per product

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll passes over the page currently open in ``driver``.

    Each pass jumps to the bottom, backs up 400px, then jumps to the bottom
    again, sleeping after every step (2s, 1s, 2s) - about 30 seconds in total.
    Presumably this gives lazily rendered product cards time to load; the
    function itself only scrolls and sleeps.
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    # (script, seconds to sleep afterwards) for one pass.
    one_pass = (
        (to_bottom, 2),
        ("window.scrollBy(0, -400)", 1),
        (to_bottom, 2),
    )
    passes_left = 6
    while passes_left > 0:
        for script, pause in one_pass:
            driver.execute_script(script)
            time.sleep(pause)
        passes_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are collected as four separate
    lists in document order and paired up by position; only the first
    ``min(len(...))`` entries are used. Each appended row is a dict with the
    keys "Product Name", "Category" (the module-level ``CATEGORY``), "Price",
    "Rating" and "No_of_Reviews", all holding strings.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is digits with optional dots ("4", "4.3").
    ratings = []
    for node in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        text = node.get_text(strip=True)
        if text.replace(".", "").isdigit():
            ratings.append(text)

    # Reviews: text of the form "(N)". Thousands separators are stripped before
    # the digit check; otherwise a count such as "(1,234)" would be dropped and
    # every later review count would shift onto the wrong product.
    reviews = []
    for node in soup.select("div.css-146c3p1"):
        text = node.get_text(strip=True)
        if text.startswith("(") and text.endswith(")"):
            num = text.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # NOTE(review): pairing is still positional, so a product card without a
    # rating or review count would misalign the rows after it - TODO confirm
    # against the live markup and consider parsing per product card.
    count = min(len(names), len(prices), len(ratings), len(reviews))

    for i in range(count):
        data.append({
            "Product Name": names[i].get_text(strip=True),
            "Category": CATEGORY,
            "Price": prices[i].get_text(strip=True),
            "Rating": ratings[i],
            "No_of_Reviews": reviews[i],
        })

    return count

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape pages 1-3. The browser is closed in `finally` so that a failure
# mid-scrape does not leave a Chrome process running.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating classes. Scraping
        # proceeds either way, but the reason is reported instead of being
        # discarded, and Ctrl-C is no longer swallowed as it was by a bare except.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"No rating element found on page {page_no}: {type(e).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: find the div wrapping the next page number, centre
        # it in the viewport and click it through JavaScript.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    driver.quit()

# -------- SAVE CSV --------
# Exact duplicate rows (all five fields equal) are dropped before writing.
df_home_accessories = pd.DataFrame(data).drop_duplicates()
df_home_accessories.to_csv("shopsy_home_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_home_accessories)}")



 Scraping page 1
Page 1 scraped 19 products

 Scraping page 2
Page 2 scraped 25 products

 Scraping page 3
Page 3 scraped 20 products

 Total products scraped: 59


In [18]:
# Reload the CSV written by the scraping cell above, replacing the in-memory
# frame with the on-disk copy, and display it.
df_home_accessories = pd.read_csv("shopsy_home_accessories.csv")
df_home_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Classified Printed Double Mink Blanket,Home Furnishing,₹521,3.9,380
1,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,3.9,920
2,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,4.0,568
3,ShopGlobal Polyester Adults Mosquito Net,Home Furnishing,₹617,4.0,234
4,Divine Casa Cotton King Printed Bedsheet,Home Furnishing,₹839,4.0,556
5,Mattress Protector Fitted King Size Mattress C...,Home Furnishing,₹441,4.1,534
6,clicknbuy 280 TC Cotton Double King Bedsheet,Home Furnishing,₹984,4.0,534
7,ShopGlobal Nylon Adults Mosquito Net,Home Furnishing,₹561,3.8,244
8,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,3.8,534
9,Radha Fitted King Size Mattress Cover,Home Furnishing,₹598,3.9,78


## COOKWARE SETS

In [19]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/cookware-sets-online"   # listing page opened below
CATEGORY = "Kitchen Appliances"                      # written to every row's "Category" field

# Chrome flags: start maximized and disable the Blink "AutomationControlled" feature.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# The Service receives whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 15)   # explicit-wait helper with a 15-second timeout

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

data = []   # filled by scrape_page(): one dict per product

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll passes over the page currently open in ``driver``.

    Each pass jumps to the bottom, backs up 400px, then jumps to the bottom
    again, sleeping after every step (2s, 1s, 2s) - about 30 seconds in total.
    Presumably this gives lazily rendered product cards time to load; the
    function itself only scrolls and sleeps.
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    # (script, seconds to sleep afterwards) for one pass.
    one_pass = (
        (to_bottom, 2),
        ("window.scrollBy(0, -400)", 1),
        (to_bottom, 2),
    )
    passes_left = 6
    while passes_left > 0:
        for script, pause in one_pass:
            driver.execute_script(script)
            time.sleep(pause)
        passes_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are collected as four separate
    lists in document order and paired up by position; only the first
    ``min(len(...))`` entries are used. Each appended row is a dict with the
    keys "Product Name", "Category" (the module-level ``CATEGORY``), "Price",
    "Rating" and "No_of_Reviews", all holding strings.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is digits with optional dots ("4", "4.3").
    ratings = []
    for node in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        text = node.get_text(strip=True)
        if text.replace(".", "").isdigit():
            ratings.append(text)

    # Reviews: text of the form "(N)". Thousands separators are stripped before
    # the digit check; otherwise a count such as "(1,234)" would be dropped and
    # every later review count would shift onto the wrong product.
    reviews = []
    for node in soup.select("div.css-146c3p1"):
        text = node.get_text(strip=True)
        if text.startswith("(") and text.endswith(")"):
            num = text.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # NOTE(review): pairing is still positional, so a product card without a
    # rating or review count would misalign the rows after it - TODO confirm
    # against the live markup and consider parsing per product card.
    count = min(len(names), len(prices), len(ratings), len(reviews))

    for i in range(count):
        data.append({
            "Product Name": names[i].get_text(strip=True),
            "Category": CATEGORY,
            "Price": prices[i].get_text(strip=True),
            "Rating": ratings[i],
            "No_of_Reviews": reviews[i],
        })

    return count

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape pages 1-3. The browser is closed in `finally` so that a failure
# mid-scrape does not leave a Chrome process running.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating classes. Scraping
        # proceeds either way, but the reason is reported instead of being
        # discarded, and Ctrl-C is no longer swallowed as it was by a bare except.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"No rating element found on page {page_no}: {type(e).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: find the div wrapping the next page number, centre
        # it in the viewport and click it through JavaScript.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    driver.quit()

# -------- SAVE CSV --------
# Exact duplicate rows (all five fields equal) are dropped before writing.
df_kitchen_appliances = pd.DataFrame(data).drop_duplicates()
df_kitchen_appliances.to_csv("shopsy_kitchen_appliances.csv", index=False)

print(f"\n Total products scraped: {len(df_kitchen_appliances)}")



 Scraping page 1
Page 1 scraped 25 products

 Scraping page 2
Page 2 scraped 26 products

 Scraping page 3
Page 3 scraped 33 products

 Total products scraped: 83


In [20]:
# Reload the CSV written by the scraping cell above, replacing the in-memory
# frame with the on-disk copy, and display it.
df_kitchen_appliances = pd.read_csv("shopsy_kitchen_appliances.csv")
df_kitchen_appliances

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,BlessYou Non sticky Cookware (Pack of 6 pcs) -...,Kitchen Appliances,₹613,3.8,25
1,BuddyBOX Stainless Steel Handi | Milk Pot | St...,Kitchen Appliances,₹598,3.8,738
2,SIWAAN induction bottom cookware set Induction...,Kitchen Appliances,₹624,4.0,515
3,"Vigneshgenix Steel Tope Set With Lid,Patila Se...",Kitchen Appliances,₹449,3.6,546
4,Nirlon Plus Triply Stainless Steel Cookware Co...,Kitchen Appliances,"₹5,834",3.8,611
...,...,...,...,...,...
78,HM EVOTEK Cookware Set,Kitchen Appliances,₹286,4.1,4
79,HM EVOTEK Cookware Set,Kitchen Appliances,₹376,4.0,63
80,LAZYwindow Premium Quality Induction Bottom Co...,Kitchen Appliances,₹767,4.1,48
81,LAZYwindow Traditional Iron Deep Bottom Kadhai...,Kitchen Appliances,₹573,4.8,171


## CRICKET BAILS ONLINE

In [21]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/cricket-bails-online"   # listing page opened below
CATEGORY = "Sports"                                  # written to every row's "Category" field

# Chrome flags: start maximized and disable the Blink "AutomationControlled" feature.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# The Service receives whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 15)   # explicit-wait helper with a 15-second timeout

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

data = []   # filled by scrape_page(): one dict per product

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll passes over the page currently open in ``driver``.

    Each pass jumps to the bottom, backs up 400px, then jumps to the bottom
    again, sleeping after every step (2s, 1s, 2s) - about 30 seconds in total.
    Presumably this gives lazily rendered product cards time to load; the
    function itself only scrolls and sleeps.
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    # (script, seconds to sleep afterwards) for one pass.
    one_pass = (
        (to_bottom, 2),
        ("window.scrollBy(0, -400)", 1),
        (to_bottom, 2),
    )
    passes_left = 6
    while passes_left > 0:
        for script, pause in one_pass:
            driver.execute_script(script)
            time.sleep(pause)
        passes_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are collected as four separate
    lists in document order and paired up by position; only the first
    ``min(len(...))`` entries are used. Each appended row is a dict with the
    keys "Product Name", "Category" (the module-level ``CATEGORY``), "Price",
    "Rating" and "No_of_Reviews", all holding strings.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is digits with optional dots ("4", "4.3").
    ratings = []
    for node in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        text = node.get_text(strip=True)
        if text.replace(".", "").isdigit():
            ratings.append(text)

    # Reviews: text of the form "(N)". Thousands separators are stripped before
    # the digit check; otherwise a count such as "(1,234)" would be dropped and
    # every later review count would shift onto the wrong product.
    reviews = []
    for node in soup.select("div.css-146c3p1"):
        text = node.get_text(strip=True)
        if text.startswith("(") and text.endswith(")"):
            num = text.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # NOTE(review): pairing is still positional, so a product card without a
    # rating or review count would misalign the rows after it - TODO confirm
    # against the live markup and consider parsing per product card.
    count = min(len(names), len(prices), len(ratings), len(reviews))

    for i in range(count):
        data.append({
            "Product Name": names[i].get_text(strip=True),
            "Category": CATEGORY,
            "Price": prices[i].get_text(strip=True),
            "Rating": ratings[i],
            "No_of_Reviews": reviews[i],
        })

    return count

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape pages 1-3. The browser is closed in `finally` so that a failure
# mid-scrape does not leave a Chrome process running.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating classes. Scraping
        # proceeds either way, but the reason is reported instead of being
        # discarded, and Ctrl-C is no longer swallowed as it was by a bare except.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"No rating element found on page {page_no}: {type(e).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: find the div wrapping the next page number, centre
        # it in the viewport and click it through JavaScript.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    driver.quit()

# -------- SAVE CSV --------
# Exact duplicate rows (all five fields equal) are dropped before writing.
df_sports = pd.DataFrame(data).drop_duplicates()
df_sports.to_csv("shopsy_sports.csv", index=False)

print(f"\n Total products scraped: {len(df_sports)}")



 Scraping page 1
Page 1 scraped 23 products

 Scraping page 2
Page 2 scraped 0 products

 Scraping page 3
Page 3 scraped 0 products

 Total products scraped: 23


In [22]:
# Reload the CSV written by the scraping cell above, replacing the in-memory
# frame with the on-disk copy, and display it.
df_sports = pd.read_csv("shopsy_sports.csv")
df_sports

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,jaysarkar Tennis Trainer Rebound Ball (pack of...,Sports,₹252,4.1,16
1,SIPL Wooden stump bails Standard Bail,Sports,₹165,4.2,55
2,shri shyam traders cricket bails set of 2 Stan...,Sports,₹118,3.9,16
3,Dinetic Bails Standard Bail,Sports,₹132,3.9,20
4,Supreme Trading SUPREME PLATIC CRICKET BALL 80...,Sports,₹250,3.8,4
5,Mercury Plus Medium Weight Cricket Rubber Tenn...,Sports,₹405,4.4,27
6,Owlix bails Standard Bail,Sports,₹172,4.8,4
7,YMD Stumps Bails Wooden Set Of 4 Standard Bail,Sports,₹162,4.0,8
8,Dinetic WOODEN Standard Bail,Sports,₹113,4.1,7
9,ADM Cricket Bat Ball Plastic Cricket Bat No 8 ...,Sports,₹255,3.5,18


## HEALTH FITNESS

In [23]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/fitness-equipment-online"   # listing page opened below
CATEGORY = "Health & Fitness"                            # written to every row's "Category" field

# Chrome flags: start maximized and disable the Blink "AutomationControlled" feature.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# The Service receives whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 15)   # explicit-wait helper with a 15-second timeout

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

data = []   # filled by scrape_page(): one dict per product

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll passes over the page currently open in ``driver``.

    Each pass jumps to the bottom, backs up 400px, then jumps to the bottom
    again, sleeping after every step (2s, 1s, 2s) - about 30 seconds in total.
    Presumably this gives lazily rendered product cards time to load; the
    function itself only scrolls and sleeps.
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    # (script, seconds to sleep afterwards) for one pass.
    one_pass = (
        (to_bottom, 2),
        ("window.scrollBy(0, -400)", 1),
        (to_bottom, 2),
    )
    passes_left = 6
    while passes_left > 0:
        for script, pause in one_pass:
            driver.execute_script(script)
            time.sleep(pause)
        passes_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are collected as four separate
    lists in document order and paired up by position; only the first
    ``min(len(...))`` entries are used. Each appended row is a dict with the
    keys "Product Name", "Category" (the module-level ``CATEGORY``), "Price",
    "Rating" and "No_of_Reviews", all holding strings.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is digits with optional dots ("4", "4.3").
    ratings = []
    for node in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        text = node.get_text(strip=True)
        if text.replace(".", "").isdigit():
            ratings.append(text)

    # Reviews: text of the form "(N)". Thousands separators are stripped before
    # the digit check; otherwise a count such as "(1,234)" would be dropped and
    # every later review count would shift onto the wrong product.
    reviews = []
    for node in soup.select("div.css-146c3p1"):
        text = node.get_text(strip=True)
        if text.startswith("(") and text.endswith(")"):
            num = text.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # NOTE(review): pairing is still positional, so a product card without a
    # rating or review count would misalign the rows after it - TODO confirm
    # against the live markup and consider parsing per product card.
    count = min(len(names), len(prices), len(ratings), len(reviews))

    for i in range(count):
        data.append({
            "Product Name": names[i].get_text(strip=True),
            "Category": CATEGORY,
            "Price": prices[i].get_text(strip=True),
            "Rating": ratings[i],
            "No_of_Reviews": reviews[i],
        })

    return count

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape pages 1-3. The browser is closed in `finally` so that a failure
# mid-scrape does not leave a Chrome process running.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating classes. Scraping
        # proceeds either way, but the reason is reported instead of being
        # discarded, and Ctrl-C is no longer swallowed as it was by a bare except.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"No rating element found on page {page_no}: {type(e).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: find the div wrapping the next page number, centre
        # it in the viewport and click it through JavaScript.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    driver.quit()

# -------- SAVE CSV --------
# Exact duplicate rows (all five fields equal) are dropped before writing.
df_health_fitness = pd.DataFrame(data).drop_duplicates()
df_health_fitness.to_csv("shopsy_health_fitness.csv", index=False)

print(f"\n Total products scraped: {len(df_health_fitness)}")



 Scraping page 1
Page 1 scraped 34 products

 Scraping page 2
Page 2 scraped 33 products

 Scraping page 3
Page 3 scraped 38 products

 Total products scraped: 97


In [24]:
# Reload the CSV written by the scraping cell above, replacing the in-memory
# frame with the on-disk copy, and display it.
df_health_fitness = pd.read_csv("shopsy_health_fitness.csv")
df_health_fitness

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,SNAPSHOPWORLD Fitness Resistance Bands-4 Tube ...,Health & Fitness,₹221,3.9,11
1,AloneFit Weight Loss Sweat Belt 'All in one Si...,Health & Fitness,₹126,3.5,854
2,SNAPSHOPWORLD Fitness Resistance Bands-4 Tube ...,Health & Fitness,₹211,4.0,269
3,IAMALOT Tummy Trimmer With Single Spring And S...,Health & Fitness,₹126,4.2,214
4,KIDDU COLLECTION KidduTummy Twister_N501 Ab Ex...,Health & Fitness,₹285,4.0,116
...,...,...,...,...,...
92,RASCO Abs Tummy Trimmer With Dual Spring Exerc...,Health & Fitness,₹168,4.0,22
93,GRYPP 4-Tube Pedal Resistance Band Full Body T...,Health & Fitness,₹232,4.1,32
94,Wearslim Abdominal Wheel With Push Up Bar | Ab...,Health & Fitness,₹334,4.0,296
95,RPC99 Double Spring Tummy Trimmer and Sweat Sl...,Health & Fitness,₹255,3.9,513


## TOYS

In [25]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/learning-toys-online"   # listing page opened below
CATEGORY = "Toys"                                    # written to every row's "Category" field

# Chrome flags: start maximized and disable the Blink "AutomationControlled" feature.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# The Service receives whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 15)   # explicit-wait helper with a 15-second timeout

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

data = []   # filled by scrape_page(): one dict per product

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll passes over the page currently open in ``driver``.

    Each pass jumps to the bottom, backs up 400px, then jumps to the bottom
    again, sleeping after every step (2s, 1s, 2s) - about 30 seconds in total.
    Presumably this gives lazily rendered product cards time to load; the
    function itself only scrolls and sleeps.
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    # (script, seconds to sleep afterwards) for one pass.
    one_pass = (
        (to_bottom, 2),
        ("window.scrollBy(0, -400)", 1),
        (to_bottom, 2),
    )
    passes_left = 6
    while passes_left > 0:
        for script, pause in one_pass:
            driver.execute_script(script)
            time.sleep(pause)
        passes_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are collected as four separate
    lists in document order and paired up by position; only the first
    ``min(len(...))`` entries are used. Each appended row is a dict with the
    keys "Product Name", "Category" (the module-level ``CATEGORY``), "Price",
    "Rating" and "No_of_Reviews", all holding strings.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is digits with optional dots ("4", "4.3").
    ratings = []
    for node in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        text = node.get_text(strip=True)
        if text.replace(".", "").isdigit():
            ratings.append(text)

    # Reviews: text of the form "(N)". Thousands separators are stripped before
    # the digit check; otherwise a count such as "(1,234)" would be dropped and
    # every later review count would shift onto the wrong product.
    reviews = []
    for node in soup.select("div.css-146c3p1"):
        text = node.get_text(strip=True)
        if text.startswith("(") and text.endswith(")"):
            num = text.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # NOTE(review): pairing is still positional, so a product card without a
    # rating or review count would misalign the rows after it - TODO confirm
    # against the live markup and consider parsing per product card.
    count = min(len(names), len(prices), len(ratings), len(reviews))

    for i in range(count):
        data.append({
            "Product Name": names[i].get_text(strip=True),
            "Category": CATEGORY,
            "Price": prices[i].get_text(strip=True),
            "Rating": ratings[i],
            "No_of_Reviews": reviews[i],
        })

    return count

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape pages 1-3. The browser is closed in `finally` so that a failure
# mid-scrape does not leave a Chrome process running.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait for an element carrying the rating classes. Scraping
        # proceeds either way, but the reason is reported instead of being
        # discarded, and Ctrl-C is no longer swallowed as it was by a bare except.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as e:
            print(f"No rating element found on page {page_no}: {type(e).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: find the div wrapping the next page number, centre
        # it in the viewport and click it through JavaScript.
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    driver.quit()

# -------- SAVE CSV --------
# Exact duplicate rows (all five fields equal) are dropped before writing.
df_toys = pd.DataFrame(data).drop_duplicates()
df_toys.to_csv("shopsy_toys.csv", index=False)

print(f"\n Total products scraped: {len(df_toys)}")



 Scraping page 1
Page 1 scraped 24 products

 Scraping page 2
Page 2 scraped 25 products

 Scraping page 3
Page 3 scraped 27 products

 Total products scraped: 74


In [26]:
# Reload the CSV written by the scraping cell above, replacing the in-memory
# frame with the on-disk copy, and display it.
df_toys = pd.read_csv("shopsy_toys.csv")
df_toys

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,SHREE INCORPORATION Tent House for Baby Boy Gi...,Toys,₹248,4.0,518
1,Toysnus Police Car Music,Toys,₹141,4.2,35
2,Toybharat Electronic Musical Keyboard for Kids...,Toys,₹438,3.6,665
3,MSN Cry Air Dry Clay For Kids DIY Ultra Clay K...,Toys,₹30,4.0,114
4,Mayne Fun and Educational Building Set (100 pc...,Toys,₹155,4.0,82
...,...,...,...,...,...
69,"Toyingly Sleeping Penguin Soft Toy for Boys, G...",Toys,₹123,4.3,274
70,SUJANI Top CreateBlock: DIY Plastic Building B...,Toys,₹109,3.7,29
71,Spire 200 Watt Converter12v dc to ac inverter ...,Toys,₹436,3.9,86
72,ARIZON DIY Plastic Building Blocks Toy Set Cre...,Toys,₹97,3.7,402


## SCHOOL STATIONERY

In [27]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stored in every row's "Category" field, and the listing page to scrape.
CATEGORY = "Stationary"
URL = "https://www.shopsy.in/school-stationery-online"

# Chrome is started maximized with the AutomationControlled Blink feature disabled.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() provides the chromedriver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Explicit waits created from `wait` time out after 15 seconds.
wait = WebDriverWait(driver, 15)

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

# Rows appended by scrape_page() accumulate here across pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll rounds on the page held by the global ``driver``.

    Each round jumps to the bottom of the document, waits 2 s, scrolls back
    up by 400 px, waits 1 s, then jumps to the bottom again and waits 2 s
    (presumably so lazily loaded products get a chance to render).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs executed in order during every round
    steps = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in steps:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are gathered as four separate
    lists in document order and paired by position; the shortest list
    decides how many rows are kept.

    NOTE(review): positional pairing assumes every product card exposes all
    four fields -- a card without a rating or review count would shift the
    values of every later row. Confirm against the live markup.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is numeric once dots are removed (e.g. "4.2")
    ratings = []
    for rating_div in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        rating_text = rating_div.get_text(strip=True)
        if rating_text.replace('.', '').isdigit():
            ratings.append(rating_text)

    # Reviews: text of the form "(123)". Thousands separators are stripped so
    # "(1,234)" is kept as "1234" instead of being dropped, which would pair
    # every later review count with the wrong product.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, so only complete rows are stored
    page_total = 0
    for name_tag, price_tag, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_tag.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_tag.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape listing pages 1..3. The try/finally guarantees the Chrome session is
# closed even when a page raises or the run is interrupted.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait: if the selector never appears, scrape whatever rendered,
        # but report the failure instead of swallowing it silently.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as wait_error:
            print(f"Wait for product elements failed on page {page_no}: {type(wait_error).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: locate the div wrapping the next page number and click it via JS
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    # Release the browser whether or not scraping completed.
    driver.quit()

# -------- SAVE CSV --------
df_stationary = pd.DataFrame(data).drop_duplicates()
df_stationary.to_csv("shopsy_stationary.csv", index=False)

print(f"\n Total products scraped: {len(df_stationary)}")



 Scraping page 1
Page 1 scraped 28 products

 Scraping page 2
Page 2 scraped 32 products

 Scraping page 3
Page 3 scraped 35 products

 Total products scraped: 92


In [28]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_stationary = pd.read_csv("shopsy_stationary.csv")
df_stationary

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"SK PARO Wooden Money/Piggy Bank for Kids,\nSav...",Stationary,₹110,3.7,7
1,Lynex Backbencher School & Office Bagpack 28 L...,Stationary,₹473,3.9,845
2,Happy Shopping Store Women White Panda Cosmeti...,Stationary,₹100,4.1,76
3,SIFU COLLECTION Beautiful Children Wooden Hand...,Stationary,₹138,4.0,491
4,Hench Backpack Girl's Medium Waterproof School...,Stationary,₹285,3.7,251
...,...,...,...,...,...
87,zoniex Space Password Lock Double Sided Compas...,Stationary,₹326,4.0,228
88,Trust Zone Wooden Money box/Piggy Bank Box for...,Stationary,₹158,3.9,28
89,HAPPY SHOPPING STORE Premium quality large siz...,Stationary,₹116,4.0,52
90,BRUZONE Plastic Pencil Boxes,Stationary,₹204,4.0,204


## ELECTRONICS

In [29]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stored in every row's "Category" field, and the listing page to scrape.
CATEGORY = "Electronics"
URL = "https://www.shopsy.in/hair-straightener-online"

# Chrome is started maximized with the AutomationControlled Blink feature disabled.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() provides the chromedriver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Explicit waits created from `wait` time out after 15 seconds.
wait = WebDriverWait(driver, 15)

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

# Rows appended by scrape_page() accumulate here across pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll rounds on the page held by the global ``driver``.

    Each round jumps to the bottom of the document, waits 2 s, scrolls back
    up by 400 px, waits 1 s, then jumps to the bottom again and waits 2 s
    (presumably so lazily loaded products get a chance to render).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs executed in order during every round
    steps = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in steps:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are gathered as four separate
    lists in document order and paired by position; the shortest list
    decides how many rows are kept.

    NOTE(review): positional pairing assumes every product card exposes all
    four fields -- a card without a rating or review count would shift the
    values of every later row. Confirm against the live markup.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is numeric once dots are removed (e.g. "4.2")
    ratings = []
    for rating_div in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        rating_text = rating_div.get_text(strip=True)
        if rating_text.replace('.', '').isdigit():
            ratings.append(rating_text)

    # Reviews: text of the form "(123)". Thousands separators are stripped so
    # "(1,234)" is kept as "1234" instead of being dropped, which would pair
    # every later review count with the wrong product.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, so only complete rows are stored
    page_total = 0
    for name_tag, price_tag, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_tag.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_tag.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape listing pages 1..3. The try/finally guarantees the Chrome session is
# closed even when a page raises or the run is interrupted.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait: if the selector never appears, scrape whatever rendered,
        # but report the failure instead of swallowing it silently.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as wait_error:
            print(f"Wait for product elements failed on page {page_no}: {type(wait_error).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: locate the div wrapping the next page number and click it via JS
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    # Release the browser whether or not scraping completed.
    driver.quit()

# -------- SAVE CSV --------
df_electronics = pd.DataFrame(data).drop_duplicates()
df_electronics.to_csv("shopsy_electronics.csv", index=False)

print(f"\n Total products scraped: {len(df_electronics)}")



 Scraping page 1
Page 1 scraped 18 products

 Scraping page 2
Page 2 scraped 22 products

 Scraping page 3
Page 3 scraped 31 products

 Total products scraped: 71


In [30]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_electronics = pd.read_csv("shopsy_electronics.csv")
df_electronics

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Lipsip Mini Hair Straightner for Girls & Women...,Electronics,₹127,3.6,75
1,Bingeable HDA 64 Mini Hair straightener 220V C...,Electronics,₹127,3.6,23
2,"Skrynnzer Hair Dryer, Hair Straight skrynnzer ...",Electronics,₹130,3.7,141
3,ABC Combo of 1000Watts MINI Hair Dryer and Min...,Electronics,₹234,3.6,39
4,ROYALECL hair straightener comb s43 Hair Strai...,Electronics,₹398,4.0,74
...,...,...,...,...,...
66,FULKIZA Hair Straightener Comb for Women & Men...,Electronics,₹419,3.6,226
67,"IMC Mini Hair Straightener, Portable Mini Cera...",Electronics,₹133,4.0,136
68,HARDAN Hair Straightener Mini Portable for dai...,Electronics,₹180,3.8,392
69,Azkiya Mini Portable Electronic Hair Straighte...,Electronics,₹167,3.6,3


## Mobile Accessories

In [31]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stored in every row's "Category" field, and the listing page to scrape.
CATEGORY = "Mobile Accessories"
URL = "https://www.shopsy.in/mobile-accessories-online"

# Chrome is started maximized with the AutomationControlled Blink feature disabled.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() provides the chromedriver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Explicit waits created from `wait` time out after 15 seconds.
wait = WebDriverWait(driver, 15)

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

# Rows appended by scrape_page() accumulate here across pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll rounds on the page held by the global ``driver``.

    Each round jumps to the bottom of the document, waits 2 s, scrolls back
    up by 400 px, waits 1 s, then jumps to the bottom again and waits 2 s
    (presumably so lazily loaded products get a chance to render).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs executed in order during every round
    steps = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in steps:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are gathered as four separate
    lists in document order and paired by position; the shortest list
    decides how many rows are kept.

    NOTE(review): positional pairing assumes every product card exposes all
    four fields -- a card without a rating or review count would shift the
    values of every later row. Confirm against the live markup.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is numeric once dots are removed (e.g. "4.2")
    ratings = []
    for rating_div in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        rating_text = rating_div.get_text(strip=True)
        if rating_text.replace('.', '').isdigit():
            ratings.append(rating_text)

    # Reviews: text of the form "(123)". Thousands separators are stripped so
    # "(1,234)" is kept as "1234" instead of being dropped, which would pair
    # every later review count with the wrong product.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, so only complete rows are stored
    page_total = 0
    for name_tag, price_tag, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_tag.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_tag.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape listing pages 1..3. The try/finally guarantees the Chrome session is
# closed even when a page raises or the run is interrupted.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait: if the selector never appears, scrape whatever rendered,
        # but report the failure instead of swallowing it silently.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as wait_error:
            print(f"Wait for product elements failed on page {page_no}: {type(wait_error).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: locate the div wrapping the next page number and click it via JS
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    # Release the browser whether or not scraping completed.
    driver.quit()

# -------- SAVE CSV --------
df_mobile_accessories = pd.DataFrame(data).drop_duplicates()
df_mobile_accessories.to_csv("shopsy_mobile_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_mobile_accessories)}")



 Scraping page 1
Page 1 scraped 26 products

 Scraping page 2
Page 2 scraped 29 products

 Scraping page 3
Page 3 scraped 35 products

 Total products scraped: 88


In [32]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_mobile_accessories = pd.read_csv("shopsy_mobile_accessories.csv")
df_mobile_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,MAK 20W Power Delivery Type C Charger with Lig...,Mobile Accessories,₹200,4.0,958
1,MARKMAHI 3 in 1 charging cable multi charging ...,Mobile Accessories,₹112,3.7,288
2,MAK 80W Supervooc Fast USB Charger With White ...,Mobile Accessories,₹180,3.9,615
3,CLAT SUPERVOOC 80W Power Adapter Super Fast Ch...,Mobile Accessories,₹180,4.0,53
4,ZAPPY Apple iPhone 6 Plus Polycarbonate Back C...,Mobile Accessories,₹149,4.2,436
...,...,...,...,...,...
83,"TP TROOPS 4 Ports USB Charger, 4.1A USB Wall C...",Mobile Accessories,₹196,4.1,15
84,MAK 65W VOOC/FLASH/DART/WRAP 6.5A Adapter Comp...,Mobile Accessories,₹183,4.1,97
85,RSDWAG 3 in 1 Charging Cable Multi charging ca...,Mobile Accessories,₹120,3.6,55
86,Smarter Buy MBM 5A/50W SUPERFAST CHARGER CABLE...,Mobile Accessories,₹120,3.6,39


## Computer Accessories

In [33]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stored in every row's "Category" field, and the listing page to scrape.
CATEGORY = "Computer Accessories"
URL = "https://www.shopsy.in/computers/pr?sid=6bo&marketplace=FLIPKART"

# Chrome is started maximized with the AutomationControlled Blink feature disabled.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() provides the chromedriver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Explicit waits created from `wait` time out after 15 seconds.
wait = WebDriverWait(driver, 15)

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

# Rows appended by scrape_page() accumulate here across pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll rounds on the page held by the global ``driver``.

    Each round jumps to the bottom of the document, waits 2 s, scrolls back
    up by 400 px, waits 1 s, then jumps to the bottom again and waits 2 s
    (presumably so lazily loaded products get a chance to render).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs executed in order during every round
    steps = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in steps:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are gathered as four separate
    lists in document order and paired by position; the shortest list
    decides how many rows are kept.

    NOTE(review): positional pairing assumes every product card exposes all
    four fields -- a card without a rating or review count would shift the
    values of every later row. Confirm against the live markup.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is numeric once dots are removed (e.g. "4.2")
    ratings = []
    for rating_div in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        rating_text = rating_div.get_text(strip=True)
        if rating_text.replace('.', '').isdigit():
            ratings.append(rating_text)

    # Reviews: text of the form "(123)". Thousands separators are stripped so
    # "(1,234)" is kept as "1234" instead of being dropped, which would pair
    # every later review count with the wrong product.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, so only complete rows are stored
    page_total = 0
    for name_tag, price_tag, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_tag.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_tag.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape listing pages 1..3. The try/finally guarantees the Chrome session is
# closed even when a page raises or the run is interrupted.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait: if the selector never appears, scrape whatever rendered,
        # but report the failure instead of swallowing it silently.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as wait_error:
            print(f"Wait for product elements failed on page {page_no}: {type(wait_error).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: locate the div wrapping the next page number and click it via JS
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    # Release the browser whether or not scraping completed.
    driver.quit()

# -------- SAVE CSV --------
df_computer_accessories = pd.DataFrame(data).drop_duplicates()
df_computer_accessories.to_csv("shopsy_computer_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_computer_accessories)}")



 Scraping page 1
Page 1 scraped 32 products

 Scraping page 2
Page 2 scraped 32 products

 Scraping page 3
Page 3 scraped 37 products

 Total products scraped: 96


In [34]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_computer_accessories = pd.read_csv("shopsy_computer_accessories.csv")
df_computer_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,HP v220m 128 GB Pen Drive,Computer Accessories,₹326,3.8,654
1,Fastway Lenovo Tab M10 FHD Plus 10.3 inch Tb-X...,Computer Accessories,₹410,3.6,8
2,DEPOFF Large Extended Keyboard Shortcut Key Mo...,Computer Accessories,₹118,4.3,40
3,HP V220G 2IN OTG FREE 128 GB Pen Drive,Computer Accessories,₹330,3.8,3
4,ALRITO USB Type A Male to Dual PS/2 Female for...,Computer Accessories,₹176,4.7,13
...,...,...,...,...,...
91,TULOSTA Large Extended Keyboard Shortcut key M...,Computer Accessories,₹151,4.2,307
92,Shivsoft 3D Optical wired USB Mouse in Black M...,Computer Accessories,₹98,4.0,96
93,Bestor USB Hub Multiport Adapter for MacBook P...,Computer Accessories,₹251,3.6,479
94,"R FASHION Laptop Tabletop Stand, Fold-Up, Adju...",Computer Accessories,₹198,4.0,30


## Audio Video

In [35]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stored in every row's "Category" field, and the listing page to scrape.
CATEGORY = "Audio & Video"
URL = "https://www.shopsy.in/audio-video/pr?sid=0pm&p[]=facets.wired_wireless%255B%255D%3DWireless"

# Chrome is started maximized with the AutomationControlled Blink feature disabled.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() provides the chromedriver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Explicit waits created from `wait` time out after 15 seconds.
wait = WebDriverWait(driver, 15)

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

# Rows appended by scrape_page() accumulate here across pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll rounds on the page held by the global ``driver``.

    Each round jumps to the bottom of the document, waits 2 s, scrolls back
    up by 400 px, waits 1 s, then jumps to the bottom again and waits 2 s
    (presumably so lazily loaded products get a chance to render).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs executed in order during every round
    steps = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in steps:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are gathered as four separate
    lists in document order and paired by position; the shortest list
    decides how many rows are kept.

    NOTE(review): positional pairing assumes every product card exposes all
    four fields -- a card without a rating or review count would shift the
    values of every later row. Confirm against the live markup.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is numeric once dots are removed (e.g. "4.2")
    ratings = []
    for rating_div in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        rating_text = rating_div.get_text(strip=True)
        if rating_text.replace('.', '').isdigit():
            ratings.append(rating_text)

    # Reviews: text of the form "(123)". Thousands separators are stripped so
    # "(1,234)" is kept as "1234" instead of being dropped, which would pair
    # every later review count with the wrong product.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, so only complete rows are stored
    page_total = 0
    for name_tag, price_tag, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_tag.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_tag.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape listing pages 1..3. The try/finally guarantees the Chrome session is
# closed even when a page raises or the run is interrupted.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait: if the selector never appears, scrape whatever rendered,
        # but report the failure instead of swallowing it silently.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as wait_error:
            print(f"Wait for product elements failed on page {page_no}: {type(wait_error).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: locate the div wrapping the next page number and click it via JS
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    # Release the browser whether or not scraping completed.
    driver.quit()

# -------- SAVE CSV --------
df_audio_video = pd.DataFrame(data).drop_duplicates()
df_audio_video.to_csv("shopsy_audio_video.csv", index=False)

print(f"\n Total products scraped: {len(df_audio_video)}")



 Scraping page 1
Page 1 scraped 20 products

 Scraping page 2
Page 2 scraped 19 products

 Scraping page 3
Page 3 scraped 27 products

 Total products scraped: 66


In [36]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_audio_video = pd.read_csv("shopsy_audio_video.csv")
df_audio_video

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,OWDIO OWDIO TRUE WIRELEES BLACK BLUETOOTH HEAD...,Audio & Video,₹199,3.5,194
1,NS WORLD Thunder Pods V1 | Deep Bass | Large B...,Audio & Video,₹207,3.7,440
2,MAGENESE M19 Earbuds/TWS/buds 5.1 Earbuds with...,Audio & Video,₹215,3.7,940
3,GWALBROS GWALBROS CM.F Wireless Bluetooth Earb...,Audio & Video,₹266,4.0,74
4,"SHIVAY ENTERPRISES NEW ANC TWS, Ear buds with ...",Audio & Video,₹300,4.1,248
...,...,...,...,...,...
61,Techobucks 100% Branded TG 113 Splash-Proof 3D...,Audio & Video,₹300,4.0,12
62,ZENPHONIA wired earphones best quality combo o...,Audio & Video,₹157,4.1,114
63,Awique Type C Wired Earphone with Mic & Volume...,Audio & Video,₹182,3.7,220
64,Techobucks Newest Boom Beat Headphones Built-i...,Audio & Video,₹251,3.5,169


## Men's Accessories

In [37]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
# Label stored in every row's "Category" field, and the listing page to scrape.
CATEGORY = "Men Accessories"
URL = "https://www.shopsy.in/bags-wallets-belts/pr?sid=reh&marketplace=FLIPKART"

# Chrome is started maximized with the AutomationControlled Blink feature disabled.
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

# ChromeDriverManager().install() provides the chromedriver path handed to Service.
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

# Explicit waits created from `wait` time out after 15 seconds.
wait = WebDriverWait(driver, 15)

# Open the listing and pause 6 seconds before anything else touches the page.
driver.get(URL)
time.sleep(6)

# Rows appended by scrape_page() accumulate here across pages.
data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    """Run six scroll rounds on the page held by the global ``driver``.

    Each round jumps to the bottom of the document, waits 2 s, scrolls back
    up by 400 px, waits 1 s, then jumps to the bottom again and waits 2 s
    (presumably so lazily loaded products get a chance to render).
    """
    to_bottom = "window.scrollTo(0, document.body.scrollHeight)"
    nudge_up = "window.scrollBy(0, -400)"
    # (script, pause in seconds) pairs executed in order during every round
    steps = ((to_bottom, 2), (nudge_up, 1), (to_bottom, 2))

    rounds_left = 6
    while rounds_left > 0:
        for script, pause in steps:
            driver.execute_script(script)
            time.sleep(pause)
        rounds_left -= 1

# -------- SCRAPE FUNCTION --------
def scrape_page():
    """Parse the page currently loaded in ``driver`` and append its products to ``data``.

    Names, prices, ratings and review counts are gathered as four separate
    lists in document order and paired by position; the shortest list
    decides how many rows are kept.

    NOTE(review): positional pairing assumes every product card exposes all
    four fields -- a card without a rating or review count would shift the
    values of every later row. Confirm against the live markup.

    Returns:
        int: number of rows appended to ``data`` for this page.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings: keep only matches whose text is numeric once dots are removed (e.g. "4.2")
    ratings = []
    for rating_div in soup.select("div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"):
        rating_text = rating_div.get_text(strip=True)
        if rating_text.replace('.', '').isdigit():
            ratings.append(rating_text)

    # Reviews: text of the form "(123)". Thousands separators are stripped so
    # "(1,234)" is kept as "1234" instead of being dropped, which would pair
    # every later review count with the wrong product.
    reviews = []
    for d in soup.select("div.css-146c3p1"):
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "").replace(",", "")
            if num.isdigit():
                reviews.append(num)

    # zip() stops at the shortest list, so only complete rows are stored
    page_total = 0
    for name_tag, price_tag, rating, review in zip(names, prices, ratings, reviews):
        data.append({
            "Product Name": name_tag.get_text(strip=True),
            "Category": CATEGORY,
            "Price": price_tag.get_text(strip=True),
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
# Scrape listing pages 1..3. The try/finally guarantees the Chrome session is
# closed even when a page raises or the run is interrupted.
try:
    for page_no in range(1, 4):
        print(f"\n Scraping page {page_no}")

        full_scroll()

        # Best-effort wait: if the selector never appears, scrape whatever rendered,
        # but report the failure instead of swallowing it silently.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
        except Exception as wait_error:
            print(f"Wait for product elements failed on page {page_no}: {type(wait_error).__name__}")

        scraped = scrape_page()
        print(f"Page {page_no} scraped {scraped} products")

        if page_no == 3:
            break

        # Pagination click: locate the div wrapping the next page number and click it via JS
        try:
            next_page = wait.until(EC.presence_of_element_located((
                By.XPATH, f"//div[div[text()='{page_no+1}']]"
            )))
            driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
            time.sleep(1)
            driver.execute_script("arguments[0].click()", next_page)
            time.sleep(5)
            driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(2)
        except Exception as e:
            print("Pagination failed:", e)
            break
finally:
    # Release the browser whether or not scraping completed.
    driver.quit()

# -------- SAVE CSV --------
df_men_accessories = pd.DataFrame(data).drop_duplicates()
df_men_accessories.to_csv("shopsy_men_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_men_accessories)}")



 Scraping page 1
Page 1 scraped 19 products

 Scraping page 2
Page 2 scraped 20 products

 Scraping page 3
Page 3 scraped 25 products

 Total products scraped: 60


In [38]:
# Reload the CSV written by the scraping cell and display the resulting frame.
df_men_accessories = pd.read_csv("shopsy_men_accessories.csv")
df_men_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,BPRIDE Men Black Texas Leatherite Belt,Men Accessories,₹77,3.9,294
1,AMAK INC Single Saree Cover High-Quality Singl...,Men Accessories,₹24,3.9,15
2,Saubhagye Gym Bag Body Building Pu Leather Duf...,Men Accessories,₹189,4.4,16
3,"Kiggo Men Brown, Brown Artificial Leather Belt",Men Accessories,₹136,3.7,10
4,Jholawala (Expandable) 65 L Travel Duffle Bags...,Men Accessories,₹235,3.7,216
5,Astro (Expandable) Light weight travel luggage...,Men Accessories,₹255,3.5,38
6,xfast Medium 25 L Backpack DAYPACK Bags Backpa...,Men Accessories,₹201,3.7,116
7,"RDP Men Black, Brown Artificial Leather Belt",Men Accessories,₹133,4.0,8
8,UHI Non-Woven Solid Foldable Storage Box/Bin O...,Men Accessories,₹213,3.7,222
9,ZEDEN (Expandable) PURPLE FLOWER Duffel With W...,Men Accessories,₹229,3.8,99


In [44]:
import pandas as pd

# Per-category frames to combine; they are stacked in exactly this order.
category_frames = [
    df_womens_clothing,
    df_mens_clothing,
    df_kids_clothing,
    df_footwear,
    df_home_decor,
    df_beauty1,
    df_beauty2,
    df_women_accessories,
    df_men_accessories,
    df_home_accessories,
    df_kitchen_appliances,
    df_sports,
    df_stationary,
    df_electronics,
    df_toys,
    df_mobile_accessories,
    df_health_fitness,
    df_audio_video,
    df_computer_accessories,
]

# ignore_index=True discards each frame's own row labels and renumbers the
# combined rows 0..n-1.
df_shopsy = pd.concat(category_frames, ignore_index=True)

df_shopsy


Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"KOTHOTYA Women Kurta, Pyjama & Dupatta Set",Women's Clothing,₹614,3.9,830
1,"BLACK PATTI Self Design Anarkali Kurta, Bottom...",Women's Clothing,₹587,3.8,694
2,MISS CLOTHING Women Embroidered Kurta,Women's Clothing,₹485,3.7,41
3,"TIGERSNAKE Solid, Plain Bollywood Lycra Blend ...",Women's Clothing,₹485,3.9,694
4,Bairaj Women Kurta and Pant Set,Women's Clothing,₹567,3.8,383
...,...,...,...,...,...
1480,TULOSTA Large Extended Keyboard Shortcut key M...,Computer Accessories,₹151,4.2,307
1481,Shivsoft 3D Optical wired USB Mouse in Black M...,Computer Accessories,₹98,4.0,96
1482,Bestor USB Hub Multiport Adapter for MacBook P...,Computer Accessories,₹251,3.6,479
1483,"R FASHION Laptop Tabletop Stand, Fold-Up, Adju...",Computer Accessories,₹198,4.0,30


In [45]:
# Keep only the first row for each (Product Name, Price) pair.
# The result is rebound to df_shopsy rather than mutating the frame with
# inplace=True, so the statement reads as a plain transformation and the
# frame object produced by an earlier cell is left untouched.
# Row labels are not renumbered, so gaps remain where duplicates were removed.
df_shopsy = df_shopsy.drop_duplicates(subset=["Product Name", "Price"])

In [46]:
# Display the combined frame; for a long frame pandas renders only the first
# and last rows with an ellipsis row in between.
df_shopsy

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"KOTHOTYA Women Kurta, Pyjama & Dupatta Set",Women's Clothing,₹614,3.9,830
1,"BLACK PATTI Self Design Anarkali Kurta, Bottom...",Women's Clothing,₹587,3.8,694
2,MISS CLOTHING Women Embroidered Kurta,Women's Clothing,₹485,3.7,41
3,"TIGERSNAKE Solid, Plain Bollywood Lycra Blend ...",Women's Clothing,₹485,3.9,694
4,Bairaj Women Kurta and Pant Set,Women's Clothing,₹567,3.8,383
...,...,...,...,...,...
1480,TULOSTA Large Extended Keyboard Shortcut key M...,Computer Accessories,₹151,4.2,307
1481,Shivsoft 3D Optical wired USB Mouse in Black M...,Computer Accessories,₹98,4.0,96
1482,Bestor USB Hub Multiport Adapter for MacBook P...,Computer Accessories,₹251,3.6,479
1483,"R FASHION Laptop Tabletop Stand, Fold-Up, Adju...",Computer Accessories,₹198,4.0,30


In [47]:
# Write the combined frame to disk; index=False omits the row labels from
# the CSV so only the data columns are stored.
df_shopsy.to_csv("shopsy_all_products.csv", index=False)

In [48]:
# Number of rows per Category value, listed from most to least frequent.
df_shopsy["Category"].value_counts()

Category
Beauty                  102
Women's Clothing         75
Computer Accessories     71
Health & Fitness         67
Stationary               61
Mobile Accessories       61
Kitchen Appliances       57
Footwear                 56
Women Accessories        53
Toys                     50
Home Decor               46
Electronics              46
Audio & Video            46
Men's Clothing           45
Men Accessories          42
Kid's Clothing           37
Home Furnishing          35
Sports                   23
Name: count, dtype: int64