In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/womens-clothing-online/pr?p[]=facets.price_range.from=500&p[]=facets.price_range.to=Max"
CATEGORY = "Women's Clothing"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_womens_clothing = pd.DataFrame(data).drop_duplicates()
df_womens_clothing.to_csv("shopsy_womens_clothing.csv", index=False)

print(f"\n Total products scraped: {len(df_womens_clothing)}")

driver.quit()



 Scraping page 1
Page 1 scraped 30 products

 Scraping page 2
Page 2 scraped 36 products

 Scraping page 3
Page 3 scraped 35 products

 Total products scraped: 89


In [2]:
import pandas as pd

df_womens_clothing = pd.read_csv("shopsy_womens_clothing.csv")
df_womens_clothing

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"BLACK PATTI Self Design Anarkali Kurta, Bottom...",Women's Clothing,₹587,3.8,692
1,"Nisha Fashion Kurta, Trouser/Pant & Dupatta Set",Women's Clothing,₹524,3.8,57
2,"TIGERSNAKE Solid, Plain Bollywood Lycra Blend ...",Women's Clothing,₹465,3.9,27
3,Parevadi Fashion Modern Graceful Women Kurta S...,Women's Clothing,₹629,4.0,157
4,"Reizen Embellished, Dyed, Geometric Print, Gra...",Women's Clothing,₹550,3.9,381
...,...,...,...,...,...
84,Sun Fashion And Lifestyle Cotton Rayon Blend P...,Women's Clothing,₹537,3.5,291
85,"AGBTEX Self Design, Embroidered, Woven Bollywo...",Women's Clothing,₹724,3.8,239
86,"AALO FASHIONS Solid Kurta, Trouser/Pant & Dupa...",Women's Clothing,₹583,3.6,602
87,"Aalo Fashion Embroidered Kurta, Trouser/Pant &...",Women's Clothing,₹485,4.0,24


In [3]:
df_womens_clothing.duplicated().sum()

np.int64(0)

In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/mens-clothing-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"
CATEGORY = "Men's Clothing"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_mens_clothing = pd.DataFrame(data).drop_duplicates()
df_mens_clothing.to_csv("shopsy_mens_clothing.csv", index=False)

print(f"\n Total products scraped: {len(df_mens_clothing)}")

driver.quit()



 Scraping page 1
Page 1 scraped 21 products

 Scraping page 2
Page 2 scraped 24 products

 Scraping page 3
Page 3 scraped 22 products

 Total products scraped: 67


In [6]:
df_mens_clothing = pd.read_csv("shopsy_mens_clothing.csv")
df_mens_clothing

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"TIGERSNAKE Printed Men White, Black T-Shirt",Men's Clothing,₹532,3.6,440
1,VOMAZO Men Cotton Blend Cargos,Men's Clothing,₹966,3.6,453
2,NALAKA Regular Men Light Blue Jeans,Men's Clothing,₹485,3.5,107
3,"BOUGHT FIRST Men Solid Formal Black, White Shirt",Men's Clothing,₹470,3.6,173
4,"VRMA Printed Men Black, White T-Shirt",Men's Clothing,₹485,3.8,190
...,...,...,...,...,...
62,TrendiVastra Men Solid Kurta,Men's Clothing,₹485,3.8,12
63,CARABA Men Casual Jacket,Men's Clothing,₹474,3.6,37
64,TrendiVastra Men Solid Kurta,Men's Clothing,₹485,3.8,37
65,"BOUGHT FIRST Men Solid Formal Light Blue, Blac...",Men's Clothing,₹493,3.5,279


In [7]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/kids-clothing-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"
CATEGORY = "Kid's Clothing"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_kids_clothing = pd.DataFrame(data).drop_duplicates()
df_kids_clothing.to_csv("shopsy_kids_clothing.csv", index=False)

print(f"\n Total products scraped: {len(df_kids_clothing)}")

driver.quit()



 Scraping page 1
Page 1 scraped 27 products

 Scraping page 2
Page 2 scraped 32 products

 Scraping page 3
Page 3 scraped 19 products

 Total products scraped: 71


In [8]:
df_kids_clothing = pd.read_csv("shopsy_kids_clothing.csv")
df_kids_clothing

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Fashionate World Legging For Girls,Kid's Clothing,₹485,3.7,119
1,CHHOTE NAWAB Track Pant For Girls,Kid's Clothing,₹526,3.9,103
2,Fashionate World Legging For Girls,Kid's Clothing,₹485,3.9,26
3,PLATINUMCARTZ Girls Casual Jacket,Kid's Clothing,₹465,4.0,33
4,Divybhav Girls Lehenga Choli Ethnic Wear Flora...,Kid's Clothing,₹583,3.7,37
...,...,...,...,...,...
66,CUTE TREND Legging For Girls,Kid's Clothing,₹485,3.9,26
67,MK Fashion Girls Party(Festive) Top Pyjama,Kid's Clothing,₹554,3.8,430
68,CUTE TREND Girls Printed Pure Cotton T Shirt,Kid's Clothing,₹526,3.7,12
69,CUTE TREND Girls Printed Pure Cotton T Shirt,Kid's Clothing,₹485,3.8,424


In [11]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/footwear-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"
CATEGORY = "Footwear"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_footwear = pd.DataFrame(data).drop_duplicates()
df_footwear.to_csv("shopsy_footwear.csv", index=False)

print(f"\n Total products scraped: {len(df_footwear)}")

driver.quit()



 Scraping page 1
Page 1 scraped 30 products

 Scraping page 2
Page 2 scraped 34 products

 Scraping page 3
Page 3 scraped 18 products

 Total products scraped: 75


In [12]:
df_footwear = pd.read_csv("shopsy_footwear.csv")
df_footwear

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,RAVDI Party Wear For Men,Footwear,₹509,3.9,43
1,RAVDI Party Wear For Men,Footwear,₹536,3.5,46
2,Beeone ComfortWalk Lace-Up Sneakers Sneakers F...,Footwear,₹475,3.5,6
3,jootiyapa JOOTIYAPA TRENDING PREMIUM SHOES FOR...,Footwear,₹886,5.0,22
4,PRODOC Men Casual Comfortable Chelsea Slip-On ...,Footwear,₹485,3.6,88
...,...,...,...,...,...
70,PENNEN Boys Lace Casual Shoes,Footwear,₹524,4.2,809
71,RAVDI Party Wear For Men,Footwear,₹485,3.6,434
72,PENNEN Boys Lace Casual Shoes,Footwear,₹535,3.7,166
73,Rekhi Brothers Men's Chunky Sneaker Casual Spo...,Footwear,₹536,3.5,38


In [13]:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/home-decor-online"
CATEGORY = "Home Decor"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_home_decor = pd.DataFrame(data).drop_duplicates()
df_home_decor.to_csv("shopsy_home_decor.csv", index=False)

print(f"\n Total products scraped: {len(df_home_decor)}")

driver.quit()




 Scraping page 1
Page 1 scraped 20 products

 Scraping page 2
Page 2 scraped 27 products

 Scraping page 3
Page 3 scraped 25 products

 Total products scraped: 70


In [14]:
df_home_decor = pd.read_csv("shopsy_home_decor.csv")
df_home_decor

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,SK PARO Radha Krishna wood wall hanging and wa...,Home Decor,₹84,4.2,130
1,walldecoree Decorative Wallpaper,Home Decor,₹57,3.5,211
2,COFP PACK OF 6 Wall Shelf Corner Wall Mount Wa...,Home Decor,₹203,4.1,33
3,ShopGlobal Analog Wall Clock,Home Decor,₹158,4.1,230
4,CLK craft purple_blue_butterfly_wall_hanging,Home Decor,₹46,4.1,849
...,...,...,...,...,...
65,Sonucollection Analog Wall Clock,Home Decor,₹165,3.9,849
66,COFP Multipurpose Wall Mounted Shelf Bathroom ...,Home Decor,₹86,3.7,31
67,ShopGlobal Wooden key holderFor multipurpose U...,Home Decor,₹59,3.8,639
68,Dzirezone Premium Premanand Ji Maharaj Wall Ha...,Home Decor,₹112,4.6,455


In [15]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/face-wash-online/pr?p%5B%5D=facets.price_range.from%3D200&p%5B%5D=facets.price_range.to%3DMax"
CATEGORY = "Beauty"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_beauty1 = pd.DataFrame(data).drop_duplicates()

print(f"\n Total products scraped: {len(df_beauty1)}")

driver.quit()



 Scraping page 1
Page 1 scraped 27 products

 Scraping page 2
Page 2 scraped 28 products

 Scraping page 3
Page 3 scraped 32 products

 Total products scraped: 81


In [18]:


from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/beauty-and-grooming/pr?sid=g9b"
CATEGORY = "Beauty"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_beauty2 = pd.DataFrame(data).drop_duplicates()

print(f"\n Total products scraped: {len(df_beauty)}")

driver.quit()




 Scraping page 1
Page 1 scraped 17 products

 Scraping page 2
Page 2 scraped 27 products

 Scraping page 3
Page 3 scraped 24 products

 Total products scraped: 152


In [17]:
df_beauty = pd.concat([df_beauty1,df_beauty2],ignore_index = True)
df_beauty

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"GHAR SOAPS Magic | Tan Removal, Skin Brighteni...",Beauty,₹271,4,106
1,GHAR SOAPS Aloe-Ceramide Hydrating for Dry to...,Beauty,₹284,3.6,5
2,Globus Naturals Coffee For All Skin Types Men ...,Beauty,₹225,3.8,391
3,"Peezons For Men, Anti Acne & Pimple, Skin Brig...",Beauty,₹215,3.6,358
4,Mylie Mulberry & Rose Buy One Get One - Fairne...,Beauty,₹185,4.3,848
...,...,...,...,...,...
147,"GHAR SOAPS Magic | Tan Removal, Skin Brighteni...",Beauty,₹271,4,540
148,"Globus Naturals Multani Mitti , Enriched With ...",Beauty,₹38,4,115
149,"javiro Handwash Liquid Refill Can, Refreshing ...",Beauty,₹165,3.8,35
150,Kiss Beauty Liquid matte lipstick - Red edition,Beauty,₹72,3.5,403


In [19]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/jewellery/pr?sid=mcr"
CATEGORY = "Women Accessories"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_women_accessories = pd.DataFrame(data).drop_duplicates()
df_women_accessories.to_csv("shopsy_women_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_women_accessories)}")

driver.quit()



 Scraping page 1
Page 1 scraped 19 products

 Scraping page 2
Page 2 scraped 24 products

 Scraping page 3
Page 3 scraped 23 products

 Total products scraped: 64


In [20]:
df_women_accessories = pd.read_csv("shopsy_women_accessories.csv")
df_women_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,RnK Creation ANKLE008 Copper Anklet,Women Accessories,₹177,3.9,343
1,Faizam collection Stainless Steel Bracelet,Women Accessories,₹62,4.0,707
2,Mad Club Alloy Jhumki Earring,Women Accessories,₹38,3.6,27
3,"Flaring Crystal Alloy Jhumki Earring, Drops & ...",Women Accessories,₹105,4.3,134
4,"Miraclewood Cubic Zirconia, Diamond, Crystal, ...",Women Accessories,₹126,4.1,258
...,...,...,...,...,...
59,MegaValue Trendy Festival Pendant Chain(BUY 2 ...,Women Accessories,₹129,4.2,233
60,ShopGlobal Beautiful Traditional indian anklet...,Women Accessories,₹198,3.6,426
61,TINALIVA Alloy Jewel Set,Women Accessories,₹132,3.9,52
62,Ruhi Silver Plated Stainless Steel Chain,Women Accessories,₹63,4.0,48


In [21]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/home-furnishing-online/pr?p%5B%5D=facets.price_range.from%3D500&p%5B%5D=facets.price_range.to%3DMax"
CATEGORY = "Home Furnishing"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_home_accessories = pd.DataFrame(data).drop_duplicates()
df_home_accessories.to_csv("shopsy_home_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_home_accessories)}")

driver.quit()



 Scraping page 1
Page 1 scraped 15 products

 Scraping page 2
Page 2 scraped 17 products

 Scraping page 3
Page 3 scraped 16 products

 Total products scraped: 48


In [22]:
df_home_accessories = pd.read_csv("shopsy_home_accessories.csv")
df_home_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,3.9,378
1,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,4.0,920
2,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,4.0,234
3,ShopGlobal Polyester Adults Mosquito Net,Home Furnishing,₹617,3.8,566
4,Radha Fitted Double Size Mattress Cover,Home Furnishing,₹598,3.8,556
5,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,3.9,244
6,clicknbuy 280 TC Cotton Double King Bedsheet,Home Furnishing,₹984,4.2,78
7,ShopGlobal Nylon Adults Mosquito Net,Home Furnishing,₹561,4.1,474
8,SMF 152 cm (5ft) Window Curtain,Home Furnishing,₹485,3.5,603
9,ShopGlobal Printed Single Fleece Blanket,Home Furnishing,₹485,3.8,87


In [23]:


from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/cookware-sets-online"
CATEGORY = "Kitchen Appliances"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_kitchen_appliances = pd.DataFrame(data).drop_duplicates()
df_kitchen_appliances.to_csv("shopsy_kitchen_appliances.csv", index=False)

print(f"\n Total products scraped: {len(df_kitchen_appliances)}")

driver.quit()



 Scraping page 1
Page 1 scraped 24 products

 Scraping page 2
Page 2 scraped 34 products

 Scraping page 3
Page 3 scraped 33 products

 Total products scraped: 88


In [24]:
df_kitchen_appliances = pd.read_csv("shopsy_kitchen_appliances.csv")
df_kitchen_appliances

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,BlessYou Non sticky Cookware (Pack of 6 pcs) -...,Kitchen Appliances,₹613,3.8,25
1,BuddyBOX Stainless Steel Handi | Milk Pot | St...,Kitchen Appliances,₹598,4.0,735
2,"LAZYwindow Iron Combo Pack – Tadka Pan, Roti T...",Kitchen Appliances,₹575,3.8,537
3,"Vigneshgenix Steel Tope Set With Lid,Patila Se...",Kitchen Appliances,₹409,3.8,58
4,Sumeet Cookware Set,Kitchen Appliances,"₹1,121",3.8,46
...,...,...,...,...,...
83,kushi Maa Red Appam 550gpl Cookware Set,Kitchen Appliances,₹298,3.9,9
84,LAZYwindow Iron Deep Bottom Kadhai & Tadka Pan...,Kitchen Appliances,₹295,3.6,146
85,GenKraft 4 Handi with lid + 4 Serving Spoon In...,Kitchen Appliances,₹550,3.6,70
86,LAZYwindow Premium Nonstick Tadka Pan/Fry Pan/...,Kitchen Appliances,₹664,4.4,47


In [25]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/cricket-bails-online"
CATEGORY = "Sports"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_sports = pd.DataFrame(data).drop_duplicates()
df_sports.to_csv("shopsy_sports.csv", index=False)

print(f"\n Total products scraped: {len(df_sports)}")

driver.quit()



 Scraping page 1
Page 1 scraped 22 products

 Scraping page 2
Page 2 scraped 0 products

 Scraping page 3
Page 3 scraped 0 products

 Total products scraped: 22


In [26]:
df_sports = pd.read_csv("shopsy_sports.csv")
df_sports

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,jaysarkar Tennis Trainer Rebound Ball (pack of...,Sports,₹252,4.1,16
1,SIPL Wooden stump bails Standard Bail,Sports,₹165,4.2,55
2,shri shyam traders cricket bails set of 2 Stan...,Sports,₹118,3.9,16
3,Dinetic Bails Standard Bail,Sports,₹132,3.9,20
4,Mercury Plus Medium Weight Cricket Rubber Tenn...,Sports,₹405,4.4,27
5,Supreme Trading SUPREME PLATIC CRICKET BALL 80...,Sports,₹250,3.8,4
6,YMD Stumps Bails Wooden Set Of 4 Standard Bail,Sports,₹162,4.0,8
7,Owlix bails Standard Bail,Sports,₹172,4.8,4
8,Dinetic WOODEN Standard Bail,Sports,₹113,4.1,7
9,ADM Cricket Bat Ball Plastic Cricket Bat No 8 ...,Sports,₹255,3.5,18


In [27]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/fitness-equipment-online"
CATEGORY = "Health & Fitness"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_health_fitness = pd.DataFrame(data).drop_duplicates()
df_health_fitness.to_csv("shopsy_health_fitness.csv", index=False)

print(f"\n Total products scraped: {len(df_health_fitness)}")

driver.quit()



 Scraping page 1
Page 1 scraped 34 products

 Scraping page 2
Page 2 scraped 38 products

 Scraping page 3
Page 3 scraped 38 products

 Total products scraped: 102


In [28]:
df_health_fitness = pd.read_csv("shopsy_health_fitness.csv")
df_health_fitness

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,SNAPSHOPWORLD Fitness Resistance Bands-4 Tube ...,Health & Fitness,₹221,3.9,11
1,AloneFit Weight Loss Sweat Belt 'All in one Si...,Health & Fitness,₹126,3.5,848
2,FirstFit Abdominal Ab Roller with 4 Wheel Exer...,Health & Fitness,₹368,4.4,24
3,IAMALOT Tummy Trimmer With Single Spring And S...,Health & Fitness,₹126,4.0,265
4,"KIDDU COLLECTION Tummy Trimmer, Waist Trimmer,...",Health & Fitness,₹226,4.0,51
...,...,...,...,...,...
97,"BoldFire Hand Grip Strengthener, 60 kg, with R...",Health & Fitness,₹245,3.9,18
98,RPC99 Double Spring Tummy Trimmer and Sweat Sl...,Health & Fitness,₹255,4.4,21
99,Pheonix Combo Of Chest Expander With 2 Foam Ha...,Health & Fitness,₹230,4.0,296
100,"RPC99 ""Solid Single Tummy Trimmer and Sweat Be...",Health & Fitness,₹171,4.2,14


In [29]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/learning-toys-online"
CATEGORY = "Toys"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_toys = pd.DataFrame(data).drop_duplicates()
df_toys.to_csv("shopsy_toys.csv", index=False)

print(f"\n Total products scraped: {len(df_toys)}")

driver.quit()



 Scraping page 1
Page 1 scraped 27 products

 Scraping page 2
Page 2 scraped 29 products

 Scraping page 3
Page 3 scraped 32 products

 Total products scraped: 85


In [30]:
df_toys = pd.read_csv("shopsy_toys.csv")
df_toys

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Toysnus Police Car Music,Toys,₹151,4.2,492
1,SHREE INCORPORATION Tent House for Baby Boy Gi...,Toys,₹248,4.0,49
2,THE RED BABY THE RED BABY Dancing Speaking Cac...,Toys,₹283,3.9,5
3,"ATHARV Mini Monster Truck Pull Back Cars Toys,...",Toys,₹36,4.1,664
4,Mayne Imaginative Play Building Set (100 pcs) ...,Toys,₹153,3.2,40
...,...,...,...,...,...
80,RBB HUB Soft toy for kidsCombo of Red Cap and ...,Toys,₹211,4.1,23
81,MINTORSI New tent House Tent For Girls And Boy...,Toys,₹285,4.0,331
82,ARIZON DIY Plastic Building Blocks Toy Set Cre...,Toys,₹313,3.7,257
83,Toysnus 360 Rechargeable Stunt Car With 3D Lig...,Toys,₹340,3.9,13


In [31]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/school-stationery-online"
CATEGORY = "Stationary"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_stationary = pd.DataFrame(data).drop_duplicates()
df_stationary.to_csv("shopsy_stationary.csv", index=False)

print(f"\n Total products scraped: {len(df_stationary)}")

driver.quit()



 Scraping page 1
Page 1 scraped 27 products

 Scraping page 2
Page 2 scraped 34 products

 Scraping page 3
Page 3 scraped 33 products

 Total products scraped: 86


In [32]:
df_stationary = pd.read_csv("shopsy_stationary.csv")
df_stationary

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Lynex Backbencher School & Office Bagpack 28 L...,Stationary,₹473,3.9,7
1,Happy Shopping Store Women White Panda Cosmeti...,Stationary,₹100,4.1,839
2,Stalory Multi-Functional Clipboard,Stationary,₹195,3.7,75
3,SIFU COLLECTION Beautiful Children Wooden Hand...,Stationary,₹138,4.0,246
4,ZERUS Stylish PU Leather School Bag | Men Wome...,Stationary,₹458,3.7,489
...,...,...,...,...,...
81,HMT COLLECTION Trendy Stylish Fashionable Woma...,Stationary,₹192,3.9,17
82,"RRLOOK RS 1,00,000 Money Saving Piggy Bank | C...",Stationary,₹113,3.7,103
83,MegaValue Vegetable Eraser /Fruit Eraser Birth...,Stationary,₹132,3.9,47
84,MSR STORE MO_24-Pink Panda_kids Bag Backpack,Stationary,₹110,3.6,48


In [33]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/hair-straightener-online"
CATEGORY = "Electronics"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_electronics = pd.DataFrame(data).drop_duplicates()
df_electronics.to_csv("shopsy_electronics.csv", index=False)

print(f"\n Total products scraped: {len(df_electronics)}")

driver.quit()



 Scraping page 1
Page 1 scraped 18 products

 Scraping page 2
Page 2 scraped 23 products

 Scraping page 3
Page 3 scraped 30 products

 Total products scraped: 70


In [34]:
df_electronics = pd.read_csv("shopsy_electronics.csv")
df_electronics

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,Lipsip Mini Hair Straightner for Girls & Women...,Electronics,₹127,3.6,75
1,Bingeable HDA 64 Mini Hair straightener 220V C...,Electronics,₹127,3.6,141
2,"Skrynnzer Hair Dryer, Hair Straight skrynnzer ...",Electronics,₹130,3.7,22
3,ABC Combo of 1000Watts MINI Hair Dryer and Min...,Electronics,₹234,3.6,38
4,ROYALECL hair straightener comb s56 Hair Strai...,Electronics,₹398,3.6,74
...,...,...,...,...,...
65,Foax BEAUTY Mini Straightner Hair Straightener...,Electronics,₹136,3.7,184
66,Brok Mini Professional Temperature Control Fla...,Electronics,₹136,3.4,389
67,FULKIZA Hair Straightener Comb for Women & Men...,Electronics,₹419,3.7,596
68,Mixonshun HR_1234 Hair Straightener,Electronics,₹136,4.3,141


In [35]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/mobile-accessories-online"
CATEGORY = "Mobile Accessories"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_mobile_accessories = pd.DataFrame(data).drop_duplicates()
df_mobile_accessories.to_csv("shopsy_mobile_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_mobile_accessories)}")

driver.quit()



 Scraping page 1
Page 1 scraped 31 products

 Scraping page 2
Page 2 scraped 38 products

 Scraping page 3
Page 3 scraped 39 products

 Total products scraped: 104


In [36]:
df_mobile_accessories = pd.read_csv("shopsy_mobile_accessories.csv")
df_mobile_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,MAK 20W Power Delivery Type C Charger with Lig...,Mobile Accessories,₹230,4.0,945
1,CLAT 80W SUPERVOOC/VOOC/FLASH/DART/ULTRADART/W...,Mobile Accessories,₹230,4.1,48
2,MAK 80W Supervooc Fast USB Charger With White ...,Mobile Accessories,₹195,3.9,283
3,RVAT 80W Bis certified with Type-C Cable Charg...,Mobile Accessories,₹266,3.8,427
4,Goldista Oppoo Reeno 10 Pro Plus (5G) Silicon ...,Mobile Accessories,₹122,3.7,878
...,...,...,...,...,...
99,MOBONE MOBONE®3-in-1 Cable 0.5 m All in One 60...,Mobile Accessories,₹175,3.8,23
100,RWM Redmi 13C 5G / Poco M6 5G Diamond Print Ca...,Mobile Accessories,₹160,4.1,15
101,Charghunt 45 watt Type-C Super Fast Charger Ad...,Mobile Accessories,₹283,4.2,172
102,"TP TROOPS 4 Ports USB Charger, 4.1A USB Wall C...",Mobile Accessories,₹203,4.1,153


In [37]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/computers/pr?sid=6bo&marketplace=FLIPKART"
CATEGORY = "Computer Accessories"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_computer_accessories = pd.DataFrame(data).drop_duplicates()
df_computer_accessories.to_csv("shopsy_computer_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_computer_accessories)}")

driver.quit()



 Scraping page 1
Page 1 scraped 33 products

 Scraping page 2
Page 2 scraped 38 products

 Scraping page 3
Page 3 scraped 38 products

 Total products scraped: 100


In [38]:
df_computer_accessories = pd.read_csv("shopsy_computer_accessories.csv")
df_computer_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,DEPOFF Large Extended Keyboard Shortcut Key Mo...,Computer Accessories,₹139,4.3,39
1,Fastway Lenovo Tab M10 FHD Plus 10.3 inch Tb-X...,Computer Accessories,₹410,3.6,8
2,RADHE KRISHNA ENTERPRISE Keyboard Pad - with O...,Computer Accessories,₹191,4.4,39
3,HP 128GB Pen drive V220W USB 3.0 FAST SPEED 12...,Computer Accessories,₹339,3.6,55
4,ALRITO USB Type A Male to Dual PS/2 Female for...,Computer Accessories,₹176,4.7,3
...,...,...,...,...,...
95,NAVYA Dustproof LED/LCD Cover for All Models L...,Computer Accessories,₹75,3.9,134
96,Agrima COMPACT USB HUB A-55 USB Hub,Computer Accessories,₹133,3.8,954
97,NAVYA Dustproof LED/LCD Cover for All Models L...,Computer Accessories,₹146,3.9,252
98,Agrima PREMIUM AGRiMA USB HUB A-99 USB Hub,Computer Accessories,₹95,3.8,7


In [39]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/audio-video/pr?sid=0pm&p[]=facets.wired_wireless%255B%255D%3DWireless"
CATEGORY = "Audio & Video"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_audio_video = pd.DataFrame(data).drop_duplicates()
df_audio_video.to_csv("shopsy_audio_video.csv", index=False)

print(f"\n Total products scraped: {len(df_audio_video)}")

driver.quit()



 Scraping page 1
Page 1 scraped 19 products

 Scraping page 2
Page 2 scraped 21 products

 Scraping page 3
Page 3 scraped 26 products

 Total products scraped: 66


In [40]:
df_audio_video = pd.read_csv("shopsy_audio_video.csv")
df_audio_video

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,OWDIO OWDIO TRUE WIRELEES BLACK BLUETOOTH HEAD...,Audio & Video,₹199,3.5,6
1,MAGENESE M19 Earbuds/TWS/buds 5.1 Earbuds with...,Audio & Video,₹215,3.7,302
2,TRONICS INDIA RC04-5pcs Red Cyan Paper Cardboa...,Audio & Video,₹192,4.8,868
3,GWALBROS GWALBROS CM.F Wireless Bluetooth Earb...,Audio & Video,₹266,4.0,193
4,GWALBROS New Airpod Pro A3 Earbuds With Touch ...,Audio & Video,₹195,3.7,588
...,...,...,...,...,...
61,PARYUSHI M19 Earbuds/TWS/Buds 5.1 Earbuds with...,Audio & Video,₹226,3.9,86
62,GWALBROS GWALBROS CM.F Wireless Bluetooth Earb...,Audio & Video,₹264,3.9,288
63,Vdtech Airpro Earbuds/ipod/buds 5.1 Earbuds wi...,Audio & Video,₹198,3.7,532
64,Valtria Ultrapods AI Calling Air buds Neckband...,Audio & Video,₹236,3.6,112


In [41]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# -------- CONFIG --------
URL = "https://www.shopsy.in/bags-wallets-belts/pr?sid=reh&marketplace=FLIPKART"
CATEGORY = "Men Accessories"

options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)

wait = WebDriverWait(driver, 15)
driver.get(URL)
time.sleep(6)

data = []

# -------- FULL SCROLL FUNCTION --------
def full_scroll():
    for _ in range(6):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)
        driver.execute_script("window.scrollBy(0, -400)")
        time.sleep(1)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)

# -------- SCRAPE FUNCTION --------
def scrape_page():
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Product names
    names = soup.select(
        "span.css-1jxf684.r-op4f77.r-1et8rh5.r-1b43r93.r-14yzgew.r-1udh08x"
    )

    # Prices
    prices = soup.select(
        "div.css-146c3p1.r-cqee49.r-1vgyyaa.r-1rsjblm.r-13hce6t"
    )

    # Ratings
    ratings_all = soup.select(
        "div.css-146c3p1.r-jwli3a.r-1et8rh5.r-1enofrn"
    )
    ratings = [r.get_text(strip=True) for r in ratings_all if r.get_text(strip=True).replace('.', '').isdigit()]

    # Reviews: divs starting with '(' and ending with ')', extract number only
    all_text_divs = soup.select("div.css-146c3p1")
    reviews = []
    for d in all_text_divs:
        txt = d.get_text(strip=True)
        if txt.startswith("(") and txt.endswith(")"):
            num = txt.replace("(", "").replace(")", "")
            if num.isdigit():
                reviews.append(num)

    # Ensure all lists same length
    count = min(len(names), len(prices), len(ratings), len(reviews))
    page_total = 0

    for i in range(count):
        name = names[i].get_text(strip=True)
        price = prices[i].get_text(strip=True)
        rating = ratings[i]
        review = reviews[i]

        data.append({
            "Product Name": name,
            "Category": CATEGORY,
            "Price": price,
            "Rating": rating,
            "No_of_Reviews": review
        })
        page_total += 1

    return page_total

# -------- MAIN LOOP: PAGE 1 → 3 --------
for page_no in range(1, 4):
    print(f"\n Scraping page {page_no}")

    full_scroll()

    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.css-146c3p1.r-jwli3a")))
    except:
        pass

    scraped = scrape_page()
    print(f"Page {page_no} scraped {scraped} products")

    if page_no == 3:
        break

    # Pagination click
    try:
        next_page = wait.until(EC.presence_of_element_located((
            By.XPATH, f"//div[div[text()='{page_no+1}']]"
        )))
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", next_page)
        time.sleep(1)
        driver.execute_script("arguments[0].click()", next_page)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(2)
    except Exception as e:
        print("Pagination failed:", e)
        break

# -------- SAVE CSV --------
df_men_accessories = pd.DataFrame(data).drop_duplicates()
df_men_accessories.to_csv("shopsy_men_accessories.csv", index=False)

print(f"\n Total products scraped: {len(df_men_accessories)}")

driver.quit()



 Scraping page 1
Page 1 scraped 16 products

 Scraping page 2
Page 2 scraped 25 products

 Scraping page 3
Page 3 scraped 24 products

 Total products scraped: 61


In [42]:
df_men_accessories = pd.read_csv("shopsy_men_accessories.csv")
df_men_accessories

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,BPRIDE Men Black Texas Leatherite Belt,Men Accessories,₹77,4.0,286
1,Pavis Men Wallet,Men Accessories,₹98,3.8,215
2,Astro (Expandable) Light weight travel luggage...,Men Accessories,₹255,3.5,37
3,"Kiggo Men Brown, Brown Artificial Leather Belt",Men Accessories,₹136,3.7,97
4,xfast 30L Laptop Backpack Medium Bagpack schoo...,Men Accessories,₹198,3.6,112
...,...,...,...,...,...
56,ARK FASHION Medium 25 L Laptop Backpack Backpa...,Men Accessories,₹185,4.0,672
57,word land Men Black Genuine Leather Belt,Men Accessories,₹127,3.8,158
58,Pavis Men Wallet,Men Accessories,₹125,3.9,198
59,ZUOKA School Collage Office Laptop Waterproof ...,Men Accessories,₹282,4.4,17


In [43]:
import pandas as pd

df_shopsy = pd.concat(
    [
        df_womens_clothing,
        df_mens_clothing,
        df_kids_clothing,
        df_footwear,
        df_home_decor,
        df_beauty,
        df_women_accessories,
        df_men_accessories,
        df_home_accessories,
        df_kitchen_appliances,
        df_sports,
        df_stationary,
        df_electronics,
        df_toys,
        df_mobile_accessories,
        df_health_fitness,
        df_audio_video,
        df_computer_accessories
    ],
    ignore_index=True
)

df_shopsy


Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"BLACK PATTI Self Design Anarkali Kurta, Bottom...",Women's Clothing,₹587,3.8,692
1,"Nisha Fashion Kurta, Trouser/Pant & Dupatta Set",Women's Clothing,₹524,3.8,57
2,"TIGERSNAKE Solid, Plain Bollywood Lycra Blend ...",Women's Clothing,₹465,3.9,27
3,Parevadi Fashion Modern Graceful Women Kurta S...,Women's Clothing,₹629,4.0,157
4,"Reizen Embellished, Dyed, Geometric Print, Gra...",Women's Clothing,₹550,3.9,381
...,...,...,...,...,...
1415,NAVYA Dustproof LED/LCD Cover for All Models L...,Computer Accessories,₹75,3.9,134
1416,Agrima COMPACT USB HUB A-55 USB Hub,Computer Accessories,₹133,3.8,954
1417,NAVYA Dustproof LED/LCD Cover for All Models L...,Computer Accessories,₹146,3.9,252
1418,Agrima PREMIUM AGRiMA USB HUB A-99 USB Hub,Computer Accessories,₹95,3.8,7


In [44]:
df_shopsy.drop_duplicates(
    
    subset=["Product Name", "Price"],
    inplace=True
)

In [46]:
df_shopsy

Unnamed: 0,Product Name,Category,Price,Rating,No_of_Reviews
0,"BLACK PATTI Self Design Anarkali Kurta, Bottom...",Women's Clothing,₹587,3.8,692
1,"Nisha Fashion Kurta, Trouser/Pant & Dupatta Set",Women's Clothing,₹524,3.8,57
2,"TIGERSNAKE Solid, Plain Bollywood Lycra Blend ...",Women's Clothing,₹465,3.9,27
3,Parevadi Fashion Modern Graceful Women Kurta S...,Women's Clothing,₹629,4.0,157
4,"Reizen Embellished, Dyed, Geometric Print, Gra...",Women's Clothing,₹550,3.9,381
...,...,...,...,...,...
1415,NAVYA Dustproof LED/LCD Cover for All Models L...,Computer Accessories,₹75,3.9,134
1416,Agrima COMPACT USB HUB A-55 USB Hub,Computer Accessories,₹133,3.8,954
1417,NAVYA Dustproof LED/LCD Cover for All Models L...,Computer Accessories,₹146,3.9,252
1418,Agrima PREMIUM AGRiMA USB HUB A-99 USB Hub,Computer Accessories,₹95,3.8,7


In [47]:
df_shopsy.to_csv("shopsy_all_products.csv", index=False)

In [48]:
df_shopsy["Category"].value_counts()

Category
Beauty                  104
Computer Accessories     74
Health & Fitness         73
Mobile Accessories       71
Stationary               69
Women's Clothing         64
Kitchen Appliances       62
Home Decor               58
Toys                     58
Men's Clothing           55
Footwear                 49
Audio & Video            46
Electronics              45
Men Accessories          44
Women Accessories        43
Kid's Clothing           41
Home Furnishing          23
Sports                   22
Name: count, dtype: int64