# Product Web Scraper

In [None]:
from pprint import pprint
from urllib.parse import urljoin

import requests
from lxml import html

In [None]:
def get_html_tree(url, headers=None, timeout=10):
    """Download ``url`` and parse the response body into an lxml HTML tree.

    Args:
        url: Address of the page to fetch.
        headers: Optional mapping of HTTP request headers. An empty mapping
            is sent when this is omitted or falsy.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server can block the call indefinitely.

    Returns:
        The parsed document, or ``None`` when the status code is not 200 or
        when fetching/parsing raises. The reason is printed in both cases.
    """
    try:
        response = requests.get(url, headers=headers or {}, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch {url} - Status code: {response.status_code}")
            return None
        return html.fromstring(response.text)
    except Exception as e:
        # Deliberately broad: callers treat None as "page unavailable", so
        # network and parse failures alike are reported here, not raised.
        print(f"Error fetching {url}: {e}")
        return None

In [None]:
def parse_newme(url):
    """Scrape the product cards on a NewMe collection page.

    Args:
        url: Address of the NewMe listing page to fetch.

    Returns:
        A list with one dict per product card that has a title. Each dict
        holds ``product_title``, ``product_price``, ``product_link``,
        ``product_image_link``, ``product_rating``, ``product_discount`` and
        ``product_text``. ``product_image_url`` repeats the image value under
        the key this parser used before, so existing consumers keep working.
        An empty list is returned when the page cannot be fetched.
    """
    base_url = "https://newme.asia"
    tree = get_html_tree(url, {"User-Agent": "Mozilla/5.0"})
    if tree is None:
        return []

    products = []

    # NOTE(review): starts-with(@id, "") looks like it accepts every div, so
    # `div/a` is presumably the only effective filter - confirm whether an
    # id prefix was intended here.
    product_divs = tree.xpath('//div[starts-with(@id, "") and div/a]')

    for prod in product_divs:
        try:
            title = ''.join(prod.xpath('.//div/a/div/div/div[2]//text()')).strip()
            if not title:
                continue

            price_texts = [
                fragment.strip()
                for fragment in prod.xpath('.//div/a/div/div/div[3]/div/div//text()')
                if fragment.strip()
            ]
            # Only the first two non-empty text fragments form the price.
            price = ''.join(price_texts[:2])

            hrefs = prod.xpath('.//a/@href')
            # urljoin prefixes site-relative hrefs and leaves absolute ones
            # untouched, instead of blindly concatenating.
            link = urljoin(base_url, hrefs[0]) if hrefs else None

            # Keep the original @href lookup, but fall back to @src so an
            # ordinary <img src="..."> is not reported as missing.
            image_path = './/div/a/div/div/div[1]/div[1]/img'
            images = prod.xpath(image_path + '/@href') or prod.xpath(image_path + '/@src')
            img = images[0] if images else None

            discount = ''.join(prod.xpath('.//div/a/div/div/div[3]/div/div/div[3]//text()')).strip()

            rating_texts = prod.xpath('.//div[contains(@class, "bg-nm_white")]/text() | .//div[contains(@class, "bg-nm_white")]/span/text()')
            cleaned = [x.strip() for x in rating_texts if x.strip() and x.strip() != '|']
            # A rating is reported only when at least two fragments exist.
            rating = ''.join(cleaned[:2]) if len(cleaned) >= 2 else None

            # Join only the non-empty parts so a missing price or discount
            # does not leave doubled spaces in the summary text.
            text = ' '.join(part for part in (title, price, discount) if part)

            products.append({
                "product_title": title,
                "product_price": price,
                "product_link": link,
                "product_image_url": img,
                # Same value under the key the other site parsers emit.
                "product_image_link": img,
                "product_rating": rating or None,
                "product_discount": discount or None,
                "product_text": text
            })
        except Exception as e:
            # Best effort: one malformed card must not abort the whole page.
            print("Error parsing product:", e)
            continue

    return products
# Try the parser on a live NewMe collection page; the returned list is shown as the cell output.
parse_newme("https://newme.asia/collection/cannes-2025?subCategory=&product_cat=&orderby=menu_order")

[{'product_title': 'Gold Shimmer Gown',
  'product_price': '₹1599',
  'product_link': 'https://newme.asia/product/gold-shimmer-gown',
  'product_image_url': 'https://assets.newme.asia/wp-content/uploads/2024/11/23103344714ed636/NM-PRC-34-DRS-24-NOV-11620-GOLD(1).webp',
  'product_rating': '★4.4',
  'product_discount': '16% off',
  'product_text': 'Gold Shimmer Gown ₹1599 16% off'},
 {'product_title': 'Imitation Multicolor Drop Earrings',
  'product_price': '₹199',
  'product_link': 'https://newme.asia/product/multicolor-rhinestone-embellished-drop-earrings',
  'product_image_url': 'https://assets.newme.asia/wp-content/uploads/2025/05/17103234c2123f49/NM-PRC-215-ERG-25-FEB-2548-MULTI(1).webp',
  'product_rating': '★4.7',
  'product_discount': None,
  'product_text': 'Imitation Multicolor Drop Earrings ₹199'},
 {'product_title': 'Beige Shimmer Halter Neck Gown',
  'product_price': '₹1749',
  'product_link': 'https://newme.asia/product/beige-shimmer-halter-neck-gown',
  'product_image_url

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time

In [None]:
def parse_jiomart(url, wait_seconds=5):
    """Scrape product cards from a JioMart listing rendered in headless Chrome.

    Args:
        url: Address of the JioMart listing page to load.
        wait_seconds: Seconds to sleep after navigation before the page
            source is read, giving client-side rendering time to finish.

    Returns:
        A list with one dict per product card that has a title. Each dict
        holds ``product_title``, ``product_price``, ``product_link``,
        ``product_image_link``, ``product_rating`` (always ``None``),
        ``product_discount`` and ``product_text``.

    Raises:
        Whatever the WebDriver or the HTML parser raises; those errors are
        not caught, but the browser is always shut down first.
    """
    options = Options()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox"):
        options.add_argument(flag)

    print(f"Launching Chrome to fetch: {url}")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        time.sleep(wait_seconds)
        rendered_html = driver.page_source
    finally:
        # Quit even when navigation fails so no Chrome process is leaked.
        driver.quit()

    tree = html.fromstring(rendered_html)
    product_cards = tree.xpath('//*[@id="algolia_hits"]/div/ol/li')

    base_url = "https://www.jiomart.com"
    info = './/div[2]/div[2]/div'
    products = []

    for i, prod in enumerate(product_cards):
        try:
            first_row = ''.join(prod.xpath(f'{info}/div[1]//text()')).strip()

            # When the first row is the "sponsored" label, the title sits in
            # the second row and the price block in the third.
            if "sponsored" in first_row:
                title = ''.join(prod.xpath(f'{info}/div[2]//text()')).strip()
                price_row = 3
            else:
                title = first_row
                price_row = 2
            if not title:
                continue

            price = ''.join(prod.xpath(f'{info}/div[{price_row}]/div[1]/span[1]//text()')).strip()
            # NOTE(review): assumes the discount row shifts together with the
            # price row on sponsored cards - confirm against live markup.
            discount = ''.join(prod.xpath(f'{info}/div[{price_row}]/div[2]/span//text()')).strip()

            images = prod.xpath('.//div[2]/div[1]/div/div[1]/img/@src')
            img = images[0] if images else None

            hrefs = prod.xpath('.//a/@href')
            # urljoin prefixes site-relative hrefs and keeps absolute ones.
            link = urljoin(base_url, hrefs[0]) if hrefs else None

            # Join only the non-empty parts to avoid doubled spaces.
            product_text = ' '.join(part for part in (title, price, discount) if part)

            products.append({
                "product_title": title,
                "product_price": price,
                "product_link": link,
                "product_image_link": img,
                "product_rating": None,
                "product_discount": discount or None,
                "product_text": product_text
            })
        except Exception as e:
            # Best effort: one malformed card must not abort the whole page.
            print(f"Error parsing product {i+1}:", e)
    return products
# Try the parser on a live JioMart bedding listing; this launches headless Chrome and shows the returned list as the cell output.
parse_jiomart("https://www.jiomart.com/c/homeandkitchen/home-furnishing/bedding/31421")

Launching Chrome to fetch: https://www.jiomart.com/c/homeandkitchen/home-furnishing/bedding/31421


[{'product_title': 'Sleepsia Memory Foam Wedge Pillow - Orthopedic Back & Neck Pain Relief, Acid Reflux & Post-Surgery Support, Adjustable Leg Elevation Cushion (Blue/Grey)',
  'product_price': '₹815.00',
  'product_link': 'https://www.jiomart.com/p/homeandkitchen/sleepsia-memory-foam-orthopedic-bed-wedge-pillow-leg-elevation-incline-pillow-for-back-support-blue-grey/607958019',
  'product_image_link': 'https://www.jiomart.com/images/product/original/rvjbpus5fp/sleepsia-memory-foam-wedge-pillow-orthopedic-back-neck-pain-relief-acid-reflux-post-surgery-support-adjustable-leg-elevation-cushion-blue-grey-product-images-orvjbpus5fp-p607958019-0-202505302309.jpg?im=Resize=(360,360)',
  'product_rating': None,
  'product_discount': '72% OFF',
  'product_text': 'Sleepsia Memory Foam Wedge Pillow - Orthopedic Back & Neck Pain Relief, Acid Reflux & Post-Surgery Support, Adjustable Leg Elevation Cushion (Blue/Grey) ₹815.00 72% OFF'},
 {'product_title': 'Sleepsia Soft Luxurious Microfiber Hotel P

In [None]:
def parse_croma(url):
    """Scrape product cards from a Croma listing page.

    Args:
        url: Address of the Croma listing page to fetch.

    Returns:
        A list with one dict per product card that has a title or a link.
        Each dict holds ``product_title``, ``product_price``,
        ``product_link``, ``product_image_link``, ``product_rating`` (always
        ``None``), ``product_discount`` and ``product_text``. An empty list
        is returned when the page cannot be fetched.
    """
    base_url = "https://www.croma.com"
    tree = get_html_tree(url)
    if tree is None:
        return []

    products = []
    for prod in tree.xpath('//li[contains(@class,"product-item")]'):
        try:
            title = ''.join(prod.xpath('.//h3/a/text()')).strip()
            price = ''.join(prod.xpath('.//div[2]/div[2]/div[1]/div/span//text()')).strip()

            hrefs = prod.xpath('.//a/@href')
            # urljoin prefixes site-relative hrefs and keeps absolute ones.
            link = urljoin(base_url, hrefs[0]) if hrefs else None
            if not title and not link:
                # Nothing identifies this card, so it is not a product.
                continue

            # Select the @src strings of PNG images inside this card. The
            # query is card-relative (leading '.') and returns attribute
            # values, so the first match can be used as the link directly.
            image_srcs = prod.xpath('.//img[contains(@src, ".png")]/@src')
            img = image_srcs[0] if image_srcs else None

            rating = None
            discount = ''.join(prod.xpath('.//div[2]/div[2]/div[2]/span[3]//text()')).strip()
            # Join only the non-empty parts to avoid doubled spaces.
            text = ' '.join(part for part in (title, price, discount) if part)

            products.append({
                "product_title": title,
                "product_price": price,
                "product_link": link,
                "product_image_link": img,
                "product_rating": rating,
                # None for "no discount", matching the other site parsers.
                "product_discount": discount or None,
                "product_text": text
            })
        except Exception as e:
            # Best effort: one malformed card must not abort the whole page.
            print(f"Error parsing product: {e}")
            continue

    return products
# Try the parser on a live Croma gaming-laptop listing; the returned list is shown as the cell output.
parse_croma("https://www.croma.com/computers-tablets/laptops/gaming-laptops/c/806?q=%3Arelevance&srsltid=AfmBOorNua7Lm5pY1gIccoWx_DjcorgjhEhEjsEgItVLHnXE_lHwnwMT")

[{'product_title': 'Lenovo LOQ 15IRX9 Intel Core i5 13th Gen Gaming Laptop (24GB, 512GB SSD, Windows 11 Home, 6GB Graphics, 15.6 inch 144 Hz Full HD IPS Display, NVIDIA GeForce RTX 4050, MS Office 2021, Luna Grey, 2.38 KG)',
  'product_price': '₹87,990',
  'product_link': 'https://www.croma.com/lenovo-loq-15irx9-intel-core-i5-13th-gen-gaming-laptop-24gb-512gb-ssd-windows-11-home-6gb-graphics-15-6-inch-144-hz-full-hd-ips-display-nvidia-geforce-rtx-4050-ms-office-2021-luna-grey-2-38-kg-/p/307439',
  'product_image_link': None,
  'product_rating': None,
  'product_discount': '30% Off',
  'product_text': 'Lenovo LOQ 15IRX9 Intel Core i5 13th Gen Gaming Laptop (24GB, 512GB SSD, Windows 11 Home, 6GB Graphics, 15.6 inch 144 Hz Full HD IPS Display, NVIDIA GeForce RTX 4050, MS Office 2021, Luna Grey, 2.38 KG) ₹87,990 30% Off'},
 {'product_title': 'HP Vectus 16-s0089AX AMD Ryen 7 Laptop (16GB, 1TB, Windows 11 Home, 16.1 inch Full HD IPS Display, MS Office 2021, Performance Blue, 2.29 KG)',
  'pr

In [None]:
def parse_nike(url):
    """Scrape product cards from a Nike product-grid page.

    Args:
        url: Address of the Nike listing page to fetch.

    Returns:
        A list with one dict per product card that has a title or a link.
        Each dict holds ``product_title``, ``product_price``,
        ``product_link``, ``product_image_link``, ``product_rating`` and
        ``product_discount`` (both always ``None``) and ``product_text``.
        An empty list is returned when the page cannot be fetched.
    """
    base_url = "https://www.nike.com"
    hero_image = './/img[contains(@class, "product-card__hero-image")]'

    tree = get_html_tree(url, {"User-Agent": "Mozilla/5.0"})
    if tree is None:
        return []

    products = []
    for prod in tree.xpath('//div[contains(@class,"product-card product-grid__card")]'):
        try:
            title = ''.join(prod.xpath('.//div[@class="product-card__title"]/text()')).strip()
            subtitle = ''.join(prod.xpath('.//div[@class="product-card__subtitle"]/text()')).strip()
            price = ''.join(prod.xpath('.//div[contains(@class, "product-price")]//text()')).strip()

            hrefs = prod.xpath('.//a/@href')
            # urljoin prefixes site-relative hrefs and keeps absolute ones.
            link = urljoin(base_url, hrefs[0]) if hrefs else None
            if not title and not link:
                # Nothing identifies this card, so it is not a product.
                continue

            # Prefer data-src over src, and ignore inline "data:" URIs: src
            # can hold a tiny embedded placeholder instead of a real image
            # address, which is useless as a link.
            candidates = prod.xpath(hero_image + '/@data-src') + prod.xpath(hero_image + '/@src')
            img = next(
                (src for src in candidates if src and not src.startswith("data:")),
                None,
            )

            # This page exposes neither value to the parser.
            rating = None
            discount = None

            # Join only the parts that exist; interpolating the missing
            # discount used to append the literal word "None".
            text = ' '.join(part for part in (title, subtitle, price) if part)

            products.append({
                "product_title": title,
                "product_price": price,
                "product_link": link,
                "product_image_link": img,
                "product_rating": rating,
                "product_discount": discount,
                "product_text": text
            })
        except Exception as e:
            # Best effort: one malformed card must not abort the whole page.
            print(f"Error parsing product: {e}")
            continue

    return products
# Try the parser on the live Nike men's listing; the returned list is shown as the cell output.
parse_nike("https://www.nike.com/in/w/mens-nik1")

[{'product_title': 'Nike Revolution 8',
  'product_price': 'MRP : ₹ 4 295.00',
  'product_link': 'https://www.nike.com/in/t/revolution-8-road-running-shoes-WMhKGn/HJ9198-101',
  'product_image_link': 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7',
  'product_rating': None,
  'product_discount': None,
  'product_text': "Nike Revolution 8 Men's Road Running Shoes MRP : ₹ 4 295.00 None"},
 {'product_title': 'Nike Downshifter 13',
  'product_price': 'MRP : ₹ 4 295.00',
  'product_link': 'https://www.nike.com/in/t/downshifter-13-road-running-shoes-4Gw85J/FD6454-001',
  'product_image_link': 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7',
  'product_rating': None,
  'product_discount': None,
  'product_text': "Nike Downshifter 13 Men's Road Running Shoes MRP : ₹ 4 295.00 None"},
 {'product_title': 'Nike SB Force 58',
  'product_price': 'MRP : ₹ 6 295.00',
  'product_link': 'https://www.nike.com/in/t/sb-force-58-skate-shoes-6

In [None]:
# One listing page per supported storefront, keyed by site name.
urls = dict(
    NewMe="https://newme.asia/collection/cannes-2025?subCategory=&product_cat=&orderby=menu_order",
    JioMart="https://www.jiomart.com/c/homeandkitchen/home-furnishing/bedding/31421",
    Croma="https://www.croma.com/computers-tablets/laptops/gaming-laptops/c/806?srsltid=AfmBOorNua7Lm5pY1gIccoWx_DjcorgjhEhEjsEgItVLHnXE_lHwnwMT",
    Nike="https://www.nike.com/in/w/mens-nik1",
)

# Run every site parser in a fixed order and pool the records in one list.
results = []
results.extend(parse_newme(urls["NewMe"]))
results.extend(parse_jiomart(urls["JioMart"]))
results.extend(parse_croma(urls["Croma"]))
results.extend(parse_nike(urls["Nike"]))

# Report the combined count, then preview the first five records.
print(f"\nTotal Products Fetched: {len(results)}")
pprint(results[:5])

Launching Chrome to fetch: https://www.jiomart.com/c/homeandkitchen/home-furnishing/bedding/31421

Total Products Fetched: 87
[{'product_discount': '16% off',
  'product_image_url': 'https://assets.newme.asia/wp-content/uploads/2024/11/23103344714ed636/NM-PRC-34-DRS-24-NOV-11620-GOLD(1).webp',
  'product_link': 'https://newme.asia/product/gold-shimmer-gown',
  'product_price': '₹1599',
  'product_rating': '★4.4',
  'product_text': 'Gold Shimmer Gown ₹1599 16% off',
  'product_title': 'Gold Shimmer Gown'},
 {'product_discount': None,
  'product_image_url': 'https://assets.newme.asia/wp-content/uploads/2025/05/17103234c2123f49/NM-PRC-215-ERG-25-FEB-2548-MULTI(1).webp',
  'product_link': 'https://newme.asia/product/multicolor-rhinestone-embellished-drop-earrings',
  'product_price': '₹199',
  'product_rating': '★4.7',
  'product_text': 'Imitation Multicolor Drop Earrings ₹199',
  'product_title': 'Imitation Multicolor Drop Earrings'},
 {'product_discount': '21% off',
  'product_image_url'