## Single Product Scraper - MAINSTREET

In [3]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import re

# Build the Chrome options: each entry is a command-line switch handed to
# the browser when it starts.
chrome_options = Options()
for _chrome_flag in ("--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(_chrome_flag)

# Start the shared browser session used by the scraping functions below.
# The Service is built from whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(
    options=chrome_options,
    service=Service(ChromeDriverManager().install()),
)

# Scroll to bottom
def scroll_to_bottom(driver, pause=2, max_scrolls=None):
    """Scroll the page down repeatedly until its height stops changing.

    After every scroll the function sleeps for *pause* seconds and then
    re-reads ``document.body.scrollHeight``; it stops as soon as two
    consecutive readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)`` (a Selenium
            WebDriver in this notebook).
        pause: Seconds to wait after each scroll before re-measuring the
            page. Defaults to 2, the previously hard-coded value.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps the original unbounded behaviour; pass an
            integer to guarantee termination on pages that keep growing.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Height unchanged after a scroll: nothing more was loaded.
            break
        last_height = new_height

# Get price from page
def get_price(soup):
    """Return the price text found in *soup*, preferring the sale price.

    The first ``span`` with class ``price-item--sale`` is tried first; when
    that lookup yields nothing, the first ``span`` with class
    ``price-item--regular`` is used instead. The matched tag's text is
    returned with surrounding whitespace removed, or ``"N/A"`` when neither
    lookup produces a tag.
    """
    for css_class in ("price-item--sale", "price-item--regular"):
        candidate = soup.find("span", class_=css_class)
        if candidate:
            return candidate.text.strip()
    return "N/A"

# Scraping logic
def _normalize_image_src(src):
    """Return a usable image URL, or None when *src* is missing or empty.

    Protocol-relative values ("//host/path") get an explicit "https:" scheme;
    any other non-empty value is returned unchanged.
    """
    if not src:
        return None
    if src.startswith("//"):
        return "https:" + src
    return src


def _collect_size_prices(soup):
    """Select every available size in the dropdown and record its price.

    Uses the module-level ``driver``. *soup* is the page as parsed before any
    size was selected; it supplies the default price when the size dropdown
    cannot be located. Returns a dict mapping size value -> price text
    ("Error" for a size whose every attempt failed); never returns an empty
    dict.
    """
    select_locator = (By.CSS_SELECTOR, "select.select__select")
    price_locator = (By.CSS_SELECTOR, "span.price-item--sale, span.price-item--regular")
    retries = 2
    size_price_mapping = {}

    try:
        # Wait for the select element to be present and clickable
        size_select = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(select_locator)
        )

        # Keep options that carry a value and are not labelled "Unavailable".
        available_sizes = []
        for opt in Select(size_select).options:
            value = opt.get_attribute("value")
            if value and "Unavailable" not in opt.text:
                available_sizes.append(value)

        for size in available_sizes:
            for attempt in range(retries):
                try:
                    # Re-fetch the select element to avoid stale reference
                    size_select = WebDriverWait(driver, 10).until(
                        EC.element_to_be_clickable(select_locator)
                    )

                    # Scroll to dropdown and select size
                    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", size_select)
                    time.sleep(1)
                    Select(size_select).select_by_value(size)

                    # Fixed delay to let the page refresh the price, then
                    # confirm a price element is present.
                    time.sleep(3)
                    WebDriverWait(driver, 15).until(
                        EC.presence_of_element_located(price_locator)
                    )

                    # Re-parse the page so the price reflects the selection
                    updated_soup = BeautifulSoup(driver.page_source, 'html.parser')
                    size_price_mapping[size] = get_price(updated_soup)
                    break  # Success, exit retry loop

                except Exception as e:
                    print(f"[ERROR] Attempt {attempt + 1} failed for size {size}: {str(e)}")
                    if attempt == retries - 1:
                        size_price_mapping[size] = "Error"

    except Exception as e:
        print(f"[ERROR] Size dropdown not found or failed: {str(e)}")
        # No size dropdown, use default price
        size_price_mapping["N/A"] = get_price(soup)

    if not size_price_mapping:
        size_price_mapping = {"N/A": "N/A"}
    return size_price_mapping


def _collect_images():
    """Return the product image URLs as a list of strings.

    Uses the module-level ``driver``. Thumbnail images are tried first; when
    none yields a usable ``src`` the main media image is used as a fallback,
    and ``["N/A"]`` is returned when that fails as well.
    """
    thumbs_xpath = "//ul[contains(@id, 'Slider-Thumbnails')]//li//button//img"
    fallback_xpath = "//div[contains(@class, 'product__media') and contains(@class, 'media--transparent')]//img"

    try:
        WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.XPATH, thumbs_xpath))
        )
        images = []
        for img in driver.find_elements(By.XPATH, thumbs_xpath):
            # Skip thumbnails without a src instead of letting one bad
            # element discard every image collected so far.
            src = _normalize_image_src(img.get_attribute("src"))
            if src is not None:
                images.append(src)

        if not images:
            raise Exception("Primary thumbnail images not found")
        return images

    except Exception:
        try:
            fallback_img = driver.find_element(By.XPATH, fallback_xpath)
            fallback_src = _normalize_image_src(fallback_img.get_attribute("src"))
            if fallback_src is None:
                raise ValueError("fallback image has no src")
            return [fallback_src]
        except Exception as fallback_error:
            print(f"Fallback image fetch failed: {fallback_error}")
            return ["N/A"]


def scrape_product_page(url):
    """Scrape title, description, per-size prices and images from *url*.

    Relies on the module-level ``driver`` plus ``scroll_to_bottom`` and
    ``get_price`` defined in this notebook.

    Args:
        url: Address of the product page to load.

    Returns:
        A dict with the keys ``"title"``, ``"url"``, ``"size_price_mapping"``
        (dict of size -> price text), ``"description"`` and ``"images"``
        (list of URL strings). Fields that cannot be found are ``"N/A"``.
        Any exception escaping the scrape is printed and converted into a
        record whose fields are ``"Error"``; this function does not raise
        ``Exception`` subclasses.
    """
    try:
        driver.get(url)
        scroll_to_bottom(driver)

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "product__title"))
        )

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # --- Title ---
        title_tag = soup.find("h2", class_="h1")
        title = title_tag.text.strip() if title_tag else "N/A"

        # --- Description ---
        # A space separator keeps adjacent text nodes from being fused
        # together (previously e.g. "...go for a UK 8.Air Jordan 1 Low...").
        desc_tag = soup.find("div", class_="product__description")
        description = desc_tag.get_text(" ", strip=True) if desc_tag else "N/A"

        # --- Sizes and Prices ---
        size_price_mapping = _collect_size_prices(soup)

        # --- Images ---
        images = _collect_images()

        return {
            "title": title,
            "url": url,
            "size_price_mapping": size_price_mapping,
            "description": description,
            "images": images
        }

    except Exception as e:
        print(f"[ERROR] Failed to scrape {url} due to {str(e)}")
        return {
            "title": "Error",
            "url": url,
            "size_price_mapping": {"N/A": "Error"},
            "description": "Error",
            "images": ["Error"]
        }

# The browser session already exists at this point, so everything below runs
# inside try/finally: the browser is closed even if the prompt is interrupted
# or a later step raises.
try:
    # --- USER INPUT ---
    url = input("Enter the mainstreet product URL: ").strip()

    # --- SCRAPE ---
    data = scrape_product_page(url)

    # --- PRINT SCRAPED DATA ---
    print(f"\nScraped Data for {url}:\n")
    print(f"Title       : {data['title']}")
    print("Size-Price Mapping:")
    for size, price in data['size_price_mapping'].items():
        print(f" - Size {size}: {price}")
    print(f"Description : {data['description']}\n")
    print("Images:")
    for img_url in data['images']:
        print(f" - {img_url}")

    # --- SAVE TO DATAFRAME ---
    # One row per size; the product-level fields are repeated on every row.
    image_list = ', '.join(data['images'])
    rows = [
        {
            "title": data['title'],
            "url": data['url'],
            "size": size,
            "price": price,
            "description": data['description'],
            "images": image_list,
        }
        for size, price in data['size_price_mapping'].items()
    ]

    df = pd.DataFrame(rows)
finally:
    # --- Close Browser ---
    driver.quit()

Enter the mainstreet product URL:  https://marketplace.mainstreet.co.in/products/jordan-1-low-bred-toe-2-14



Scraped Data for https://marketplace.mainstreet.co.in/products/jordan-1-low-bred-toe-2-14:

Title       : Nike Jordan 1 Low Bred Toe 2.0
Size-Price Mapping:
 - Size UK 4.5: Rs. 9,299.00
 - Size UK 5: Rs. 9,799.00
 - Size UK 5.5: Rs. 9,799.00
 - Size UK 6: Rs. 9,999.00
 - Size UK 7: Rs. 9,999.00
 - Size UK 7.5: Rs. 9,999.00
 - Size UK 8: Rs. 9,999.00
 - Size UK 8.5: Rs. 9,999.00
 - Size UK 9: Rs. 9,999.00
 - Size UK 9.5: Rs. 9,499.00
 - Size UK 10: Rs. 9,999.00
 - Size UK 10.5: Rs. 9,999.00
 - Size UK 11: Rs. 9,999.00
 - Size UK 12: Rs. 10,999.00
Description : Size Guide - These fit true to size, so go for the size that fits you in most brands. If you wear a UK 8, go for a UK 8.Air Jordan 1 Low shoes are the classic style of Jordan Retro 1. The shoes have an upper made of leather and synthetic material. A solid rubber outsole with flexible grooves provides traction while playing. The shoes are durable and guarantee comfort while moving.

Images:
 - https://marketplace.mainstreet.co.in/

In [4]:
df

Unnamed: 0,title,url,size,price,description,images
0,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 4.5,"Rs. 9,299.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
1,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 5,"Rs. 9,799.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
2,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 5.5,"Rs. 9,799.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
3,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 6,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
4,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 7,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
5,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 7.5,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
6,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 8,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
7,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 8.5,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
8,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 9,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...
9,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 9.5,"Rs. 9,499.00","Size Guide - These fit true to size, so go for...",https://marketplace.mainstreet.co.in/cdn/shop/...


In [5]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import re

# Build the Chrome options: each entry is a command-line switch handed to
# the browser when it starts.
chrome_options = Options()
for _chrome_flag in ("--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(_chrome_flag)

# Start the shared browser session used by the scraping functions below.
# The Service is built from whatever ChromeDriverManager().install() returns.
driver = webdriver.Chrome(
    options=chrome_options,
    service=Service(ChromeDriverManager().install()),
)

# Scroll to bottom
def scroll_to_bottom(driver, pause=2, max_scrolls=None):
    """Scroll the page down repeatedly until its height stops changing.

    After every scroll the function sleeps for *pause* seconds and then
    re-reads ``document.body.scrollHeight``; it stops as soon as two
    consecutive readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)`` (a Selenium
            WebDriver in this notebook).
        pause: Seconds to wait after each scroll before re-measuring the
            page. Defaults to 2, the previously hard-coded value.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps the original unbounded behaviour; pass an
            integer to guarantee termination on pages that keep growing.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Height unchanged after a scroll: nothing more was loaded.
            break
        last_height = new_height

# Get price from page
def get_price(soup):
    """Return the price text found in *soup*, preferring the sale price.

    The first ``span`` with class ``price-item--sale`` is tried first; when
    that lookup yields nothing, the first ``span`` with class
    ``price-item--regular`` is used instead. The matched tag's text is
    returned with surrounding whitespace removed, or ``"N/A"`` when neither
    lookup produces a tag.
    """
    for css_class in ("price-item--sale", "price-item--regular"):
        candidate = soup.find("span", class_=css_class)
        if candidate:
            return candidate.text.strip()
    return "N/A"

# Scraping logic
def _normalize_image_src(src):
    """Return a usable image URL, or None when *src* is missing or empty.

    Protocol-relative values ("//host/path") get an explicit "https:" scheme;
    any other non-empty value is returned unchanged.
    """
    if not src:
        return None
    if src.startswith("//"):
        return "https:" + src
    return src


def _collect_size_prices(soup):
    """Select every available size in the dropdown and record its price.

    Uses the module-level ``driver``. *soup* is the page as parsed before any
    size was selected; it supplies the default price when the size dropdown
    cannot be located. Returns a dict mapping size value -> price text
    ("Error" for a size whose every attempt failed); never returns an empty
    dict.
    """
    select_locator = (By.CSS_SELECTOR, "select.select__select")
    price_locator = (By.CSS_SELECTOR, "span.price-item--sale, span.price-item--regular")
    retries = 2
    size_price_mapping = {}

    try:
        # Wait for the select element to be present and clickable
        size_select = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(select_locator)
        )

        # Keep options that carry a value and are not labelled "Unavailable".
        available_sizes = []
        for opt in Select(size_select).options:
            value = opt.get_attribute("value")
            if value and "Unavailable" not in opt.text:
                available_sizes.append(value)

        for size in available_sizes:
            for attempt in range(retries):
                try:
                    # Re-fetch the select element to avoid stale reference
                    size_select = WebDriverWait(driver, 10).until(
                        EC.element_to_be_clickable(select_locator)
                    )

                    # Scroll to dropdown and select size
                    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", size_select)
                    time.sleep(1)
                    Select(size_select).select_by_value(size)

                    # Fixed delay to let the page refresh the price, then
                    # confirm a price element is present.
                    time.sleep(3)
                    WebDriverWait(driver, 15).until(
                        EC.presence_of_element_located(price_locator)
                    )

                    # Re-parse the page so the price reflects the selection
                    updated_soup = BeautifulSoup(driver.page_source, 'html.parser')
                    size_price_mapping[size] = get_price(updated_soup)
                    break  # Success, exit retry loop

                except Exception as e:
                    print(f"[ERROR] Attempt {attempt + 1} failed for size {size}: {str(e)}")
                    if attempt == retries - 1:
                        size_price_mapping[size] = "Error"

    except Exception as e:
        print(f"[ERROR] Size dropdown not found or failed: {str(e)}")
        # No size dropdown, use default price
        size_price_mapping["N/A"] = get_price(soup)

    if not size_price_mapping:
        size_price_mapping = {"N/A": "N/A"}
    return size_price_mapping


def _collect_images():
    """Return the product image URLs as a list of strings.

    Uses the module-level ``driver``. Thumbnail images are tried first; when
    none yields a usable ``src`` the main media image is used as a fallback,
    and ``["N/A"]`` is returned when that fails as well.
    """
    thumbs_xpath = "//ul[contains(@id, 'Slider-Thumbnails')]//li//button//img"
    fallback_xpath = "//div[contains(@class, 'product__media') and contains(@class, 'media--transparent')]//img"

    try:
        WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.XPATH, thumbs_xpath))
        )
        images = []
        for img in driver.find_elements(By.XPATH, thumbs_xpath):
            # Skip thumbnails without a src instead of letting one bad
            # element discard every image collected so far.
            src = _normalize_image_src(img.get_attribute("src"))
            if src is not None:
                images.append(src)

        if not images:
            raise Exception("Primary thumbnail images not found")
        return images

    except Exception:
        try:
            fallback_img = driver.find_element(By.XPATH, fallback_xpath)
            fallback_src = _normalize_image_src(fallback_img.get_attribute("src"))
            if fallback_src is None:
                raise ValueError("fallback image has no src")
            return [fallback_src]
        except Exception as fallback_error:
            print(f"Fallback image fetch failed: {fallback_error}")
            return ["N/A"]


def scrape_product_page(url):
    """Scrape title, description, per-size prices and images from *url*.

    Relies on the module-level ``driver`` plus ``scroll_to_bottom`` and
    ``get_price`` defined in this notebook.

    Args:
        url: Address of the product page to load.

    Returns:
        A dict with the keys ``"title"``, ``"url"``, ``"size_price_mapping"``
        (dict of size -> price text), ``"description"`` and ``"images"``
        (list of URL strings). Fields that cannot be found are ``"N/A"``.
        Any exception escaping the scrape is printed and converted into a
        record whose fields are ``"Error"``; this function does not raise
        ``Exception`` subclasses.
    """
    try:
        driver.get(url)
        scroll_to_bottom(driver)

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "product__title"))
        )

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # --- Title ---
        title_tag = soup.find("h2", class_="h1")
        title = title_tag.text.strip() if title_tag else "N/A"

        # --- Description ---
        # A space separator keeps adjacent text nodes from being fused
        # together (previously e.g. "...go for a UK 8.Air Jordan 1 Low...").
        desc_tag = soup.find("div", class_="product__description")
        description = desc_tag.get_text(" ", strip=True) if desc_tag else "N/A"

        # --- Sizes and Prices ---
        size_price_mapping = _collect_size_prices(soup)

        # --- Images ---
        images = _collect_images()

        return {
            "title": title,
            "url": url,
            "size_price_mapping": size_price_mapping,
            "description": description,
            "images": images
        }

    except Exception as e:
        print(f"[ERROR] Failed to scrape {url} due to {str(e)}")
        return {
            "title": "Error",
            "url": url,
            "size_price_mapping": {"N/A": "Error"},
            "description": "Error",
            "images": ["Error"]
        }

# The browser session already exists at this point, so everything below runs
# inside try/finally: the browser is closed even if the prompt is interrupted
# or a later step raises.
try:
    # --- USER INPUT ---
    url = input("Enter the mainstreet product URL: ").strip()

    # --- SCRAPE ---
    data = scrape_product_page(url)

    # --- PRINT SCRAPED DATA ---
    print(f"\nScraped Data for {url}:\n")
    print(f"Title       : {data['title']}")
    print("Size-Price Mapping:")
    for size, price in data['size_price_mapping'].items():
        print(f" - Size {size}: {price}")
    print(f"Description : {data['description']}\n")
    print("Images:")
    for img_url in data['images']:
        print(f" - {img_url}")

    # --- SAVE TO DATAFRAME ---
    # One row per size; the product-level fields are repeated on every row.
    # The scraper does not collect a SKU, so that column is a fixed "N/A".
    image_list = ', '.join(data['images'])
    rows = [
        {
            "title": data['title'],
            "url": data['url'],
            "size": size,
            "price": price,
            "description": data['description'],
            "sku": "N/A",
            "images": image_list,
        }
        for size, price in data['size_price_mapping'].items()
    ]

    df = pd.DataFrame(rows)
finally:
    # --- Close Browser ---
    driver.quit()

Enter the mainstreet product URL:  https://marketplace.mainstreet.co.in/products/jordan-1-low-bred-toe-2-14



Scraped Data for https://marketplace.mainstreet.co.in/products/jordan-1-low-bred-toe-2-14:

Title       : Nike Jordan 1 Low Bred Toe 2.0
Size-Price Mapping:
 - Size UK 4.5: Rs. 9,299.00
 - Size UK 5: Rs. 9,799.00
 - Size UK 5.5: Rs. 9,799.00
 - Size UK 6: Rs. 9,999.00
 - Size UK 7: Rs. 9,999.00
 - Size UK 7.5: Rs. 9,999.00
 - Size UK 8: Rs. 9,999.00
 - Size UK 8.5: Rs. 9,999.00
 - Size UK 9: Rs. 9,999.00
 - Size UK 9.5: Rs. 9,499.00
 - Size UK 10: Rs. 9,999.00
 - Size UK 10.5: Rs. 9,999.00
 - Size UK 11: Rs. 9,999.00
 - Size UK 12: Rs. 10,999.00
Description : Size Guide - These fit true to size, so go for the size that fits you in most brands. If you wear a UK 8, go for a UK 8.Air Jordan 1 Low shoes are the classic style of Jordan Retro 1. The shoes have an upper made of leather and synthetic material. A solid rubber outsole with flexible grooves provides traction while playing. The shoes are durable and guarantee comfort while moving.

Images:
 - https://marketplace.mainstreet.co.in/

In [6]:
df

Unnamed: 0,title,url,size,price,description,sku,images
0,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 4.5,"Rs. 9,299.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
1,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 5,"Rs. 9,799.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
2,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 5.5,"Rs. 9,799.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
3,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 6,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
4,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 7,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
5,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 7.5,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
6,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 8,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
7,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 8.5,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
8,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 9,"Rs. 9,999.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
9,Nike Jordan 1 Low Bred Toe 2.0,https://marketplace.mainstreet.co.in/products/...,UK 9.5,"Rs. 9,499.00","Size Guide - These fit true to size, so go for...",,https://marketplace.mainstreet.co.in/cdn/shop/...
