In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import re

# Chrome setup: build the option set first, then start the shared browser session.
chrome_options = Options()
# NOTE(review): these two flags are commonly required when Chrome runs inside
# containers/CI — confirm they are still wanted for local runs.
for _flag in ("--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(_flag)

# Resolve the chromedriver via webdriver_manager, then launch Chrome with it.
_chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(
    service=Service(_chromedriver_path),
    options=chrome_options,
)

def scroll_to_bottom(driver, pause=2, max_scrolls=None):
    """Scroll the page down repeatedly until its height stops growing.

    After every scroll the function sleeps for ``pause`` seconds and then
    re-reads ``document.body.scrollHeight``; it stops as soon as two
    consecutive readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)`` (a Selenium
            WebDriver in this file).
        pause: Seconds to wait after each scroll before measuring again.
            Defaults to 2, the value that was previously hard-coded.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps the original unbounded behaviour; pass an
            integer to guarantee termination on pages that keep growing.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Height unchanged after a scroll: nothing more was appended.
            break
        last_height = new_height

def get_price(soup):
    """Extract the displayed price from a parsed product page.

    Looks up the first element matching ``sale-price span.cvc-money``, takes
    its stripped text and removes the literal ``"MRP"`` prefix/label.

    Args:
        soup: Parsed document exposing ``select_one(css_selector)``
            (a BeautifulSoup object in this file).

    Returns:
        The price text, or ``"N/A"`` when the element is missing or when
        nothing is left after removing the ``"MRP"`` label.
    """
    price_tag = soup.select_one("sale-price span.cvc-money")
    if price_tag is None:
        return "N/A"
    price = price_tag.get_text(strip=True).replace("MRP", "").strip()
    # An element holding only the label (or nothing) must not yield "";
    # the rest of the scraper uses "N/A" as its missing-value marker.
    return price or "N/A"

# Matches "SKU", an optional separator (-, : or en dash) and captures the rest.
_SKU_PATTERN = re.compile(r"SKU\s*[-:–]?\s*(.+)", re.I)


def _extract_description_and_sku(soup):
    """Return ``(description, sku)`` from the first accordion's prose block.

    Paragraphs containing "SKU" (case-insensitive) feed the SKU; every other
    paragraph is joined with spaces into the description. Either value falls
    back to ``"N/A"`` when absent or empty.
    """
    container = soup.find("div", class_="accordion__content")
    prose = container.find("div", class_="prose") if container is not None else None
    if prose is None:
        return "N/A", "N/A"

    sku = "N/A"
    desc_parts = []
    for paragraph in prose.find_all("p"):
        text = paragraph.get_text(strip=True)
        if "SKU" in text.upper():
            # SKU paragraphs are kept out of the description even if the
            # pattern fails to capture a value.
            match = _SKU_PATTERN.search(text)
            if match:
                sku = match.group(1).strip()
        else:
            desc_parts.append(text)

    description = " ".join(desc_parts).strip()
    return description or "N/A", sku or "N/A"


def _price_for_size(driver, input_id, size, retries=2):
    """Select one size option in the live page and return its price.

    Clicks the input with id ``input_id`` via JavaScript, waits for the price
    element, and re-parses the page. Returns ``"Error"`` if every attempt
    raises.
    """
    for attempt in range(retries):
        try:
            size_button = driver.find_element(By.ID, input_id)
            driver.execute_script(
                "arguments[0].scrollIntoView({block: 'center'});", size_button
            )
            time.sleep(1)  # Small pause after scrolling

            # Click using JavaScript to avoid interception
            driver.execute_script("arguments[0].click();", size_button)

            # Fixed pause gives the page time to swap in the new price; the
            # explicit wait below only guarantees the element is present.
            time.sleep(3)
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "sale-price span.cvc-money")
                )
            )

            return get_price(BeautifulSoup(driver.page_source, 'html.parser'))
        except Exception as e:  # best-effort per size; failure is recorded, not raised
            print(f"[ERROR] Attempt {attempt + 1} failed for size {size}: {str(e)}")
    return "Error"


def _collect_size_prices(driver, soup):
    """Build the ``{size: price}`` mapping for every enabled size swatch.

    Falls back to ``{"N/A": <default price>}`` when the page has no size
    picker, and to ``{"N/A": "N/A"}`` when a picker exists but yields nothing.
    """
    mapping = {}
    size_grid_found = False

    for fieldset in soup.find_all("fieldset", class_="variant-picker__option"):
        legend = fieldset.find("legend")
        if legend is None or "size" not in legend.get_text(strip=True).lower():
            continue
        size_grid_found = True

        for label in fieldset.select("label.block-swatch:not(.is-disabled)"):
            span = label.find("span")
            # The label's "for" attribute names the input that must be clicked.
            input_id = label.get("for")
            if span is None or not input_id:
                continue
            size = span.get_text(strip=True)
            mapping[size] = _price_for_size(driver, input_id, size)

    # If no size grid is found, scrape the default price
    if not size_grid_found:
        mapping = {"N/A": get_price(soup)}

    return mapping or {"N/A": "N/A"}


def _normalise_image_url(src):
    """Return an absolute URL for ``src``, or ``None`` if it is not usable.

    Protocol-relative values get an ``https:`` prefix; values that are
    already absolute http(s) URLs are kept as-is instead of being dropped.
    """
    if not src:
        return None
    if src.startswith("//"):
        return "https:" + src
    if src.startswith(("http://", "https://")):
        return src
    return None


def _extract_images(soup):
    """Return the product image URLs, or ``["N/A"]`` when none are found."""
    candidates = (
        _normalise_image_url(img.get("src")) for img in soup.select("page-dots img")
    )
    images = [image_url for image_url in candidates if image_url]

    # If no images found in page-dots, check product-gallery__media
    if not images:
        gallery_img = soup.select_one(
            "div.product-gallery__media.snap-center.is-selected img"
        )
        if gallery_img is not None:
            image_url = _normalise_image_url(gallery_img.get("src"))
            if image_url:
                images.append(image_url)

    return images or ["N/A"]


def scrape_crepdogcrew_product(url):
    """Scrape one product page using the module-level ``driver``.

    Loads ``url``, scrolls to the bottom, waits up to 15 seconds for the
    product title, then extracts title, description, SKU, a size-to-price
    mapping (by clicking each enabled size option) and image URLs.

    Args:
        url: Product page URL to load.

    Returns:
        A dict with keys ``title``, ``url``, ``size_price_mapping`` (dict),
        ``description``, ``sku`` and ``images`` (list). Missing values are
        ``"N/A"``. If anything raises, the error is printed and a record
        whose fields are ``"Error"`` is returned instead; the function does
        not propagate exceptions.
    """
    try:
        driver.get(url)
        scroll_to_bottom(driver)

        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "product-info__title"))
        )

        # Snapshot taken before any size is clicked; title, description and
        # images are read from it.
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        title_tag = soup.find("h1", class_="product-info__title")
        title = title_tag.get_text(strip=True) if title_tag is not None else "N/A"

        description, sku = _extract_description_and_sku(soup)
        size_price_mapping = _collect_size_prices(driver, soup)
        images = _extract_images(soup)

        return {
            "title": title,
            "url": url,
            "size_price_mapping": size_price_mapping,
            "description": description,
            "sku": sku,
            "images": images
        }

    except Exception as e:  # top-level boundary: report and return a marker record
        print(f"[ERROR] Scraping failed for {url}: {str(e)}")
        return {
            "title": "Error",
            "url": url,
            "size_price_mapping": {"N/A": "Error"},
            "description": "Error",
            "sku": "Error",
            "images": ["Error"]
        }

# Run one interactive scrape. Everything sits inside try/finally so the Chrome
# session is closed even if the prompt is interrupted or a later step raises.
try:
    # --- Input ---
    url = input("Enter the Crepdogcrew product URL: ").strip()

    # --- Scrape ---
    data = scrape_crepdogcrew_product(url)

    # --- Print ---
    print(f"\nScraped Data for {url}:\n")
    print(f"Title       : {data['title']}")
    print("Size-Price Mapping:")
    for size, price in data['size_price_mapping'].items():
        print(f" - {size}: {price}")
    print(f"SKU         : {data['sku']}")
    print(f"Description :\n{data['description']}\n")
    print("Images:")
    for img in data['images']:
        print(f" - {img}")

    # --- Save to DataFrame ---
    # One row per size; the product-level fields are repeated on every row.
    rows = [
        {
            "title": data['title'],
            "url": data['url'],
            "size": size,
            "price": price,
            "description": data['description'],
            "sku": data['sku'],
            "images": data['images'],
        }
        for size, price in data['size_price_mapping'].items()
    ]

    df = pd.DataFrame(rows)
finally:
    # --- Cleanup ---
    driver.quit()

Enter the Crepdogcrew product URL:  https://crepdogcrew.com/products/jordan-4-retro-sb-navy



Scraped Data for https://crepdogcrew.com/products/jordan-4-retro-sb-navy:

Title       : Jordan 4 Retro SB Navy
Size-Price Mapping:
 - UK 6: ₹29,999
 - UK 6.5: ₹29,999
 - UK 7: ₹29,999
 - UK 7.5: ₹29,999
 - UK 8: ₹29,999
 - UK 8.5: ₹29,999
 - UK 9: ₹32,999
 - UK 9.5: ₹32,999
 - UK 10: ₹34,999
 - UK 11: ₹34,999
 - UK 12: ₹34,999
SKU         : DR5415-100
Description :
The Nike SB x Air Jordan 4 "Navy" features a summit white leather upper with navy blue accents on the eyelets, midsole, and heel tab, plus a gum rubber outsole. This sneaker is part of a collaboration between Nike Skateboarding and Jordan Brand Color Shown:Summit White/White/Navy/Neutral Grey/Gum Yellow/Varsity Red

Images:
 - https://crepdogcrew.com/cdn/shop/files/1_a6c57b52-aef1-4300-afc8-8c65d0542e66.png?v=1744910930&width=1080
 - https://crepdogcrew.com/cdn/shop/files/2_6de68890-d817-468d-a18a-6d81abade749.png?v=1744910930&width=1080


In [2]:
df

Unnamed: 0,title,url,size,price,description,sku,images
0,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 6,"₹29,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
1,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 6.5,"₹29,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
2,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 7,"₹29,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
3,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 7.5,"₹29,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
4,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 8,"₹29,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
5,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 8.5,"₹29,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
6,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 9,"₹32,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
7,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 9.5,"₹32,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
8,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 10,"₹34,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
9,Jordan 4 Retro SB Navy,https://crepdogcrew.com/products/jordan-4-retr...,UK 11,"₹34,999","The Nike SB x Air Jordan 4 ""Navy"" features a s...",DR5415-100,[https://crepdogcrew.com/cdn/shop/files/1_a6c5...
