<a href="https://colab.research.google.com/github/suprithakonaje/smartdeal-stylist/blob/develop/smartdealstylist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [24]:
def compute_discount(original, sale):
    """Return the percentage saved when paying ``sale`` instead of ``original``.

    The percentage is rounded to the nearest integer with the built-in
    ``round``.

    Args:
        original: Price before the discount.
        sale: Price after the discount.

    Returns:
        The rounded discount percentage, or ``None`` when either price is
        falsy (``None`` or zero). Rejecting a zero ``original`` also avoids
        a division by zero.
    """
    if original and sale:
        fraction_paid = sale / original
        return round((1 - fraction_paid) * 100)
    return None

In [25]:
def scrape(category_id: int, region_group_id: int = 8):
    """Fetch the products of one Zara US category and flatten them into dicts.

    Requests the category's JSON product listing and walks
    ``productGroups -> elements -> commercialComponents``, emitting one
    record per commercial component.

    Args:
        category_id: Zara category identifier placed in the URL path.
        region_group_id: Value sent as the ``regionGroupId`` query parameter.

    Returns:
        A list of dicts with the keys ``brand``, ``name``,
        ``original_price``, ``sale_price``, ``discount_percent``,
        ``discount_label``, ``image_url`` and ``url``. An empty list is
        returned when the request fails, the body is not valid JSON, or the
        decoded payload is not a JSON object.
    """
    # TODO - add pagination so it scrapes all pages of a category automatically
    # right now it only fetches the first batch of products. This is essential
    # if you want a full catalog of deals

    url = f"https://www.zara.com/us/en/category/{category_id}/products"
    # Let requests build the query string instead of formatting it by hand.
    params = {"regionGroupId": region_group_id, "ajax": "true"}
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "application/json",
    }
    try:
        r = requests.get(url, params=params, headers=headers, timeout=20)
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers transport and HTTP status errors; ValueError
        # covers a body that cannot be decoded as JSON.
        print(f"Zara scraping failed: {e}")
        return []

    if not isinstance(data, dict):
        print("Zara scraping failed: unexpected response payload")
        return []

    products = []

    # ``x.get(key) or default`` is used throughout because a key that is
    # present with a JSON null value would bypass the default of dict.get().
    for group in data.get("productGroups") or []:
        for element in group.get("elements") or []:
            for component in element.get("commercialComponents") or []:
                product_id = component.get("id")
                seo_slug = (component.get("seo") or {}).get("keyword", "")
                product_url = (
                    f"https://www.zara.com/us/en/{seo_slug}-p{product_id}.html"
                    if product_id and seo_slug
                    else None
                )

                original = component.get("oldPrice")
                sale = component.get("price")

                # The image comes from the first media entry of the first
                # color; any missing level leaves image_url as None.
                image_url = None
                colors = (component.get("detail") or {}).get("colors") or []
                if colors:
                    media = colors[0].get("xmedia") or []
                    raw_url = media[0].get("url") if media else None
                    if raw_url:
                        image_url = raw_url.replace("{width}", "600")

                products.append({
                    "brand": "Zara",
                    "name": component.get("name"),
                    # NOTE(review): prices are divided by 100, presumably
                    # because the API reports them in cents - confirm.
                    "original_price": original / 100 if original else None,
                    "sale_price": sale / 100 if sale else None,
                    "discount_percent": component.get("displayDiscountPercentage"),
                    "discount_label": component.get("discountLabel"),
                    "image_url": image_url,
                    "url": product_url,
                })
    return products

In [None]:
# Scrape a single Zara category and persist the raw records as CSV.
zara_items = scrape(category_id=2419737)

# One row per scraped product; the index is left out of the file.
df = pd.DataFrame(data=zara_items)
df.to_csv(path_or_buf="deals_raw.csv", index=False)

saved_count = len(df)
print(f"Saved {saved_count} items into deals_raw.csv")


Saved 270 items into deals_raw.csv
