In [1]:
import requests
import time
import random
import re
from bs4 import BeautifulSoup

store_filters = {
    "kingwest": "filters=Locations,King+West+(95+Bathurst+St.)",
    "midtown": "filters=Locations,Midtown+(2488+Yonge+St.)"
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

max_pages = 50

for store, filter_param in store_filters.items():
    combined_scripts = ""

    for page in range(1, max_pages + 1):
        url = f"https://www.blacktoerunning.com/collections/all?page={page}&{filter_param}"
        print(f"📥 Fetching {store} page {page}...")

        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')

        found = False
        for script in soup.find_all("script"):
            if script.string and "productVariants" in script.string:
                combined_scripts += f"\n<!-- PAGE {page} -->\n{script.string}"
                found = True
                break  # only grab first match

        if not found:
            print(f"🛑 No productVariants found on page {page}. Stopping.")
            break

        time.sleep(random.uniform(2.2, 3.4))

    # Save to one JS-snippet file per store
    filename = f"blacktoe_{store}_scripts.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(combined_scripts)

    print(f"✅ Saved productVariants blocks to {filename}")


📥 Fetching kingwest page 1...
📥 Fetching kingwest page 2...
📥 Fetching kingwest page 3...
📥 Fetching kingwest page 4...
📥 Fetching kingwest page 5...
📥 Fetching kingwest page 6...
📥 Fetching kingwest page 7...
📥 Fetching kingwest page 8...
📥 Fetching kingwest page 9...
📥 Fetching kingwest page 10...
📥 Fetching kingwest page 11...
📥 Fetching kingwest page 12...
📥 Fetching kingwest page 13...
📥 Fetching kingwest page 14...
📥 Fetching kingwest page 15...
📥 Fetching kingwest page 16...
📥 Fetching kingwest page 17...
📥 Fetching kingwest page 18...
📥 Fetching kingwest page 19...
📥 Fetching kingwest page 20...
📥 Fetching kingwest page 21...
📥 Fetching kingwest page 22...
📥 Fetching kingwest page 23...
📥 Fetching kingwest page 24...
📥 Fetching kingwest page 25...
📥 Fetching kingwest page 26...
📥 Fetching kingwest page 27...
📥 Fetching kingwest page 28...
📥 Fetching kingwest page 29...
📥 Fetching kingwest page 30...
📥 Fetching kingwest page 31...
📥 Fetching kingwest page 32...
📥 Fetching kingwe

In [4]:
import requests
import re
from bs4 import BeautifulSoup

# === Choose your store ===
store = "kingwest"  # Change to "midtown" if needed

store_filters = {
    "kingwest": "filters=Locations,King+West+(95+Bathurst+St.)",
    "midtown": "filters=Locations,Midtown+(2488+Yonge+St.)"
}

# Build URL
base_url = f"https://www.blacktoerunning.com/collections/all?page=1&{store_filters[store]}"

# Set headers
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# === Step 1: Download the page
response = requests.get(base_url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")

# === Step 2: Find the script tag with webPixelsManagerAPI.publish
script_block = None
for script in soup.find_all("script"):
    if script.string and "webPixelsManagerAPI.publish(\"collection_viewed\"" in script.string:
        script_block = script.string
        break

# === Step 3: Save to .txt file
if script_block:
    filename = f"blacktoe_{store}_page1_script.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(script_block)
    print(f"✅ Saved product data script from page 1 to {filename}")
else:
    print("❌ Script block not found on page 1.")


✅ Saved product data script from page 1 to blacktoe_kingwest_page1_script.txt


In [7]:
import json
import re
import pandas as pd

# === Step 1: Load the script text file ===
with open("blacktoe_kingwest_page1_script.txt", "r", encoding="utf-8") as f:
    content = f.read()

# === Step 2: Extract full collection JSON block ===
# Match the JS call: webPixelsManagerAPI.publish("collection_viewed", { ... });
matches = re.findall(
    r'webPixelsManagerAPI\.publish\("collection_viewed",\s*(\{.*?\})\s*\);',
    content,
    re.DOTALL
)

# === Step 3: Parse the JSON and extract productVariants ===
product_variants = []
collection_data = {}

for match in matches:
    try:
        clean_json = match.replace('\u0026', '&')  # fix escaped characters
        parsed = json.loads(clean_json)
        if "collection" in parsed and "productVariants" in parsed["collection"]:
            collection_data = parsed["collection"]
            product_variants = collection_data["productVariants"]
            break
    except Exception as e:
        continue

# === Step 4: Flatten the productVariants into a clean list ===
flattened_variants = []

for variant in product_variants:
    product = variant["product"]
    price = variant["price"]
    image_url = variant.get("image", {}).get("src", "")

    flattened_variants.append({
        "collection_id": collection_data.get("id", ""),
        "collection_title": collection_data.get("title", ""),
        "product_title": product.get("title", ""),
        "vendor": product.get("vendor", ""),
        "type": product.get("type", ""),
        "product_id": product.get("id", ""),
        "product_url": "https://www.blacktoerunning.com" + product.get("url", ""),
        "variant_id": variant.get("id", ""),
        "variant_title": variant.get("title", ""),
        "untranslated_variant_title": variant.get("untranslatedTitle", ""),
        "sku": variant.get("sku", ""),
        "price_amount": price.get("amount", ""),
        "currency": price.get("currencyCode", ""),
        "image_url": "https:" + image_url if image_url.startswith("//") else image_url
    })

# === Step 5: Export to CSV ===
df = pd.DataFrame(flattened_variants)
df.to_csv("blacktoe_kingwest_parsed_page1.csv", index=False, encoding="utf-8")
print("✅ CSV saved: blacktoe_kingwest_parsed_page1.csv")


✅ CSV saved: blacktoe_kingwest_parsed_page1.csv


In [10]:
import requests
from bs4 import BeautifulSoup

url = "https://www.blacktoerunning.com/collections/all?page=1&filters=Locations,King+West+(95+Bathurst+St.)"
headers = {
    'User-Agent': 'Mozilla/5.0'
}

response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")

products = soup.select("li.productgrid--item")

print(f"✅ Found {len(products)} visible product cards on page 1.")

for product in products[:3]:  # preview first 3
    title = product.select_one(".productitem--title")
    price = product.select_one(".price--main .money")
    link = product.select_one("a[href]")
    
    print({
        "title": title.text.strip() if title else None,
        "price": price.text.strip() if price else None,
        "url": "https://www.blacktoerunning.com" + link['href'] if link else None
    })


✅ Found 0 visible product cards on page 1.


In [11]:
with open("kingwest_page1.html", "w", encoding="utf-8") as f:
    f.write(response.text)


In [13]:
import requests

# Define the URL (page 1 of King West inventory)
url = "https://www.blacktoerunning.com/collections/all?page=1&filters=Locations,King+West+(95+Bathurst+St.)"

# Use a real User-Agent to avoid bot detection
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
}

# Fetch the HTML of the page
response = requests.get(url, headers=headers)

# Save the response HTML to a file
with open("kingwest_page1.html", "w", encoding="utf-8") as f:
    f.write(response.text)

print("✅ Saved kingwest_page1.html. Open it in your browser to inspect.")


✅ Saved kingwest_page1.html. Open it in your browser to inspect.


In [15]:
!pip install selenium

Collecting selenium
  Using cached selenium-4.32.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.17 (from selenium)
  Using cached trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Using cached trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Downloading attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Using cached outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Using cached wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Using cached selenium-4.32.0-py3-none-any.whl (9.4 MB)
Using cached trio-0.30.0-py3-none-any.whl (499 kB)
Using cached trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloading attrs-25.3.0-py3-none-any.whl (63 kB)
Using cached outcome-1.3.0.post0-py2.py3-none-any.whl (10 kB)
Using cached wsproto-1.2.0-py3-none-any.whl (24 kB)
Inst

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import csv

# === Headless setup ===
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

url = "https://www.blacktoerunning.com/collections/all?page=1&filters=Locations,King+West+(95+Bathurst+St.)"
driver.get(url)

# Wait for product cards to load (up to 10 sec)
try:
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "li.productgrid--item"))
    )
    print("✅ Product cards loaded.")
except:
    print("❌ Timed out waiting for products.")
    driver.quit()
    exit()

# Parse loaded content
soup = BeautifulSoup(driver.page_source, "html.parser")
driver.quit()

products = soup.select("li.productgrid--item")
print(f"✅ Found {len(products)} visible products on page 1.")

# Extract data
data = []
for p in products:
    title = p.select_one(".productitem--title")
    price = p.select_one(".price--main .money")
    link = p.select_one("a[href]")

    data.append({
        "title": title.text.strip() if title else None,
        "price": price.text.strip() if price else None,
        "url": "https://www.blacktoerunning.com" + link["href"] if link else None
    })

# Save to CSV
with open("kingwest_page1_visible_products.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["title", "price", "url"])
    writer.writeheader()
    writer.writerows(data)

print("📦 Exported to kingwest_page1_visible_products.csv")


❌ Timed out waiting for products.


MaxRetryError: HTTPConnectionPool(host='localhost', port=54903): Max retries exceeded with url: /session/ed7c1558528ffa4611a8baad0ca3b894/source (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000019E0977CAA0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

: 