In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

BASE_URL = "https://www.poolweb.com"
START_URL = f"{BASE_URL}/collections/chlorine"
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_product_links(timeout=30):
    """Return absolute URLs of the products linked from the collection page.

    Fetches ``START_URL`` and collects the ``href`` of every
    ``a.boost-pfs-action-overlay`` anchor whose path starts with
    ``/products/``, prefixing each with ``BASE_URL``.

    NOTE(review): only ``START_URL`` itself is requested; if the collection
    is paginated, products on later pages are not visited.

    Args:
        timeout: Seconds to wait for the server before aborting the request.

    Returns:
        list[str]: Product URLs in page order with duplicates removed.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    # Without a timeout, requests can block indefinitely on a stalled server.
    res = requests.get(START_URL, headers=HEADERS, timeout=timeout)
    # Fail loudly on an error page instead of silently returning no links.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    hrefs = (a.get("href") for a in soup.select("a.boost-pfs-action-overlay"))
    links = [
        BASE_URL + href
        for href in hrefs
        if href and href.startswith("/products/")
    ]
    # dict.fromkeys drops repeated URLs while keeping first-seen order, so a
    # product linked twice on the page is only scraped once.
    return list(dict.fromkeys(links))

def parse_product(url, timeout=30):
    """Scrape a single product page into a flat dict.

    The returned dict always contains the keys ``title``, ``overview``,
    ``price``, ``stock``, ``description`` and ``link``; every name/value row
    found in ``table.striped-table`` is added as an extra key. Elements that
    are missing from the page yield an empty string.

    Args:
        url: Absolute URL of the product page.
        timeout: Seconds to wait for the server before aborting the request.

    Returns:
        dict[str, str]: Core fields followed by the product's spec entries.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    # Without a timeout, requests can block indefinitely on a stalled server.
    res = requests.get(url, headers=HEADERS, timeout=timeout)
    # Surface error pages to the caller rather than returning an all-empty row.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    def get_text(selector):
        """Return the stripped text of the first match, or "" if none."""
        tag = soup.select_one(selector)
        return tag.get_text(strip=True) if tag else ""

    # Overview: feature bullets flattened into one pipe-separated string.
    overview_items = [li.get_text(strip=True) for li in soup.select("div.features li")]

    # Description: keep a space between child elements so words don't fuse.
    desc_block = soup.select_one("div.product-description.rte")
    description = desc_block.get_text(separator=" ", strip=True) if desc_block else ""

    # Core data
    data = {
        "title": get_text("h1.prod-title"),
        "overview": " | ".join(overview_items),
        "price": get_text("div.main-price"),
        "stock": get_text("div.stock-status"),
        "description": description,
        "link": url,
    }

    # Specs: all key-value pairs; rows lacking either cell are skipped.
    for row in soup.select("table.striped-table tr"):
        key = row.select_one("td.spec-name")
        value = row.select_one("td.spec-value")
        if key and value:
            # setdefault keeps the core fields authoritative: a spec whose
            # label happens to equal e.g. "link" must not overwrite them.
            data.setdefault(key.get_text(strip=True), value.get_text(strip=True))

    return data

# ---- Main Execution ----
# Collect the product URLs from the collection page, then scrape each one.
product_links = get_product_links()
print(f"🔗 Total product links found: {len(product_links)}")

all_data = []
for i, link in enumerate(product_links, 1):
    print(f"🔍 [{i}] {link}")
    try:
        data = parse_product(link)
        all_data.append(data)
    except Exception as e:
        # Best-effort scrape: report the failing product and keep going.
        print(f"⚠️ Error on {link}: {e}")
    finally:
        # Throttle after every request, including failed ones, so an error
        # does not cause the next request to be sent with no delay.
        time.sleep(1)

# Convert to DataFrame (one row per successfully scraped product)
df = pd.DataFrame(all_data)
df.head()


🔗 Total product links found: 23
🔍 [1] https://www.poolweb.com/products/pool-season-chlorinating-tablets-wrapped-3-inch-50-lb-bucket
🔍 [2] https://www.poolweb.com/products/pool-season-chlorinating-tablets-wrapped-3-inch-25-lb-bucket
🔍 [3] https://www.poolweb.com/products/pool-season-chlorinating-tablets-wrapped-3-inch-8-lb-pail
🔍 [4] https://www.poolweb.com/products/pool-season-non-chlorine-shock-oxidizer-1-lb-pouch
🔍 [5] https://www.poolweb.com/products/pool-season-chlorinating-concentrate-50-lb-bucket
🔍 [6] https://www.poolweb.com/products/pool-season-chlorinating-concentrate-2-lb-jar
🔍 [7] https://www.poolweb.com/products/cal-hypo-shockwave-1-lb-pouch
🔍 [8] https://www.poolweb.com/products/pool-season-chlorinating-concentrate-25-lb-pail
🔍 [9] https://www.poolweb.com/products/pool-season-chlorinating-concentrate-1-lb-pouch
🔍 [10] https://www.poolweb.com/products/hock-68-calcium-hypochlorite-case-of-12-1-lb-bags
🔍 [11] https://www.poolweb.com/products/calcium-hypochlorite-briquettes-50

Unnamed: 0,title,overview,price,stock,description,link,Product Category,Poolweb Item #,Cross Reference #,Manufacturer,Shipping Weight
0,Pool Season Chlorinating Tablets Wrapped - 3 I...,3 inch wrapped tablets | 90% available chlorin...,$277.33,209 in stock,Description The Pool Season Chlorinating Table...,https://www.poolweb.com/products/pool-season-c...,Chlorine,HGH-50-3750,47251280,Pool Season,52.0 lb
1,Pool Season Chlorinating Tablets Wrapped - 3 I...,3 inch wrapped tablets | 90% available chlorin...,$147.01,225 in stock,Description The Pool Season Chlorinating Table...,https://www.poolweb.com/products/pool-season-c...,Chlorine,HGH-50-3725,47251270,Pool Season,27.0 lb
2,Pool Season Chlorinating Tablets Wrapped - 3 I...,3 inch wrapped tablets | 90% available chlorin...,$79.82,200 in stock,Description The Pool Season Chlorinating Table...,https://www.poolweb.com/products/pool-season-c...,Chlorine,HGH-50-3708,47251260,Pool Season,10.0 lb
3,Pool Season Non-Chlorine Shock Oxidizer - 1 Lb...,Non-chlorine oxidizer will not raise chlorine ...,$13.85,248 in stock,Description The Pool Season Non-Chlorine Shock...,https://www.poolweb.com/products/pool-season-n...,Chlorine,HGH-50-2849,47251330,Pool Season,2.0 lb
4,Pool Season Chlorinating Concentrate - 50 Lb. ...,Stabilized chlorinating concentrate | Totally ...,$334.29,214 in stock,Description This is a 50 pound bucket of Pool ...,https://www.poolweb.com/products/pool-season-c...,Chlorine,HGH-50-2650,47251320,Pool Season,52.0 lb


In [14]:
# Print the frame's column names, non-null counts and dtypes to check
# which scraped fields are missing for some products.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   title              23 non-null     object
 1   overview           23 non-null     object
 2   price              23 non-null     object
 3   stock              23 non-null     object
 4   description        23 non-null     object
 5   link               23 non-null     object
 6   Product Category   23 non-null     object
 7   Poolweb Item #     23 non-null     object
 8   Cross Reference #  13 non-null     object
 9   Manufacturer       23 non-null     object
 10  Shipping Weight    23 non-null     object
dtypes: object(11)
memory usage: 2.1+ KB


In [12]:
# Write the scraped frame to poolweb_chlorine.csv, omitting the index column.
# NOTE(review): this cell's execution count (12) is lower than the scraping
# cell's (13); re-run it after scraping so the file matches the current df.
df.to_csv('poolweb_chlorine.csv', index=False)