In [1]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

BASE_URL = "https://webscraper.io/test-sites/e-commerce/static"

# List to store all product data
all_products = []

print("--- Web Scraping: WebScraper.io Test E-Commerce Site ---")

try:
    # Step 1: Get all category URLs
    response = requests.get(BASE_URL)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    categories = soup.select("div.sidebar ul.nav > li > a")
    category_links = [
        "https://webscraper.io" + cat["href"] for cat in categories
    ]

    print(f"Found {len(category_links)} categories.")
    print("---------------------------------------")

    # Step 2: Loop through each category
    for category_url in category_links:
        print(f"Scraping category: {category_url}")
        page = 1

        while True:
            url = category_url + f"?page={page}"
            response = requests.get(url)
            if response.status_code == 404:
                break  # Stop if page doesn't exist

            soup = BeautifulSoup(response.text, "html.parser")
            products = soup.find_all("div", class_="thumbnail")

            if not products:
                break  # No more products on next page

            # Extract data for each product
            for product in products:
                if len(all_products) >= 1000: # Stop after 1000 products
                    break
                title = product.find("a", class_="title").text.strip()
                price = product.find("h4", class_="price").text.strip()
                description = product.find("p", class_="description").text.strip()

                all_products.append({
                    "Category": category_url.split("/")[-1],
                    "Title": title,
                    "Price": price,
                    "Description": description
                })

            print(f" Page {page} done ({len(all_products)} total products).")

            if len(all_products) >= 1000: # Stop after 1000 products
                break

            page += 1


        print("---------------------------------------")
        if len(all_products) >= 1000: # Stop after 1000 products
            break


    # Step 3: Save to CSV
    df = pd.DataFrame(all_products)
    df.to_csv("webscraper_1000_products.csv", index=False, encoding="utf-8")

    print(f"\n✅ Extracted {len(all_products)} products from all categories.")
    print("Data saved to 'webscraper_1000_products.csv'.")
    print("---------------------------------------")

except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")

--- Web Scraping: WebScraper.io Test E-Commerce Site ---
Found 3 categories.
---------------------------------------
Scraping category: https://webscraper.io/test-sites/e-commerce/static
 Page 1 done (3 total products).
 Page 2 done (6 total products).
 Page 3 done (9 total products).
 Page 4 done (12 total products).
 Page 5 done (15 total products).
 Page 6 done (18 total products).
 Page 7 done (21 total products).
 Page 8 done (24 total products).
 Page 9 done (27 total products).
 Page 10 done (30 total products).
 Page 11 done (33 total products).
 Page 12 done (36 total products).
 Page 13 done (39 total products).
 Page 14 done (42 total products).
 Page 15 done (45 total products).
 Page 16 done (48 total products).
 Page 17 done (51 total products).
 Page 18 done (54 total products).
 Page 19 done (57 total products).
 Page 20 done (60 total products).
 Page 21 done (63 total products).
 Page 22 done (66 total products).
 Page 23 done (69 total products).
 Page 24 done (72 to