In [1]:
# Import libraries
import os
import time
import csv
import logging
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# -------------------------
# Run-mode switches
# -------------------------
# While TEST_MODE is True the scraper stops after the first ROWS_TO_SCRAPE
# table rows; switch it to False to walk every row of the exhibitors table.
TEST_MODE = True
ROWS_TO_SCRAPE = 5  # row cap; has no effect unless TEST_MODE is True



In [3]:
# -------------------------
# Logging: INFO and above go to a log file
# -------------------------
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename='scraping_errors.log',
)

# -------------------------
# Browser: Chrome, with the driver binary resolved by webdriver_manager
# -------------------------
chrome_options = Options()
# Headless by default; drop this argument to watch the browser while debugging.
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)
# Shared explicit wait with a 20-second timeout, reused by the scraping cells.
wait = WebDriverWait(driver, 20)

In [4]:
# ------------------------------
# Main URL Scraper Configuration
# ------------------------------
# Website-only scrape: collects each exhibitor's "Website" field from its
# detail page and writes the results to a one-column CSV.
base_exhibitors_url = "https://kbis2025.smallworldlabs.com/exhibitors"
output_csv = r"C:\Users\jchan\csi360_fire_police\cabinet_vendors_list\cabinets\resources\output\vendor_list.csv"
exhibitor_data = []

# Pre-initialise so the later steps degrade to no-ops (instead of raising
# NameError) when the exhibitors page fails to load.
rows = []
exhibitor_urls = []

try:
    # 1. Load the exhibitors page and wait for the table's tbody element
    try:
        logging.info(f"Loading exhibitors page: {base_exhibitors_url}")
        driver.get(base_exhibitors_url)
        # Use the full XPath to the table's tbody
        table_body_xpath = "/html/body/div[2]/div[3]/div[2]/div/div/div/section/div[5]/div[2]/div[1]/table/tbody"
        # WebDriverWait.until returns the located element, so no second lookup is needed.
        table_body = wait.until(EC.presence_of_element_located((By.XPATH, table_body_xpath)))

        rows = table_body.find_elements(By.TAG_NAME, "tr")
        logging.info(f"Found {len(rows)} rows in the exhibitors table.")
    except Exception as e:
        logging.error(f"An error occurred while loading the exhibitors page: {e}")

    # 2. Build a list of exhibitor URLs from the table.
    #    All hrefs are collected before navigating away, because the row
    #    elements belong to the listing page.
    for i, row in enumerate(rows):
        if TEST_MODE and i >= ROWS_TO_SCRAPE:
            logging.info("Test mode enabled: Stopping after processing first few rows.")
            break
        try:
            # Use relative XPath to get the link element from the row
            link_elem = row.find_element(By.XPATH, ".//td[2]/span/a")
        except NoSuchElementException as e:
            logging.error(f"Could not find exhibitor link in row {i+1}: {e}")
            continue
        url = link_elem.get_attribute("href")
        if url:
            exhibitor_urls.append(url)
        else:
            # get_attribute returns None when the anchor has no href.
            logging.error(f"Exhibitor link in row {i+1} has no href; skipping.")

    # 3. Iterate over each exhibitor URL to extract the website only
    for i, url in enumerate(exhibitor_urls):
        logging.info(f"Processing exhibitor {i+1} URL: {url}")
        try:
            # Navigation and the wait live inside the try so that one slow or
            # broken exhibitor page is logged and skipped rather than
            # aborting the whole run.
            driver.get(url)
            # Wait for the upper section of the page to load
            wait.until(EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/div[3]/div[1]")))

            # Extract the website using the provided absolute XPath
            website_xpath = "/html/body/div[2]/div[3]/div[1]/div/div/div/div/div/div/div[3]/div/div[1]/div[3]/div[2]"
            website = driver.find_element(By.XPATH, website_xpath).text.strip()
            exhibitor_data.append({"Website": website})
        except Exception as e:
            logging.error(f"Error extracting website for exhibitor at {url}: {e}")

        # Wait 2 seconds between processing exhibitors
        time.sleep(2)
finally:
    # Ensure the driver is closed even if an error occurred
    driver.quit()

# -------------------------
# Ensure the output directory exists
# -------------------------
output_dir = os.path.dirname(output_csv)
if output_dir:
    # os.makedirs("") raises, so skip it when output_csv is a bare file name.
    os.makedirs(output_dir, exist_ok=True)

# -------------------------
# Save extracted data to CSV
# -------------------------
csv_headers = ["Website"]
with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=csv_headers)
    writer.writeheader()
    writer.writerows(exhibitor_data)

logging.info(f"Scraping completed. Data saved to {output_csv}.")
print(f"Scraping completed. Data saved to {output_csv}.")

Scraping completed. Data saved to C:\Users\jchan\csi360_fire_police\cabinet_vendors_list\cabinets\resources\output\vendor_list.csv.


In [None]:
# -------------------------
# Main Scraper Configuration
# -------------------------
# Full-detail scrape: collects company name, description, website and
# categories from each exhibitor's detail page into `exhibitor_data`.
#
# NOTE: this cell reuses the shared `driver`/`wait` objects and quits the
# driver when it finishes. The preceding website-only cell also calls
# driver.quit(), so re-run the WebDriver setup cell before this one if that
# cell has already been executed.
base_exhibitors_url = "https://kbis2025.smallworldlabs.com/exhibitors"
output_csv = r"C:\Users\jchan\csi360_fire_police\cabinet_vendors_list\cabinets\resources\output\vendor_list.csv"
exhibitor_data = []
exhibitor_urls = []

try:
    # 1. Load the exhibitors page and wait for the table's tbody element
    logging.info(f"Loading exhibitors page: {base_exhibitors_url}")
    driver.get(base_exhibitors_url)
    # Use the full XPath to the table's tbody
    table_body_xpath = "/html/body/div[2]/div[3]/div[2]/div/div/div/section/div[5]/div[2]/div[1]/table/tbody"
    # WebDriverWait.until returns the located element, so no second lookup is needed.
    table_body = wait.until(EC.presence_of_element_located((By.XPATH, table_body_xpath)))

    rows = table_body.find_elements(By.TAG_NAME, "tr")
    logging.info(f"Found {len(rows)} rows in the exhibitors table.")

    # 2. Build a list of exhibitor URLs from the table.
    #    These steps must live in the try body: nested under an `except`
    #    handler they would only run after a failed page load.
    for i, row in enumerate(rows):
        if TEST_MODE and i >= ROWS_TO_SCRAPE:
            logging.info("Test mode enabled: Stopping after processing first few rows.")
            break
        try:
            # Use relative XPath to get the link element from the row
            link_elem = row.find_element(By.XPATH, ".//td[2]/span/a")
        except NoSuchElementException as e:
            logging.error(f"Could not find exhibitor link in row {i+1}: {e}")
            continue
        url = link_elem.get_attribute("href")
        if url:
            exhibitor_urls.append(url)
        else:
            # get_attribute returns None when the anchor has no href.
            logging.error(f"Exhibitor link in row {i+1} has no href; skipping.")

    # 3. Iterate over each exhibitor URL to extract data
    # Absolute XPaths of the four fields on an exhibitor detail page.
    company_name_xpath = "/html/body/div[2]/div[3]/div[1]/div/div/div/div/div/div/div[3]/div/div[1]/div[1]/div[2]"
    description_xpath  = "/html/body/div[2]/div[3]/div[1]/div/div/div/div/div/div/div[3]/div/div[1]/div[2]/div[2]"
    website_xpath      = "/html/body/div[2]/div[3]/div[1]/div/div/div/div/div/div/div[3]/div/div[1]/div[3]/div[2]"
    categories_xpath   = "/html/body/div[2]/div[3]/div[1]/div/div/div/div/div/div/div[3]/div/div[1]/div[4]/div[2]/div"

    for i, url in enumerate(exhibitor_urls):
        logging.info(f"Processing exhibitor {i+1} URL: {url}")
        try:
            # Navigation and the wait live inside the try so that one slow or
            # broken exhibitor page is logged and skipped rather than
            # aborting the remaining exhibitors.
            driver.get(url)
            # Wait for the new page to load; here we wait for an element in the upper section of the page
            wait.until(EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/div[3]/div[1]")))

            company_name = driver.find_element(By.XPATH, company_name_xpath).text.strip()
            description  = driver.find_element(By.XPATH, description_xpath).text.strip()
            website      = driver.find_element(By.XPATH, website_xpath).text.strip()
            categories   = driver.find_element(By.XPATH, categories_xpath).text.strip()

            exhibitor_data.append({
                "Company Name": company_name,
                "Description": description,
                "Website": website,
                "Categories": categories
            })
        except Exception as e:
            logging.error(f"Error extracting data for exhibitor at {url}: {e}")

        # 4. Wait briefly between exhibitors (2 seconds)
        time.sleep(2)

except Exception as e:
    # Covers failures loading the listing page or building the URL list.
    logging.error(f"An error occurred during scraping: {str(e)}")
finally:
    driver.quit()

In [None]:
# -------------------------
# Ensure the output directory exists
# -------------------------
# Relies on `output_csv` and `exhibitor_data` set by the scraper cell.
output_dir = os.path.dirname(output_csv)
if output_dir:
    # os.makedirs("") raises FileNotFoundError, so skip directory creation
    # when output_csv is a bare file name.
    os.makedirs(output_dir, exist_ok=True)

# -------------------------
# Save extracted data to CSV
# -------------------------
csv_headers = ["Company Name", "Description", "Website", "Categories"]
# newline='' stops the csv module's own line endings from being translated.
with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=csv_headers)
    writer.writeheader()
    writer.writerows(exhibitor_data)

logging.info(f"Scraping completed. Data saved to {output_csv}.")
print(f"Scraping completed. Data saved to {output_csv}.")
