In [None]:
# Import libraries
import os
import time
import csv
import logging
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager

# -------------------------
# Logging configuration
# -------------------------
# Records at INFO level and above go to a log file (relative path, so it is
# created in the current working directory), stamped with time and severity.
LOG_FILE = 'scraping_errors.log'
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, filename=LOG_FILE)

# -------------------------
# Set up Selenium WebDriver using webdriver_manager
# -------------------------
chrome_options = Options()
# Headless mode: no visible browser window. Comment this line out when debugging.
chrome_options.add_argument('--headless')

# The result of webdriver_manager's install() is handed to Service as the
# chromedriver executable to launch.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(options=chrome_options, service=chrome_service)

# Explicit wait bound to this driver, with a 20-second timeout.
wait = WebDriverWait(driver, 20)

In [None]:
# # -------------------------
# # Helper functions (currently commented out): safe extraction of text and attributes from Selenium elements using XPath
# # -------------------------
# def safe_extract(driver, xpath, default="0"):
#     try:
#         element = driver.find_element(By.XPATH, xpath)
#         text = element.text.strip()
#         return text if text else default
#     except NoSuchElementException:
#         logging.error(f"Element not found for XPath: {xpath}")
#         return default

# def safe_extract_attribute(driver, xpath, attribute, default="0"):
#     try:
#         element = driver.find_element(By.XPATH, xpath)
#         attr_val = element.get_attribute(attribute)
#         return attr_val.strip() if attr_val else default
#     except NoSuchElementException:
#         logging.error(f"Element not found for XPath (attribute {attribute}): {xpath}")
#         return default


In [None]:
# -------------------------
# Main Scraper Configuration
# -------------------------
base_exhibitors_url = "https://kbis2025.smallworldlabs.com/exhibitors"

# Destination CSV. The default is the original hard-coded location; set the
# VENDOR_LIST_CSV environment variable to write somewhere else without
# editing the notebook.
output_csv = os.environ.get(
    "VENDOR_LIST_CSV",
    r"C:\Users\jchan\csi360_fire_police\cabinet_vendors_list\cabinets\resources\output\vendor_list.csv",
)

# One dict per scraped exhibitor row, keyed by the column names below.
exhibitor_data = []

# XPath of the exhibitors table on the listing page.
exhibitors_table_xpath = "//table[contains(@class, 'table-sm')]"

# Column order the table is assumed to use -- TODO confirm against the live page.
expected_columns = (
    "Company Name",
    "Description",
    "Founded",
    "Website",
    "Categories",
    "Key Words",
)

try:
    # 1. Load the exhibitors page and wait for the dynamically rendered table.
    logging.info(f"Loading exhibitors page: {base_exhibitors_url}")
    driver.get(base_exhibitors_url)

    # 2. wait.until() returns the located element, so no second lookup is needed.
    table = wait.until(
        EC.presence_of_element_located((By.XPATH, exhibitors_table_xpath))
    )
    tbody = table.find_element(By.TAG_NAME, "tbody")
    rows = tbody.find_elements(By.TAG_NAME, "tr")
    logging.info(f"Found {len(rows)} rows in the exhibitors table.")

    # 3. Extract the first six cells of every row; empty cells are stored as "0".
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, "td")
        if len(cells) < len(expected_columns):
            # Rows with too few <td> cells are skipped and logged.
            logging.error(f"Row with unexpected number of cells: {len(cells)}")
            continue
        # zip() stops after the six expected columns, ignoring any extra cells.
        exhibitor_data.append(
            {
                column: cell.text.strip() or "0"
                for column, cell in zip(expected_columns, cells)
            }
        )

    # 4. Wait briefly (2 seconds) once, after all rows have been read.
    #    This must stay outside the loop: inside it, the run time grows by
    #    two seconds for every table row.
    time.sleep(2)

except Exception as e:
    # Best-effort: keep the notebook running, but record the full traceback
    # in the log so the failure can be diagnosed.
    logging.exception(f"An error occurred during scraping: {e}")
finally:
    # Always release the browser. The driver is created in the setup cell, so
    # that cell must be re-run before this one can be executed again.
    driver.quit()

In [None]:
# -------------------------
# Ensure the output directory exists
# -------------------------
output_dir = os.path.dirname(output_csv)
# dirname() is '' when output_csv is a bare filename; os.makedirs('') would
# raise, and the current directory already exists in that case.
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# -------------------------
# Save extracted data to CSV
# -------------------------
csv_headers = ["Company Name", "Description", "Founded", "Website", "Categories", "Key Words"]
# newline='' lets the csv module control line endings; the file is written as UTF-8.
with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=csv_headers)
    writer.writeheader()
    writer.writerows(exhibitor_data)

if exhibitor_data:
    logging.info(f"Scraping completed. Data saved to {output_csv}.")
    print(f"Scraping completed. Data saved to {output_csv}.")
else:
    # Nothing was collected (for example, scraping failed earlier): do not
    # report success for a file that holds only the header row.
    empty_msg = (
        f"No exhibitor rows were collected; {output_csv} contains only the "
        "header row. Check the log file for errors."
    )
    logging.warning(empty_msg)
    print(empty_msg)
