In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

# Resolve the chromedriver binary through webdriver_manager (already imported
# above) instead of pinning one user's cache directory and one driver version:
# a hard-coded path only works on the machine it was copied from and stops
# matching once the installed Chrome moves past that driver version.
driver_path = ChromeDriverManager().install()

driver = webdriver.Chrome(service=Service(driver_path))

# Exhibitor directory listing (searchtype "category", search id 217, show "all").
driver.get("https://exhibitors.ces.tech/8_0/#/searchtype/category/search/217/show/all")

# Shared explicit wait: every wait.until(...) call blocks for at most 30 seconds.
wait = WebDriverWait(driver, 30)

# Do not continue until at least one element with class "card-Title" is in the
# DOM; raises selenium's TimeoutException if none appears within the limit.
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "card-Title")))

def scroll_to_bottom():
    """Scroll the page to its bottom repeatedly until its height stops growing.

    Operates on the module-level ``driver``. After each jump to the bottom the
    function pauses two seconds so newly loaded content can appear, then
    re-reads ``document.body.scrollHeight``. It returns the first time that
    reading equals the previous one.
    """
    height_script = "return document.body.scrollHeight"
    previous_height = driver.execute_script(height_script)

    still_growing = True
    while still_growing:
        # Jump straight to the current bottom of the document.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Wait for new content to load

        # An unchanged height after the pause means nothing more was appended.
        current_height = driver.execute_script(height_script)
        still_growing = current_height != previous_height
        previous_height = current_height

# Scroll until the page height stops changing so the whole listing is in the
# DOM before it is parsed.
scroll_to_bottom()

# Parse the fully scrolled page.
# NOTE(review): in the code visible here this module-level `soup` is never read
# again (scrape_exhibitors_from_starting_point builds its own) — confirm nothing
# else relies on it before removing.
soup = BeautifulSoup(driver.page_source, 'html.parser')
# Empty placeholder; rebound to the scraped results further down.
exhibitors = []


def scrape_exhibitors_from_starting_point(starting_name="10minds Co., Ltd."):
    """Collect exhibitor names and links from the page currently loaded in ``driver``.

    Parses ``driver.page_source`` and walks every ``<h3>`` element in document
    order. Headings are skipped until one whose anchor text (stripped) equals
    ``starting_name``; that heading and every heading after it are collected.

    Args:
        starting_name: Exact anchor text of the first exhibitor to include.

    Returns:
        A list of ``{'name': ..., 'link': ...}`` dicts in page order. A heading
        without an ``<a>`` is recorded as ``'No name available'`` /
        ``'No link available'``; an ``<a>`` without an ``href`` attribute is
        recorded with ``'No link available'`` instead of raising ``KeyError``.
        The list is empty when ``starting_name`` never matches.
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    exhibitors = []
    start_scraping = False

    for heading in soup.find_all('h3'):
        # Look the anchor up once and reuse it for both the name and the link.
        anchor = heading.find('a')
        exhibitor_name = anchor.text.strip() if anchor else 'No name available'

        if exhibitor_name == starting_name:
            start_scraping = True

        if not start_scraping:
            continue

        # Tag.get falls back to the placeholder when the href attribute is absent.
        if anchor:
            exhibitor_link = anchor.get('href', 'No link available')
        else:
            exhibitor_link = 'No link available'

        exhibitors.append({
            'name': exhibitor_name,
            'link': exhibitor_link,
        })

    return exhibitors

# Names and links of every listed exhibitor from the chosen starting point on.
exhibitors = scrape_exhibitors_from_starting_point("10minds Co., Ltd.")  # Start from the exhibitor you want

# CSV column order. Nothing in this script fills 'summary'; it is written empty.
fieldnames = ['name', 'booth_number', 'address', 'website', 'linkedin', 'description', 'product_categories','link','summary']


def _first_match(css_selector):
    """Return the first element on the current page matching the selector, or None."""
    # find_elements returns an empty list instead of raising when nothing matches.
    matches = driver.find_elements(By.CSS_SELECTOR, css_selector)
    return matches[0] if matches else None


def _text_or_none(css_selector):
    """Return the stripped text of the first matching element, or None if absent."""
    element = _first_match(css_selector)
    return element.text.strip() if element is not None else None


def _href_or_none(css_selector):
    """Return the href of the first matching element, or None if absent."""
    element = _first_match(css_selector)
    return element.get_attribute('href') if element is not None else None


# try/finally guarantees the browser is closed even when a page times out or
# the CSV cannot be written.
try:
    with open('CES_2025.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()  # Write header row

        for exhibitor in exhibitors:
            # Make sure every column exists so the row can always be written.
            for key in fieldnames:
                exhibitor.setdefault(key, None)

            # Exhibitors without a usable link have no detail page to visit;
            # they are still written, with only the listing data filled in.
            if exhibitor['link'] != 'No link available':
                driver.get(f"https://exhibitors.ces.tech/{exhibitor['link']}")

                # The address block is used as the "page has rendered" signal.
                wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'showcase-address')))

                # Booth Number
                exhibitor['booth_number'] = _text_or_none('a#newfloorplanlink')

                # Description
                exhibitor['description'] = _text_or_none('#scroll-description .js-read-more')

                # Product Categories
                category_links = driver.find_elements(By.CSS_SELECTOR, '#scroll-products .grid__centered h2 a')
                exhibitor['product_categories'] = ', '.join(link.text.strip() for link in category_links)

                # Address (multi-line on the page, flattened to one CSV cell)
                address_element = _first_match('.showcase-address')
                if address_element is not None:
                    exhibitor['address'] = address_element.text.replace('\n', ', ').strip()

                # Website
                exhibitor['website'] = _href_or_none('.showcase-web-phone a')

                # LinkedIn (optional on the page)
                exhibitor['linkedin'] = _href_or_none('.showcase-social a[title*="LinkedIn"]')

            # Exactly one row per exhibitor, written once its details are known.
            writer.writerow({key: exhibitor[key] for key in fieldnames})
finally:
    driver.quit()

import pandas as pd

# Post-processing: re-read the scraped CSV, discard rows that are duplicates
# of an earlier row, and save the result to a second file without the
# DataFrame index column.
df = pd.read_csv('CES_2025.csv').drop_duplicates()
df.to_csv('CES_2025_unique.csv', index=False)