In [1]:
import json
import os

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait



In [2]:
# Path to the local ChromeDriver binary.
# Set the CHROMEDRIVER_PATH environment variable to point at a different
# binary; when it is unset, the original hard-coded location is used.
chrome_driver_path = os.environ.get(
    "CHROMEDRIVER_PATH", "/Users/mehak/chromedriver/chromedriver"
)

In [3]:
# Chrome options. The headless flag is deliberately not set, so a visible
# browser window opens, which is convenient while debugging.
options = webdriver.ChromeOptions()

# Service wrapper that points Selenium at the local ChromeDriver executable
service = Service(executable_path=chrome_driver_path)

# Start the Chrome session using the service and options built above
driver = webdriver.Chrome(service=service, options=options)

# Helpers and entry point for extracting product identifiers from JSON-LD using Selenium
def _iter_json_ld_nodes(payload):
    """Yield every dict found in a decoded JSON-LD payload, descending into lists and ``@graph``."""
    if isinstance(payload, list):
        for item in payload:
            yield from _iter_json_ld_nodes(item)
    elif isinstance(payload, dict):
        yield payload
        yield from _iter_json_ld_nodes(payload.get('@graph', []))


def _is_product_node(node):
    """Return True when the node's ``@type`` is, or includes, ``Product``."""
    node_type = node.get('@type')
    declared_types = node_type if isinstance(node_type, list) else [node_type]
    return 'Product' in declared_types


def _collect_product_identifiers(node, product_data):
    """Copy identifier and price fields from one JSON-LD node into ``product_data`` (first value wins)."""
    for label, key in (('GTIN-13', 'gtin13'), ('Model Number', 'model'), ('SKU', 'sku')):
        if key in node:
            product_data.setdefault(label, node[key])

    # "offers" may be a single object or a list of objects; treat both uniformly.
    offers = node.get('offers')
    offer_nodes = offers if isinstance(offers, list) else [offers]
    for offer in offer_nodes:
        if not isinstance(offer, dict):
            continue
        for label, key in (('Low Price', 'lowPrice'), ('High Price', 'highPrice')):
            if key in offer:
                product_data.setdefault(label, offer[key])


def get_product_identifiers_selenium(url):
    """Load ``url`` in the shared Selenium ``driver`` and extract product identifiers from JSON-LD.

    Every ``<script type="application/ld+json">`` block on the rendered page
    is examined, not just the first one. Top-level lists and ``@graph``
    wrappers are flattened, and nodes typed ``Product`` are consulted before
    any other node, so an unrelated block (for example a breadcrumb list)
    appearing first no longer hides the product data.

    Args:
        url: Address of the product page to open.

    Returns:
        dict: Any of the keys ``'GTIN-13'``, ``'Model Number'``, ``'SKU'``,
        ``'Low Price'`` and ``'High Price'`` that were found. Empty when
        nothing was found or an error occurred. The same information is
        printed, as before.

    Raises:
        Whatever ``driver.get`` raises; navigation happens outside the
        ``try`` block, as in the original. Later failures (wait timeout,
        parsing) are caught and reported with an ``Error: ...`` line.
    """
    product_data = {}

    # Navigate to the page
    driver.get(url)

    try:
        # Wait for a <script> tag with JSON-LD data to be present (up to 10 seconds)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//script[@type='application/ld+json']"))
        )

        # Parse the rendered page source with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'lxml')

        # Collect every JSON-LD block; a page may contain more than one
        script_tags = soup.find_all('script', {'type': 'application/ld+json'})
        if not script_tags:
            print("No JSON-LD structured data found.")
            return product_data

        nodes = []
        for script_tag in script_tags:
            raw_json = script_tag.string
            # .string is None when the tag has no single text child; skip those and blanks
            if not raw_json or not raw_json.strip():
                continue
            try:
                payload = json.loads(raw_json)
            except ValueError as e:
                # One malformed block must not prevent reading the others
                print(f"Skipping malformed JSON-LD block: {e}")
                continue
            nodes.extend(_iter_json_ld_nodes(payload))

        # Stable sort: Product nodes first, everything else keeps document order
        nodes.sort(key=lambda node: not _is_product_node(node))
        for node in nodes:
            _collect_product_identifiers(node, product_data)

        # Print the extracted data
        if product_data:
            print("Extracted Product Identifiers:")
            for key, value in product_data.items():
                print(f"{key}: {value}")
        else:
            print("No product identifiers found.")
    except Exception as e:
        # Best-effort scraper: report the failure instead of raising
        print(f"Error: {e}")

    return product_data

# Demo run against a single Best Buy product page. The URL is written as
# adjacent string pieces that Python joins into one literal.
url = (
    "https://www.bestbuy.com/site/"
    "ge-profile-top-control-smart-built-in-stainless-steel-tub-dishwasher-with-3rd-rack-dedicated-jet-targeted-wash-and-42-dba-stainless-steel/"
    "6572540.p?skuId=6572540"
)
get_product_identifiers_selenium(url)


No product identifiers found.


In [4]:
# Shut down the WebDriver session created earlier in the notebook.
# Run this cell once you are finished, so the browser does not stay open.
driver.quit()