In [4]:
import time
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

def _build_search_url(search_term, page):
    """Return the Google Patents search URL for *search_term* at *page*."""
    # Percent-encode the term so spaces, '&', '#', '=' and similar characters
    # cannot truncate or corrupt the query string.
    encoded_term = quote_plus(str(search_term))
    return f"https://patents.google.com/?q=({encoded_term})&oq={encoded_term}&page={page}"


def _extract_patent_id(data_result):
    """Return the patent number from a ``data-result`` value, or ``None``.

    Only values of the form ``patent/<id>[/...]`` yield an id; anything else
    (missing attribute, other result types, empty id) gives ``None``.
    """
    if not data_result or not data_result.startswith("patent/"):
        return None
    # The startswith check guarantees at least two parts after the split.
    return data_result.split("/")[1] or None


def scrape_patents(search_term, n_pages):
    """Print and collect patent numbers from Google Patents search results.

    Opens a headless Chrome session, loads result pages ``1..n_pages`` for
    ``search_term`` and reads the ``data-result`` attribute of each result.

    Args:
        search_term: Query text; it is URL-encoded before being sent.
        n_pages: Number of result pages to request.

    Returns:
        A list of the patent numbers found, in the order they were printed.
        Pages or individual results that fail are reported on stdout and
        skipped rather than raising.
    """
    # 1. Set up the Chrome WebDriver
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # comment out if you want to see the browser
    driver = webdriver.Chrome(service=service, options=options)

    patent_ids = []
    try:
        # NOTE(review): confirm whether the site's `page` parameter is
        # zero-based; if it is, starting at 1 skips the first results page.
        for page in range(1, n_pages + 1):
            driver.get(_build_search_url(search_term, page))

            try:
                # Wait for the results container to be present in the DOM
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "section.search-results"))
                )

                # Grab all <article> elements that represent individual search results
                articles = driver.find_elements(By.CSS_SELECTOR, "article.result.style-scope.search-result-item")

                print(f"Found {len(articles)} results on page {page}.")

                for article in articles:
                    try:
                        # The data-result attribute carries "patent/<id>/..."
                        state_modifier = article.find_element(By.CSS_SELECTOR, "state-modifier.result-title.style-scope.search-result-item")
                        patent_id = _extract_patent_id(state_modifier.get_attribute("data-result"))
                        if patent_id is not None:
                            patent_ids.append(patent_id)
                            print(patent_id)
                    except Exception as e:
                        # Best effort: one malformed result must not abort the page.
                        print("Skipping one result due to unexpected structure:", e)

            except Exception as e:
                # Best effort: a failed page is reported and the run continues.
                print(f"Failed to process page {page}: {e}")

            # Delay between page loads to avoid being flagged as a bot;
            # there is nothing to wait for after the final page.
            if page < n_pages:
                time.sleep(2)

    finally:
        # Always release the browser process, even on unexpected errors.
        driver.quit()

    return patent_ids

if __name__ == "__main__":
    # Demo run: swap in your own query text and page count here.
    search_term, n_pages = "IUPAC", 2
    scrape_patents(search_term=search_term, n_pages=n_pages)


Found 10 results on page 1.
US11445724B2
US10544148B2
US9756858B2
US10959432B2
CN104203916B
EP3019476B1
EP3683213B1
US10435401B2
US9771365B2
US11358957B2
Found 10 results on page 2.
EP3183245B1
US9949483B2
EP3186249B1
EP3214939B1
JP7187527B2
US11072608B2
US12134612B2
US10494368B2
EP3060589B9
US20230322925A1
