In [1]:
from dotenv import load_dotenv

# Read the .env file that sits one directory above the notebook.
# NOTE(review): presumably this is what supplies ANTHROPIC_API_KEY, which a later
# cell reads from os.environ -- confirm the key is defined in that file.
load_dotenv("../.env")

True

In [72]:
from functools import partial
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from typing import Any

# Module-level logging callable; currently just the built-in print.
# GPatentEngine reports per-result parsing errors through it.
log = print


class GPatentEngine:
    """Collect patent publication numbers for a free-text query via Chrome.

    Two sources are queried through one shared Selenium session:

    * the Google Patents search page (``https://patents.google.com/``), and
    * DuckDuckGo, restricted with ``site:patents.google.com``.

    The instance owns a Chrome WebDriver. Release it with :meth:`close`, or use
    the engine as a context manager (``with GPatentEngine() as engine: ...``).
    Calling ``engine.driver.quit()`` directly keeps working as well.
    """

    # Links that do not start with this prefix are not Google Patents results.
    _PATENT_HOST = "https://patents.google.com"
    # The publication number is the path segment directly after "/patent/".
    # Stopping at "/", "?" or "#" keeps language suffixes, query strings and
    # fragments out of the captured value.
    _PATENT_ID_RE = re.compile(r"/patent/([^/?#]+)")

    def __init__(self, timeout: float = 10, headless: bool = True):
        """Start Chrome and prepare the shared explicit wait.

        Args:
            timeout: Seconds the explicit waits block before giving up.
            headless: When true (the default), run Chrome with ``--headless=new``.
        """
        options = Options()
        if headless:
            options.add_argument("--headless=new")
        self.driver = webdriver.Chrome(options=options)
        self.wait = WebDriverWait(driver=self.driver, timeout=timeout)
        self._closed = False

    def close(self) -> None:
        """Quit the browser session; calling it more than once is harmless."""
        # getattr: tolerate instances on which __init__ did not run to the end.
        if getattr(self, "_closed", False):
            return
        self._closed = True
        self.driver.quit()

    def __enter__(self):
        """Return the engine itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the browser on leaving the ``with`` block; never suppresses errors."""
        self.close()

    def _selenium_patent_search(self,
                                destination,
                                wait_fn,
                                fetch_fn,
                                process_fn):
        """Open a page, run a search and feed every result element to a callback.

        Args:
            destination: URL to open.
            wait_fn: Zero-argument callable that submits the query and blocks
                until the first results are present.
            fetch_fn: Zero-argument callable returning all result elements
                currently on the page.
            process_fn: One-argument callable invoked once per newly seen
                result element. Exceptions it raises are logged and skipped.
        """
        self.driver.get(destination)
        wait_fn()

        # Number of elements already handed to process_fn.
        previous_count = 0
        while True:
            # Get all currently loaded search result elements
            results = fetch_fn()

            # No growth since the last pass: nothing new to process, stop.
            if len(results) == previous_count:
                break

            # Process newly loaded elements only
            for result in results[previous_count:]:
                try:
                    process_fn(result)
                except Exception as e:
                    # Best effort: one malformed result must not abort the search.
                    log(f"Encountered error when parsing patent results: {e}")

            previous_count = len(results)

            # Scroll to bottom to trigger more results [optional]
            # NOTE(review): the next fetch runs immediately after this scroll,
            # without waiting for lazily loaded results -- confirm whether an
            # explicit wait is needed for the target pages.
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    def _submit_query(self, text: str, first_result_locator) -> None:
        """Type ``text`` into the ``q`` input, press RETURN and await results.

        Args:
            text: Text to send to the search box.
            first_result_locator: ``(By, value)`` locator whose presence marks
                the first batch of results as loaded.
        """
        # Both target pages name their search input "q"; until() returns the
        # located element, so no second lookup is needed.
        search_box = self.wait.until(EC.presence_of_element_located((By.NAME, "q")))
        search_box.send_keys(text)
        search_box.send_keys(Keys.RETURN)
        # Wait for the first batch to load
        self.wait.until(EC.presence_of_element_located(first_result_locator))

    @staticmethod
    def _extract_patent_id(link) -> "str | None":
        """Return the publication number embedded in a Google Patents URL.

        Args:
            link: An ``href`` value; may be ``None`` or empty.

        Returns:
            The path segment following ``/patent/``, or ``None`` when ``link``
            is empty, is not a Google Patents URL, or has no such segment.
        """
        if not link or not link.startswith(GPatentEngine._PATENT_HOST):
            return None
        found = GPatentEngine._PATENT_ID_RE.search(link)
        return found.group(1) if found else None

    def _patent_direct_search(self, query: str) -> list[str]:
        """Search Google Patents itself and return the ids in page order."""
        patents: list[str] = []

        def _process_fn(result) -> None:
            # The id is the second "/"-separated piece of the data-result
            # attribute. A missing attribute raises here and is logged by
            # _selenium_patent_search.
            patents.append(result.get_attribute('data-result').split("/")[1])

        self._selenium_patent_search(
            destination="https://patents.google.com/",
            wait_fn=partial(
                self._submit_query,
                query,
                (By.XPATH, "//article[contains(@class, 'search-result-item')]/following::a[1]"),
            ),
            fetch_fn=partial(
                self.driver.find_elements,
                By.XPATH,
                "//state-modifier[contains(@class, 'search-result-item')]",
            ),
            process_fn=_process_fn,
        )

        return patents

    def _patent_internet_search(self, query: str) -> list[str]:
        """Search DuckDuckGo (limited to patents.google.com) and return the ids."""
        patents: list[str] = []

        def _process_fn(result) -> None:
            # Result articles contain several anchors; keep only those that
            # point at a Google Patents document.
            patent_id = self._extract_patent_id(result.get_attribute("href"))
            if patent_id is not None:
                patents.append(patent_id)

        self._selenium_patent_search(
            destination="https://www.duckduckgo.com/",
            wait_fn=partial(
                self._submit_query,
                f"{query} site:patents.google.com",
                (By.XPATH, "//article[@data-nrn='result']"),
            ),
            fetch_fn=partial(
                self.driver.find_elements,
                By.XPATH,
                "//article[@data-nrn='result']//a",
            ),
            process_fn=_process_fn,
        )
        return patents

    def search(self, query: str) -> list[str]:
        """Return the unique publication numbers found for ``query``.

        Direct Google Patents hits come first, followed by ids that only the
        web search produced. Duplicates are dropped and first-seen order is
        kept, so repeated runs over the same pages give the same list.
        """
        # dict keys act as an insertion-ordered set; update() leaves keys that
        # are already present in their original position.
        ordered = dict.fromkeys(self._patent_direct_search(query))
        ordered.update(dict.fromkeys(self._patent_internet_search(query)))
        return list(ordered)

In [73]:
# Run one end-to-end search. The try/finally makes sure the Chrome session is
# shut down even when the search raises; in that case the exception propagates
# and the print below is not reached.
engine = GPatentEngine()
try:
    patents = engine.search("diaper for birds")
finally:
    engine.driver.quit()

print(patents)

['ES2592324B1', 'JP2598289B2', 'JP2025003953A', 'US9468565B2', 'KR102438451B1', 'JP1711666S', 'US20120037094A1', 'JP6131513B2', 'CN107047454B', 'JPH01215229A', 'US2882858A', 'CN103552788B', 'JP3138623U', 'JP1728611S', 'US9358191B2', 'JP2015112093A', 'CN201472694U', 'ES2592324A1', 'CN114126629B', 'US5934226A']


In [7]:
import os
from anthropic import Anthropic

# Build the API client. Passing the key explicitly mirrors the SDK default
# (it reads ANTHROPIC_API_KEY itself), so the argument could be omitted.
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

# Send a single-turn greeting to check that the key and connection work.
message = client.messages.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
# Prints the list of content blocks returned for the reply.
print(message.content)

[TextBlock(citations=None, text='Hello! How can I help you today?', type='text')]
