In [None]:
# Patent Novelty and Obviousness Checking

## Dataset and API Information:
We are using the Lens.org API to access patent data, which allows searching patents globally. The results are compared against a new patent claim using text similarity.

## Patent Search Function
The `search_patents_lens()` function sends a request to the Lens.org API to search for related patents based on a query.

## Prior Art Retrieval
The `retrieve_prior_art()` function fetches the titles of related patents for comparison.

## Similarity Calculation using LLMs
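`check_novelty()` reports the cosine similarity between TF-IDF vectors of the new claim and each prior-art title. The code also references a pre-trained Hugging Face sentence-embedding model (`sentence-transformers/all-MiniLM-L6-v2`) for semantic similarity.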

## Novelty Check Pipeline
The `patent_novelty_check()` function brings it all together, helping us check the novelty of a new patent claim.


In [2]:
import torch
import transformers

# Report the installed torch and transformers versions, one per line.
print(torch.__version__, transformers.__version__, sep="\n")

1.13.1
4.44.2


In [4]:
%pip install transformers --force-reinstall

Collecting transformers
  Using cached transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Downloading huggingface_hub-0.25.1-py3-none-any.whl.metadata (13 kB)
Collecting numpy>=1.17 (from transformers)
  Using cached numpy-2.1.1-cp310-cp310-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting packaging>=20.0 (from transformers)
  Using cached packaging-24.1-py3-none-any.whl.metadata (3.2 kB)
Collecting pyyaml>=5.1 (from transformers)
  Using cached PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.9.11-cp310-cp310-macosx_11_0_arm64.whl.metadata (40 kB)
Collecting requests (from transformers)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Using cached safetensors-

In [7]:
pip install torch -U

Collecting torch
  Downloading torch-2.4.1-cp310-none-macosx_11_0_arm64.whl.metadata (26 kB)
Downloading torch-2.4.1-cp310-none-macosx_11_0_arm64.whl (62.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 1.13.1
    Uninstalling torch-1.13.1:
      Successfully uninstalled torch-1.13.1
Successfully installed torch-2.4.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
# Required Libraries
import requests
import pandas as pd
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# 1. Fetch Patent Data from Lens.org API (requires an API key)
def search_patents_lens(query, api_key, timeout=30):
    """
    Searches Lens.org for records matching a free-text query.

    Sends an authenticated POST with a boolean `must`/`match` query on the
    `text` field and asks for at most 10 hits.

    Args:
        query: Free-text search string.
        api_key: Lens.org API token, sent as a Bearer token.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for example when the API key is rejected).
        requests.Timeout: If no response arrives within `timeout` seconds.

    NOTE(review): the URL below is the *scholarly* search endpoint; Lens.org
    appears to serve patents from a separate patent search endpoint with a
    different record layout -- confirm which one is intended.
    """
    url = "https://api.lens.org/scholarly/search"
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    payload = {
        "query": {
            "bool": {
                "must": [
                    {"match": {"text": query}}
                ]
            }
        },
        "size": 10
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Fail loudly on auth/quota errors instead of handing an error body to the
    # caller, where it cannot be told apart from an empty result set.
    response.raise_for_status()
    return response.json()

# 2. Retrieve Prior Art and Novelty Comparison
def retrieve_prior_art(patent_title, api_key):
    """
    Retrieves the titles of records related to a patent title via the Lens.org API.

    Args:
        patent_title: Text used as the search query.
        api_key: Lens.org API token passed through to `search_patents_lens`.

    Returns:
        A list with the non-empty `title` of each record under the response's
        `data` key; an empty list when the response has no usable `data`.
    """
    response = search_patents_lens(patent_title, api_key)
    # Only a JSON object can carry a 'data' list; anything else means no hits.
    if not isinstance(response, dict):
        return []
    records = response.get('data') or []
    # Skip records whose title is missing or empty: they carry no text to
    # compare, and a None entry would presumably break text vectorization
    # downstream.
    return [
        record['title']
        for record in records
        if isinstance(record, dict) and record.get('title')
    ]

# 3. LLM for Semantic Similarity (Using Hugging Face Pipeline)
def check_novelty(new_patent_claim, prior_art_patents, method="tfidf"):
    """
    Scores how similar a new patent claim is to each prior-art text.

    Args:
        new_patent_claim: Text of the claim being checked.
        prior_art_patents: List of prior-art texts to compare against.
        method: "tfidf" (default) compares TF-IDF vectors fitted on the claim
            plus the prior-art texts. "embedding" compares mean-pooled token
            embeddings from 'sentence-transformers/all-MiniLM-L6-v2'; the
            model is only loaded when this method is requested.

    Returns:
        A 2-D array with one row and one column per prior-art text holding
        the cosine similarity between the claim and that text. With no
        prior-art texts the array has shape (1, 0).

    Raises:
        ValueError: If `method` is not 'tfidf' or 'embedding'.
    """
    import numpy as np  # local import: nothing else in this cell needs it

    if method not in ("tfidf", "embedding"):
        raise ValueError("Invalid method provided. Choose from: 'tfidf', 'embedding'.")

    prior_art_patents = list(prior_art_patents)
    if not prior_art_patents:
        # Nothing to compare against: return an empty row rather than letting
        # the similarity routine fail on a zero-row matrix.
        return np.empty((1, 0))

    if method == "embedding":
        # Initialize HuggingFace pipeline for text similarity
        similarity_model = pipeline('feature-extraction', model='sentence-transformers/all-MiniLM-L6-v2')

        def embed(text):
            # assumes that for a single string the pipeline returns a nested
            # list whose first element holds one vector per token -- TODO
            # confirm. Averaging over tokens gives one vector per text.
            return np.asarray(similarity_model(text)[0]).mean(axis=0)

        new_claim_embedding = embed(new_patent_claim)
        prior_art_embeddings = [embed(patent) for patent in prior_art_patents]
        return cosine_similarity([new_claim_embedding], prior_art_embeddings)

    # TF-IDF path: fit on the claim plus all prior art so both share a vocabulary.
    vectorizer = TfidfVectorizer()
    corpus = [new_patent_claim] + prior_art_patents
    tfidf_matrix = vectorizer.fit_transform(corpus)

    # Row 0 is the claim; the remaining rows are the prior-art texts.
    return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])

# 4. Full Pipeline for Patent Novelty Check
def patent_novelty_check(patent_claim, api_key):
    """
    Runs the end-to-end novelty check for one patent claim.

    Looks up prior art for the claim and prints the similarity scores
    returned by `check_novelty`, or a notice when no prior art came back.
    Returns nothing.
    """
    related_titles = retrieve_prior_art(patent_claim, api_key)

    # Guard clause: without prior art there is nothing to score.
    if not related_titles:
        print("No prior art found for comparison.")
        return

    print("Cosine Similarities to Prior Art: ", check_novelty(patent_claim, related_titles))

# Example usage:
# The API key is read from the LENS_API_KEY environment variable so a real key
# never has to be pasted into the notebook; the placeholder is used when the
# variable is not set.
import os

api_key = os.environ.get("LENS_API_KEY", "your_lens_org_api_key")
patent_claim = "A method for enhancing machine learning model performance by using transfer learning techniques."
patent_novelty_check(patent_claim, api_key)



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/anaconda3/envs/py310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/anaconda3/envs/py310/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda3/envs/py310/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/envs/py310/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/anaconda3/envs/py310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/anaconda3/envs/py310/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda3/envs/py310/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/envs/py310/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/anaconda3/envs/py310/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/anaconda3/envs/py310/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda3/envs/py310/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/envs/py310/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start

AttributeError: _ARRAY_API not found

ImportError: numpy.core._multiarray_umath failed to import

RuntimeError: Failed to import transformers.pipelines because of the following error (look up to see its traceback):
numpy.core.umath failed to import

In [None]:
# Patent Novelty and Obviousness Checking with Multiple Data Sources

## Dataset and API Information:
This version allows you to choose between various patent data sources:
- **Lens.org**: API-based patent search, requires API key.
- **USPTO Bulk Data**: Public patent data from the US Patent Office, no key required.
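- **Google Patents**: Structured access requires a Google Cloud Project for BigQuery; the simplified implementation below only requests the public Google Patents search page and returns its raw HTML.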
- **WIPO Patentscope**: Global patent search, requires API key.

## Patent Search Function
The `search_patents()` function dispatches to the matching data source based on its `source` argument (`'lens'`, `'uspto'`, `'google_patents'` or `'wipo'`); the example at the end of the code cell sets this flag.

## Prior Art Retrieval
The `retrieve_prior_art()` function uses the selected API to pull the titles of related patents for comparison.

## Similarity Calculation using LLMs
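`check_novelty()` reports the cosine similarity between TF-IDF vectors of the new claim and each prior-art title. The code also references a pre-trained Hugging Face sentence-embedding model (`sentence-transformers/all-MiniLM-L6-v2`) for semantic similarity.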

## Novelty Check Pipeline
The `patent_novelty_check()` function brings it all together, helping us check the novelty of a new patent claim against prior art from different data sources.


In [None]:
# Required Libraries
import requests
import pandas as pd
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# 1. Search Patents from Different Data Sources
def search_patents(query, api_key=None, source="lens"):
    """
    Routes a patent search to the backend named by `source`.

    Supported values for `source` are 'lens', 'uspto', 'google_patents' and
    'wipo'. The Lens and WIPO backends receive `api_key`; the USPTO and
    Google backends are called with the query only.

    Returns whatever the chosen backend function returns.

    Raises:
        ValueError: If `source` is not one of the supported names.
    """
    # Each backend call is wrapped in a lambda so that only the selected one
    # is looked up and invoked.
    backends = (
        ("lens", lambda: search_patents_lens(query, api_key)),
        ("uspto", lambda: search_patents_uspto(query)),
        ("google_patents", lambda: search_patents_google(query)),
        ("wipo", lambda: search_patents_wipo(query, api_key)),
    )
    for backend_name, run_search in backends:
        if source == backend_name:
            return run_search()
    raise ValueError("Invalid source provided. Choose from: 'lens', 'uspto', 'google_patents', 'wipo'.")

# 2. Fetch Patent Data from Lens.org API (requires an API key)
def search_patents_lens(query, api_key, timeout=30):
    """
    Searches Lens.org for records matching a free-text query.

    Sends an authenticated POST with a boolean `must`/`match` query on the
    `text` field and asks for at most 10 hits.

    Args:
        query: Free-text search string.
        api_key: Lens.org API token, sent as a Bearer token.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for example when the API key is rejected).
        requests.Timeout: If no response arrives within `timeout` seconds.

    NOTE(review): the URL below is the *scholarly* search endpoint; Lens.org
    appears to serve patents from a separate patent search endpoint with a
    different record layout -- confirm which one is intended.
    """
    url = "https://api.lens.org/scholarly/search"
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    payload = {
        "query": {
            "bool": {
                "must": [
                    {"match": {"text": query}}
                ]
            }
        },
        "size": 10
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Fail loudly on auth/quota errors instead of handing an error body to the
    # caller, where it cannot be told apart from an empty result set.
    response.raise_for_status()
    return response.json()

# 3. Fetch Patent Data from USPTO
def search_patents_uspto(query, timeout=30):
    """
    Searches the USPTO patent application endpoint for a free-text query.

    Args:
        query: Free-text search string, sent as the `searchText` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response (at most 10 rows are requested).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    # USPTO Bulk Data URL (no API key required, but filtering might be complex)
    url = "https://developer.uspto.gov/ibd-api/v1/patent/application"
    # Pass the query via `params` so characters such as '&', '#' or '=' are
    # percent-encoded instead of corrupting the query string.
    params = {"searchText": query, "rows": 10}
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()

# 4. Fetch Patent Data from Google Patents (via BigQuery)
def search_patents_google(query, timeout=30):
    """
    Requests the public Google Patents search page for a query.

    This is a simplified stand-in: it does not use the BigQuery public
    dataset (which needs a Google Cloud Project) and performs no parsing.

    Args:
        query: Free-text search string, sent as the `q` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text (raw HTML, not structured records).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    # Simplified version without actual BigQuery (Google Cloud setup required for real queries)
    url = "https://patents.google.com/"
    # Pass the query via `params` so special characters are percent-encoded
    # instead of corrupting the query string.
    params = {"q": query, "num": 10}
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    # Parse HTML or use BigQuery for more structured data
    return response.text

# 5. Fetch Patent Data from WIPO Patentscope API (requires an API key)
def search_patents_wipo(query, api_key, timeout=30):
    """
    Searches WIPO Patentscope for a free-text query.

    Args:
        query: Free-text search string, sent as the `query` parameter.
        api_key: WIPO API token, sent as a Bearer token.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response (at most 10 rows are requested).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    url = "https://patentscope.wipo.int/search-api/rest/patents"
    # Pass the query via `params` so special characters are percent-encoded
    # instead of corrupting the query string.
    params = {"query": query, "rows": 10}
    headers = {
        'Authorization': f'Bearer {api_key}'
    }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()

# 6. Retrieve Prior Art and Novelty Comparison
def retrieve_prior_art(patent_title, api_key, source):
    """
    Retrieves the titles of patents related to a patent title from the selected source.

    Args:
        patent_title: Text used as the search query.
        api_key: API token forwarded to `search_patents`.
        source: Name of the data source forwarded to `search_patents`.

    Returns:
        A list of non-empty titles: the `title` of each record under `data`,
        or otherwise the `inventionTitle` of each record under `results`.
        An empty list when the search result is not a dict (for example raw
        HTML text) or has neither key.
    """
    patents = search_patents(patent_title, api_key, source)

    # Some backends return plain text; on a str, `'data' in patents` would be a
    # substring test and the lookup below would fail, so require a dict first.
    if not isinstance(patents, dict):
        return []

    if 'data' in patents:
        records, title_key = patents['data'], 'title'
    elif 'results' in patents:
        records, title_key = patents['results'], 'inventionTitle'
    else:
        return []

    # Skip records whose title is missing or empty: they carry no text to compare.
    return [
        record[title_key]
        for record in (records or [])
        if isinstance(record, dict) and record.get(title_key)
    ]

# 7. LLM for Semantic Similarity (Using Hugging Face Pipeline)
def check_novelty(new_patent_claim, prior_art_patents, method="tfidf"):
    """
    Scores how similar a new patent claim is to each prior-art text.

    Args:
        new_patent_claim: Text of the claim being checked.
        prior_art_patents: List of prior-art texts to compare against.
        method: "tfidf" (default) compares TF-IDF vectors fitted on the claim
            plus the prior-art texts. "embedding" compares mean-pooled token
            embeddings from 'sentence-transformers/all-MiniLM-L6-v2'; the
            model is only loaded when this method is requested.

    Returns:
        A 2-D array with one row and one column per prior-art text holding
        the cosine similarity between the claim and that text. With no
        prior-art texts the array has shape (1, 0).

    Raises:
        ValueError: If `method` is not 'tfidf' or 'embedding'.
    """
    import numpy as np  # local import: nothing else in this cell needs it

    if method not in ("tfidf", "embedding"):
        raise ValueError("Invalid method provided. Choose from: 'tfidf', 'embedding'.")

    prior_art_patents = list(prior_art_patents)
    if not prior_art_patents:
        # Nothing to compare against: return an empty row rather than letting
        # the similarity routine fail on a zero-row matrix.
        return np.empty((1, 0))

    if method == "embedding":
        # Initialize HuggingFace pipeline for text similarity
        similarity_model = pipeline('feature-extraction', model='sentence-transformers/all-MiniLM-L6-v2')

        def embed(text):
            # assumes that for a single string the pipeline returns a nested
            # list whose first element holds one vector per token -- TODO
            # confirm. Averaging over tokens gives one vector per text.
            return np.asarray(similarity_model(text)[0]).mean(axis=0)

        new_claim_embedding = embed(new_patent_claim)
        prior_art_embeddings = [embed(patent) for patent in prior_art_patents]
        return cosine_similarity([new_claim_embedding], prior_art_embeddings)

    # TF-IDF path: fit on the claim plus all prior art so both share a vocabulary.
    vectorizer = TfidfVectorizer()
    corpus = [new_patent_claim] + prior_art_patents
    tfidf_matrix = vectorizer.fit_transform(corpus)

    # Row 0 is the claim; the remaining rows are the prior-art texts.
    return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])

# 8. Full Pipeline for Patent Novelty Check
def patent_novelty_check(patent_claim, api_key, source):
    """
    Runs the end-to-end novelty check for one patent claim.

    Looks up prior art for the claim in the data source named by `source`
    and prints the similarity scores returned by `check_novelty`, or a
    notice when no prior art came back. Returns nothing.
    """
    related_titles = retrieve_prior_art(patent_claim, api_key, source)

    # Guard clause: without prior art there is nothing to score.
    if not related_titles:
        print("No prior art found for comparison.")
        return

    print("Cosine Similarities to Prior Art: ", check_novelty(patent_claim, related_titles))

# Example usage:
# The API key is read from the PATENT_API_KEY environment variable so a real
# key never has to be pasted into the notebook; the placeholder is used when
# the variable is not set.
import os

api_key = os.environ.get("PATENT_API_KEY", "your_lens_org_or_wipo_api_key")
patent_claim = "A method for enhancing machine learning model performance by using transfer learning techniques."
source = "lens"  # Choose from 'lens', 'uspto', 'google_patents', 'wipo'
patent_novelty_check(patent_claim, api_key, source)
