In [None]:
#Install package and load the extension
%pip install -U datasets langchain langchain-community langchain-core neo4j colab-xterm
%load_ext colabxterm

In [None]:
## refresh xterm if necessary
# %reload_ext colabxterm

```
curl -fsSL https://ollama.com/install.sh | sh
```


```
ollama serve & ollama pull llama3
```


In [2]:
%xterm

UsageError: Line magic function `%xterm` not found.


In [1]:
#in case for testing
from langchain_community.llms import Ollama
llm = Ollama(model = "llama3")
llm.invoke("Tell me 3 red flower names.")

'Here are three red flower names:\n\n1. Poppy\n2. Hollyhock\n3. Tulipa (a type of tulip, which comes in a deep red color)'

In [3]:
from langchain.chains import GraphCypherQAChain, LLMChain
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from langchain_community.chat_models import ChatOllama
from langchain_community.llms import Ollama
from langchain_community.graphs import Neo4jGraph
from langchain_community.llms import Ollama
from langchain_community.graphs.graph_document import Node as BaseNode, Relationship as BaseRelationship, GraphDocument

from langchain.schema import Document

from pydantic import Field, BaseModel
from typing import List, Optional
import json
from neo4j import GraphDatabase
import re
from datasets import load_dataset

# Load the HUPD dataset
dataset_dict = load_dataset('HUPD/hupd',
    name='sample',
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    train_filing_start_date='2016-01-01',
    train_filing_end_date='2016-01-21',
    val_filing_start_date='2016-01-22',
    val_filing_end_date='2016-01-31',
    trust_remote_code=True
)

patents = dataset_dict['train']

In [4]:
g_patents = patents.filter(lambda patent: patent['cpc_label'][0][0] == 'G')

In [5]:
import pandas as pd
# Convert to DataFrame
g_patents_df = pd.DataFrame(g_patents)

# Sample 10 random patent numbers
random_patents = g_patents_df['patent_number'].sample(10, replace=True)  # use replace=True if you have fewer than 10 patents

# Display the randomly selected patent numbers
print("Randomly selected 10 patent numbers:")
print(random_patents.tolist())

Randomly selected 10 patent numbers:
['14997498', '14991139', '14988935', '15000024', '14990834', '14988312', '14992233', '14993202', '14992385', '15001400']


## first patent

In [7]:
def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14997116"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


# Initialize the model
llm = ChatOllama(model="llama3")
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting nodes and entities from patent documents.
Extract key entities, their types (person, location, concept, etc.), and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
```
Entities:
- Entity: Type
- Entity: Type
- ...

Relationships:
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
- ...
```

"""

# Create a prompt template
prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

        # Create relationships
        for relationship in relationships:
            parts = relationship.split("->")
            if len(parts) == 3:
                entity1 = parts[0].strip()
                relation = clean_relationship_type(parts[1].strip())
                entity2 = parts[2].strip()
                print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
                session.run(
                    f"""
                    MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                    MERGE (e1)-[r:{relation}]->(e2)
                    RETURN r
                    """,
                    {"entity1": entity1, "entity2": entity2}
                )

In [None]:
if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

        # Parse the response to extract entities and relationships
        entities, relationships = parse_response(response)
        print(f"Entities: {entities}")
        print(f"Relationships: {relationships}")
else:
    print(f"Patent with number {patent_number} not found.")

Here are the extracted key entities and their relationships:

**Entities:**

* Computer system (Type: System)
* Machine learning model (Type: Model)
* Resume corpus (Type: Corpus)
* User (Type: Person)
* Organization (Type: Organization)
* Job title (Type: Title)
* Profile information (Type: Information)
* Social networking system (Type: System)

**Relationships:**

* Computer system -> Trains -> Machine learning model
* Computer system -> Receives -> Resume corpus
* User -> Associated with -> Employee of an organization
* User -> Connected to -> Employee on a social networking system
* Machine learning model -> Based on -> Terms from resume corpus
* Job title -> Recruited for by -> Organization
* Profile information -> Provided to -> Machine learning model
* Computer system -> Determines -> Job title for user based on profile information

Let me know if you'd like me to extract any specific types of entities or relationships!
Entities: [('Computer system', 'Type: System'), ('Machine l

### Construction of graphs

In [None]:
url = "neo4j+s://..." #neo4j database url
username = "neo4j"
password = "" #token

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Computer system with type: Type__System
Creating entity: Machine learning model with type: Type__Model
Creating entity: Resume corpus with type: Type__Corpus
Creating entity: User with type: Type__Person
Creating entity: Organization with type: Type__Organization
Creating entity: Job title with type: Type__Title
Creating entity: Profile information with type: Type__Information
Creating entity: Social networking system with type: Type__System
Creating relationship: Computer system -[:Trains]-> Machine learning model
Creating relationship: Computer system -[:Receives]-> Resume corpus
Creating relationship: User -[:Associated_with]-> Employee of an organization
Creating relationship: User -[:Connected_to]-> Employee on a social networking system
Creating relationship: Machine learning model -[:Based_on]-> Terms from resume corpus
Creating relationship: Job title -[:Recruited_for_by]-> Organization
Creating relationship: Profile information -[:Provided_to]-> Machine learni

### Summarization

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_community.graphs import Neo4jGraph
from neo4j import GraphDatabase

# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)


In [None]:
# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)


  warn_deprecated(
  warn_deprecated(


Based on the provided entities and relationships, I can generate a summary for you. Here it is:

**Summary**

A machine learning model was trained to analyze profile information from job applicants, which was provided to the model via a computer system. The computer system received resumes as input data and used this information to train the model. This training process enables the model to learn patterns and correlations between resume data and job title. The organization recruited for by a specific job title, leveraging the trained machine learning model to improve its hiring processes.

Let me know if you'd like me to expand or clarify anything!


In [None]:
print(type(summary_response))

<class 'str'>


In [None]:
summary_response

"Based on the provided entities and relationships, I can generate a summary for you. Here it is:\n\n**Summary**\n\nA machine learning model was trained to analyze profile information from job applicants, which was provided to the model via a computer system. The computer system received resumes as input data and used this information to train the model. This training process enables the model to learn patterns and correlations between resume data and job title. The organization recruited for by a specific job title, leveraging the trained machine learning model to improve its hiring processes.\n\nLet me know if you'd like me to expand or clarify anything!"

In [None]:
first_double_newline = summary_response.find("\n\n")
first_double_newline

95

In [None]:
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]

In [None]:
summary_response = summary_response.replace("\n\n", "")

In [None]:
summary_response

"**Summary**A machine learning model was trained to analyze profile information from job applicants, which was provided to the model via a computer system. The computer system received resumes as input data and used this information to train the model. This training process enables the model to learn patterns and correlations between resume data and job title. The organization recruited for by a specific job title, leveraging the trained machine learning model to improve its hiring processes.Let me know if you'd like me to expand or clarify anything!"

In [None]:
summary_response = summary_response.replace("**Summary**", "")
summary_response

"A machine learning model was trained to analyze profile information from job applicants, which was provided to the model via a computer system. The computer system received resumes as input data and used this information to train the model. This training process enables the model to learn patterns and correlations between resume data and job title. The organization recruited for by a specific job title, leveraging the trained machine learning model to improve its hiring processes.Let me know if you'd like me to expand or clarify anything!"

In [None]:
summary_response = summary_response.replace("Let me know if you'd like me to expand or clarify anything!", "")
summary_response

'A machine learning model was trained to analyze profile information from job applicants, which was provided to the model via a computer system. The computer system received resumes as input data and used this information to train the model. This training process enables the model to learn patterns and correlations between resume data and job title. The organization recruited for by a specific job title, leveraging the trained machine learning model to improve its hiring processes.'

In [None]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [None]:
reference_summary = patent['abstract']
reference_summary

'Systems, methods, and non-transitory computer readable media are configured to receive a resume corpus. A machine learning model is trained based on terms from the resume corpus. A job title for a user is determined based on profile information provided to the model.'

In [None]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patents['abstract'][1]

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")


{'rouge-1': {'r': 0.1, 'p': 0.13725490196078433, 'f': 0.11570247446212711}, 'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0}, 'rouge-l': {'r': 0.1, 'p': 0.13725490196078433, 'f': 0.11570247446212711}} 



In [None]:
basic_rag_sum = "The invention relates to a system and method for determining a job title for a user based on their profile information. A machine learning model is trained using terms from a resume corpus, and then the user's profile information is provided to the model to determine their most likely job title. The model uses techniques such as vector space creation to represent terms in the resume corpus, and anchor points are used to identify the closest match for each term."

rouge = Rouge()
scores = rouge.get_scores(basic_rag_sum, reference_summary)

for score in scores:
  print(score,"\n")


{'rouge-1': {'r': 0.22857142857142856, 'p': 0.2962962962962963, 'f': 0.2580645112122789}, 'rouge-2': {'r': 0.022727272727272728, 'p': 0.04, 'f': 0.028985502625499582}, 'rouge-l': {'r': 0.2, 'p': 0.25925925925925924, 'f': 0.22580644669614994}} 



**Original Abstract From Patent Document**

Systems, methods, and non-transitory computer readable media are configured to receive a resume corpus. A machine learning model is trained based on terms from the resume corpus. A job title for a user is determined based on profile information provided to the model.



**Knowledge RAG Summary from Graph Database**

A machine learning model was trained to analyze profile information from job applicants, which was provided to the model via a computer system. The computer system received resumes as input data and used this information to train the model. This training process enables the model to learn patterns and correlations between resume data and job title. The organization recruited for by a specific job title, leveraging the trained machine learning model to improve its hiring processes.



**Basic RAG Summary from Vector Database**

The invention relates to a system and method for determining a job title for a user based on their profile information. A machine learning model is trained using terms from a resume corpus, and then the user's profile information is provided to the model to determine their most likely job title. The model uses techniques such as vector space creation to represent terms in the resume corpus, and anchor points are used to identify the closest match for each term.

## Second patent

In [None]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.5)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities and relationships from patent documents.
Do not produce a sentence when extracting the relationships.
Extract key entities, their types and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
Entities:
- Entity: Type
- Entity: Type
- ...

Relationships:
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

In [None]:
def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14997116"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

In [None]:
if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

I'd be happy to help you extract entities and relationships from this patent document. Here's what I've found:

**Entities:**

* Apparatus (Type: Device)
* Brand identifier detector (Type: Component)
* Measure and tracking module (Type: Component)
* Report generator (Type: Component)
* Media stream (Type: Data)
* Scene (Type: Data)
* Reference scene (Type: Data)
* First brand identifier (Type: Entity)
* Processor (Type: Device)
* Interface circuit (Type: Component)
* Input devices (Type: Devices)
* Output devices (Type: Devices)
* Mass storage devices (Type: Devices)

**Relationships:**

* Apparatus -> contains -> Brand identifier detector
* Apparatus -> contains -> Measure and tracking module
* Apparatus -> contains -> Report generator
* Media stream -> contained in -> Scene
* Reference scene -> contained in -> First brand identifier
* Processor -> connected to -> Interface circuit
* Input devices -> connected to -> Interface circuit
* Output devices -> connected to -> Interface circu

In [None]:
import re

def parse_response(response):
    lines = response.strip().split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) \(Type: (.*)\)$", stripped_line)
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) -> (.*) -> (.*)$", stripped_line)
            if match:
                entity1 = match.group(1).strip()
                relation = match.group(2).strip()
                entity2 = match.group(3).strip()
                relationships.append((entity1, relation, entity2))
            else:
                print(f"Failed to parse relationship: {stripped_line}")

    return entities, relationships

response = """
**Entities:**

* Apparatus (Type: Device)
* Brand identifier detector (Type: Component)
* Measure and tracking module (Type: Component)
* Report generator (Type: Component)
* Media stream (Type: Data)
* Scene (Type: Data)
* Reference scene (Type: Data)
* First brand identifier (Type: Entity)
* Processor (Type: Device)
* Interface circuit (Type: Component)
* Input devices (Type: Devices)
* Output devices (Type: Devices)
* Mass storage devices (Type: Devices)

**Relationships:**

* Apparatus -> contains -> Brand identifier detector
* Apparatus -> contains -> Measure and tracking module
* Apparatus -> contains -> Report generator
* Media stream -> contained in -> Scene
* Reference scene -> contained in -> First brand identifier
* Processor -> connected to -> Interface circuit
* Input devices -> connected to -> Interface circuit
* Output devices -> connected to -> Interface circuit
* Mass storage devices -> connected to -> Apparatus
"""

entities, relationships = parse_response(response)

print("Entities:")
for entity in entities:
    print(entity)

print("\nRelationships:")
for relationship in relationships:
    print(relationship)


Entities:
('Apparatus', 'Device')
('Brand identifier detector', 'Component')
('Measure and tracking module', 'Component')
('Report generator', 'Component')
('Media stream', 'Data')
('Scene', 'Data')
('Reference scene', 'Data')
('First brand identifier', 'Entity')
('Processor', 'Device')
('Interface circuit', 'Component')
('Input devices', 'Devices')
('Output devices', 'Devices')
('Mass storage devices', 'Devices')

Relationships:
('Apparatus', 'contains', 'Brand identifier detector')
('Apparatus', 'contains', 'Measure and tracking module')
('Apparatus', 'contains', 'Report generator')
('Media stream', 'contained in', 'Scene')
('Reference scene', 'contained in', 'First brand identifier')
('Processor', 'connected to', 'Interface circuit')
('Input devices', 'connected to', 'Interface circuit')
('Output devices', 'connected to', 'Interface circuit')
('Mass storage devices', 'connected to', 'Apparatus')


In [None]:
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Apparatus with type: Device
Creating entity: Brand identifier detector with type: Component
Creating entity: Measure and tracking module with type: Component
Creating entity: Report generator with type: Component
Creating entity: Media stream with type: Data
Creating entity: Scene with type: Data
Creating entity: Reference scene with type: Data
Creating entity: First brand identifier with type: Entity
Creating entity: Processor with type: Device
Creating entity: Interface circuit with type: Component
Creating entity: Input devices with type: Devices
Creating entity: Output devices with type: Devices
Creating entity: Mass storage devices with type: Devices
Creating relationship: Apparatus -[:contains]-> Brand identifier detector
Creating relationship: Apparatus -[:contains]-> Measure and tracking module
Creating relationship: Apparatus -[:contains]-> Report generator
Creating relationship: Media stream -[:contained_in]-> Scene
Creating relationship: Reference scene -[:c

In [None]:
# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

  warn_deprecated(
  warn_deprecated(


Here is a summary of the patent document based on the extracted information:

A multimedia processing apparatus is disclosed, which includes a processor connected to an interface circuit that receives input from input devices and outputs data to output devices. The apparatus also includes mass storage devices for storing media streams. Each media stream is contained within a scene.

The apparatus further includes a report generator, brand identifier detector, and measure and tracking module, all of which are contained within the apparatus. The brand identifier detector detects the presence or absence of a first brand identifier in a reference scene, which is also contained within the apparatus.

In summary, this patent describes an apparatus that processes multimedia data, including detecting and tracking brand identifiers within scenes.


In [None]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")

In [None]:
summary_response

'A multimedia processing apparatus is disclosed, which includes a processor connected to an interface circuit that receives input from input devices and outputs data to output devices. The apparatus also includes mass storage devices for storing media streams. Each media stream is contained within a scene.The apparatus further includes a report generator, brand identifier detector, and measure and tracking module, all of which are contained within the apparatus. The brand identifier detector detects the presence or absence of a first brand identifier in a reference scene, which is also contained within the apparatus.In summary, this patent describes an apparatus that processes multimedia data, including detecting and tracking brand identifiers within scenes.'

In [None]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")

{'rouge-1': {'r': 0.5094339622641509, 'p': 0.38571428571428573, 'f': 0.43902438533941435}, 'rouge-2': {'r': 0.17525773195876287, 'p': 0.1650485436893204, 'f': 0.16999999500450017}, 'rouge-l': {'r': 0.49056603773584906, 'p': 0.37142857142857144, 'f': 0.42276422273778835}} 



In [None]:
reference_summary

'Methods and apparatus to measure brand exposure in media streams are disclosed. An example apparatus disclosed herein includes a brand identifier detector to compare first data associated with a first scene of a media stream with second data associated with a reference scene including a first brand identifier to detect the first brand identifier in the first scene of the media stream. The example apparatus also includes a measure and tracking module to combine respective locations of the first brand identifier in respective frames of a first sequence of image frames forming the first scene to determine a weighted location for the first brand identifier. The example apparatus further includes a report generator to report appearance data corresponding to the first brand identifier, the appearance data including the weighted location for the first brand identifier.'

## Third patent

In [8]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.1)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14986817"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

I'd be happy to help you extract entities and relationships from this patent document. Here's the output:

**Entities**

* Organization (Type: Company)
* Publisher (Type: Individual or Company)
* Compute Device (Type: Computer)
* Product/Service (Type: Intangible Good)
* Topic (Type: Concept)
* Location (Type: Geographic)

**Relationships**

* Organization -> Published By -> Publisher
* Publisher -> Associated With -> Organization
* Organization -> Has Market Presence In -> Location
* Product/Service -> Associated With -> Organization
* Topic -> Detected From -> Content
* Location -> Associated With -> Organization
* Organization -> Compared To -> Second Organization (in claims 3 and 12)
* Product/Service -> Compared To -> Second Product/Service (in claim 12)

Let me know if you'd like me to extract any specific entities or relationships from this document!


In [11]:
import re

def parse_response(response):
    lines = response.strip().split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) \(Type: (.*)\)$", stripped_line)
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) -> (.*) -> (.*)$", stripped_line)
            if match:
                entity1 = match.group(1).strip()
                relation = match.group(2).strip()
                entity2 = match.group(3).strip()
                relationships.append((entity1, relation, entity2))
            else:
                print(f"Failed to parse relationship: {stripped_line}")

    return entities, relationships

# Example response from the Llama3 model
response = """
**Entities**

* Organization (Type: Company)
* Publisher (Type: Individual or Company)
* Compute Device (Type: Computer)
* Product/Service (Type: Intangible Good)
* Topic (Type: Concept)
* Location (Type: Geographic)

**Relationships**

* Organization -> Published By -> Publisher
* Publisher -> Associated With -> Organization
* Organization -> Has Market Presence In -> Location
* Product/Service -> Associated With -> Organization
* Topic -> Detected From -> Content
* Location -> Associated With -> Organization
* Organization -> Compared To -> Second Organization (in claims 3 and 12)
* Product/Service -> Compared To -> Second Product/Service (in claim 12)
"""

entities, relationships = parse_response(response)

print("Entities:")
for entity in entities:
    print(entity)

print("\nRelationships:")
for relationship in relationships:
    print(relationship)


Entities:
('Organization', 'Company')
('Publisher', 'Individual or Company')
('Compute Device', 'Computer')
('Product/Service', 'Intangible Good')
('Topic', 'Concept')
('Location', 'Geographic')

Relationships:
('Organization', 'Published By', 'Publisher')
('Publisher', 'Associated With', 'Organization')
('Organization', 'Has Market Presence In', 'Location')
('Product/Service', 'Associated With', 'Organization')
('Topic', 'Detected From', 'Content')
('Location', 'Associated With', 'Organization')
('Organization', 'Compared To', 'Second Organization (in claims 3 and 12)')
('Product/Service', 'Compared To', 'Second Product/Service (in claim 12)')


In [12]:
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Organization with type: Company
Creating entity: Publisher with type: Individual_or_Company
Creating entity: Compute Device with type: Computer
Creating entity: Product/Service with type: Intangible_Good
Creating entity: Topic with type: Concept
Creating entity: Location with type: Geographic
Creating relationship: Organization -[:Published_By]-> Publisher
Creating relationship: Publisher -[:Associated_With]-> Organization
Creating relationship: Organization -[:Has_Market_Presence_In]-> Location
Creating relationship: Product/Service -[:Associated_With]-> Organization
Creating relationship: Topic -[:Detected_From]-> Content
Creating relationship: Location -[:Associated_With]-> Organization
Creating relationship: Organization -[:Compared_To]-> Second Organization (in claims 3 and 12)
Creating relationship: Product/Service -[:Compared_To]-> Second Product/Service (in claim 12)


In [14]:
# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Based on the extracted information, here is a summary of the patent document:

**Organization**: [Name], a leading innovator in [industry/field], has developed a groundbreaking [Product/Service] that revolutionizes the way [related field/service] is approached.

**Location**: Headquartered in [Location], [Organization] has established itself as a major player in the global market, with a strong presence in [specific geographic region].

**Publisher**: The patent document was published by [Publisher], a renowned authority on intellectual property and innovation.

**Summary**: The [Product/Service] is designed to [briefly describe the product/service's functionality or benefits]. With its unique features and advantages, this innovative solution has the potential to transform the industry and improve customer experiences.

**Market Presence**: As a prominent organization in the field, [Organization] has a significant market presence in [Location], with a strong reputation for delivering h

In [15]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")

In [16]:
summary_response

"**Organization**: [Name], a leading innovator in [industry/field], has developed a groundbreaking [Product/Service] that revolutionizes the way [related field/service] is approached.**Location**: Headquartered in [Location], [Organization] has established itself as a major player in the global market, with a strong presence in [specific geographic region].**Publisher**: The patent document was published by [Publisher], a renowned authority on intellectual property and innovation.: The [Product/Service] is designed to [briefly describe the product/service's functionality or benefits]. With its unique features and advantages, this innovative solution has the potential to transform the industry and improve customer experiences.**Market Presence**: As a prominent organization in the field, [Organization] has a significant market presence in [Location], with a strong reputation for delivering high-quality products and services.Please let me know if you would like me to add or modify any in

In [18]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [19]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print(reference_summary)

{'rouge-1': {'r': 0.23076923076923078, 'p': 0.09, 'f': 0.12949639884063982}, 'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0}, 'rouge-l': {'r': 0.20512820512820512, 'p': 0.08, 'f': 0.11510790963200676}} 

The apparatus comprises at least one database for storing digital information associated with at least a first user and a second user, at least one links module for the first user and for the second user, at least one manipulations module and at least one display device for respectively displaying at least said linked digital information wherein said linked digital information associated with said first user is automatically updated on at least said display device of said second user when that information is manipulated, and wherein said linked digital information associated with said second user is automatically updated on at least said display device of said first user when that information is manipulated.


## Fourth Patent

In [25]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.1)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14990669"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

Based on the patent document, I've extracted the following entities and relationships:

**Entities**

* Social networking system (System)
* Places (Places)
* Check-in posts (Check-ins)
* Posters (Posters)
* Users (Users)
* Search query (Query)

**Relationships**

* System -> Monitors -> Check-ins
* System -> Identifies -> Characteristics of Places
* System -> Receives -> Query from User
* System -> Provides -> Results based on Query and Place Characteristics
* Check-ins -> Associated with -> Places
* Posters -> Associated with -> Check-ins
* Users -> Tagged in -> Check-ins
* System -> Analyzes -> Data associated with Check-ins
* System -> Performs -> Text analysis on text associated with Check-ins
* System -> Performs -> Image analysis on digital media associated with Check-ins
* System -> Generates -> Multi-step query tool based on User activity

Let me know if you'd like me to extract any additional information!


In [30]:
import re

def parse_response(response):
    lines = response.strip().split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) \((.*)\)$", stripped_line)
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) -> (.*) -> (.*)$", stripped_line)
            if match:
                entity1 = match.group(1).strip()
                relation = match.group(2).strip()
                entity2 = match.group(3).strip()
                relationships.append((entity1, relation, entity2))
            else:
                print(f"Failed to parse relationship: {stripped_line}")

    return entities, relationships

# Example response from the Llama3 model
response = """
**Entities**

**Entities**

* Social networking system (System)
* Places (Places)
* Check-in posts (Check-ins)
* Posters (Posters)
* Users (Users)
* Search query (Query)

**Relationships**

* System -> Monitors -> Check-ins
* System -> Identifies -> Characteristics of Places
* System -> Receives -> Query from User
* System -> Provides -> Results based on Query and Place Characteristics
* Check-ins -> Associated with -> Places
* Posters -> Associated with -> Check-ins
* Users -> Tagged in -> Check-ins
* System -> Analyzes -> Data associated with Check-ins
* System -> Performs -> Text analysis on text associated with Check-ins
* System -> Performs -> Image analysis on digital media associated with Check-ins
* System -> Generates -> Multi-step query tool based on User activity
"""

entities, relationships = parse_response(response)

print("Entities:")
for entity in entities:
    print(entity)

print("\nRelationships:")
for relationship in relationships:
    print(relationship)


Entities:
('Social networking system', 'System')
('Places', 'Places')
('Check-in posts', 'Check-ins')
('Posters', 'Posters')
('Users', 'Users')
('Search query', 'Query')

Relationships:
('System', 'Monitors', 'Check-ins')
('System', 'Identifies', 'Characteristics of Places')
('System', 'Receives', 'Query from User')
('System', 'Provides', 'Results based on Query and Place Characteristics')
('Check-ins', 'Associated with', 'Places')
('Posters', 'Associated with', 'Check-ins')
('Users', 'Tagged in', 'Check-ins')
('System', 'Analyzes', 'Data associated with Check-ins')
('System', 'Performs', 'Text analysis on text associated with Check-ins')
('System', 'Performs', 'Image analysis on digital media associated with Check-ins')
('System', 'Generates', 'Multi-step query tool based on User activity')


In [37]:
from neo4j import GraphDatabase
import re

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            result = session.run(
                f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}}) RETURN e",
                name=entity
            )
            print(f"Entity creation result: {result.single()}")

        # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            result = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )
            print(f"Relationship creation result: {result.single()}")

    driver.close()

# Example data
entities = [
    ('Social networking system', 'System'),
    ('Places', 'Places'),
    ('Check-in posts', 'Check-ins'),
    ('Posters', 'Posters'),
    ('Users', 'Users'),
    ('Search query', 'Query')
]

relationships = [
    ('Social networking system', 'Monitors', 'Check-in posts'),
    ('Social networking system', 'Identifies', 'Places'),
    ('Social networking system', 'Receives', 'Search query'),
    ('Social networking system', 'Provides', 'Results based on Query and Place Characteristics'),
    ('Check-in posts', 'Associated with', 'Places'),
    ('Posters', 'Associated with', 'Check-in posts'),
    ('Users', 'Tagged in', 'Check-in posts'),
    ('Social networking system', 'Analyzes', 'Check-in posts'),
    ('Social networking system', 'Performs', 'Text analysis'),
    ('Social networking system', 'Performs', 'Image analysis'),
    ('Social networking system', 'Generates', 'Query tool')
]

# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Social networking system with type: System
Entity creation result: <Record e=<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:1' labels=frozenset({'Entity', 'System'}) properties={'name': 'Social networking system'}>>
Creating entity: Places with type: Places
Entity creation result: <Record e=<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:2' labels=frozenset({'Places', 'Entity'}) properties={'name': 'Places'}>>
Creating entity: Check-in posts with type: Check_ins
Entity creation result: <Record e=<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:0' labels=frozenset({'Check_ins', 'Entity'}) properties={'name': 'Check-in posts'}>>
Creating entity: Posters with type: Posters
Entity creation result: <Record e=<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:3' labels=frozenset({'Entity', 'Posters'}) properties={'name': 'Posters'}>>
Creating entity: Users with type: Users
Entity creation result: <Record e=<Node element_id='4:7169b4ad-1c40-49

In [38]:

# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Here is a summary of the patent document based on the extracted information:

The invention relates to a social networking system that enables users to share their experiences and locations with others. The system monitors check-in posts, which are associated with specific places. When a user makes a post, it is analyzed by the system to identify the location being referenced.

The system also receives search queries from users, allowing them to find check-in posts related to specific places or events. Posters, who are users themselves, are associated with their respective check-in posts.

In summary, the social networking system allows for the sharing of experiences and locations, enables discovery through search queries, and provides a platform for users to connect with each other based on shared interests and locations.


In [39]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

In [40]:
summary_response

'The invention relates to a social networking system that enables users to share their experiences and locations with others. The system monitors check-in posts, which are associated with specific places. When a user makes a post, it is analyzed by the system to identify the location being referenced.The system also receives search queries from users, allowing them to find check-in posts related to specific places or events. Posters, who are users themselves, are associated with their respective check-in posts.In summary, the social networking system allows for the sharing of experiences and locations, enables discovery through search queries, and provides a platform for users to connect with each other based on shared interests and locations.'

In [41]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")

{'rouge-1': {'r': 0.4772727272727273, 'p': 0.28, 'f': 0.35294117180990053}, 'rouge-2': {'r': 0.07407407407407407, 'p': 0.0380952380952381, 'f': 0.05031446092322338}, 'rouge-l': {'r': 0.38636363636363635, 'p': 0.22666666666666666, 'f': 0.2857142810535979}} 



In [42]:
reference_summary

'The present disclosure is directed toward systems and methods for inferring one or more qualities of a place based on social networking system activity associated that place. The present disclosure is also directed toward systems and methods for providing a multi-step search tool that utilizes the inferred qualities of places within a geographic area to provide a user with a more meaningful search experience.'

## Fifth patent

In [44]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.15)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types (persons, locations, etc.) and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14998490"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

Here are the extracted entities and relationships from the patent document:

**Entities**

* Cantilever (Type: Device)
* Microtip (Type: Device)
* Actuator (Type: Device)
* Piezoelectric film (Type: Material)
* Thermal actuator (Type: Device)
* Magnetic film actuator (Type: Device)
* PZT film (Type: Material)
* Pt film (Type: Material)
* Au film (Type: Material)
* Ti film (Type: Material)
* Reservoir (Type: Device)
* AFM equipment (Type: Device)
* Atomic force microscope (Type: Device)
* Dimension 3100 (Type: Device)
* Digital Instruments (Type: Company)
* ODT (Type: Chemical compound)
* MHA (Type: Chemical compound)
* Au surface (Type: Material)
* Gold surface (Type: Material)
* Electrolyte material (Type: Material)
* HCl (Type: Chemical compound)
* NaCl (Type: Chemical compound)
* Copper sulfate (Type: Chemical compound)

**Relationships**

* Cantilever -> extends from -> Substrate
* Microtip -> proximate to -> End of cantilever
* Actuator -> disposed on -> Cantilever
* Piezoelectric

In [45]:
import re

def parse_response(response):
    lines = response.strip().split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) \(Type: (.*)\)$", stripped_line)
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) -> (.*) -> (.*)$", stripped_line)
            if match:
                entity1 = match.group(1).strip()
                relation = match.group(2).strip()
                entity2 = match.group(3).strip()
                relationships.append((entity1, relation, entity2))
            else:
                print(f"Failed to parse relationship: {stripped_line}")

    return entities, relationships

# Example response from the Llama3 model
response = """
**Entities**

* Cantilever (Type: Device)
* Microtip (Type: Device)
* Actuator (Type: Device)
* Piezoelectric film (Type: Material)
* Thermal actuator (Type: Device)
* Magnetic film actuator (Type: Device)
* PZT film (Type: Material)
* Pt film (Type: Material)
* Au film (Type: Material)
* Ti film (Type: Material)
* Reservoir (Type: Device)
* AFM equipment (Type: Device)
* Atomic force microscope (Type: Device)
* Dimension 3100 (Type: Device)
* Digital Instruments (Type: Company)
* ODT (Type: Chemical compound)
* MHA (Type: Chemical compound)
* Au surface (Type: Material)
* Gold surface (Type: Material)
* Electrolyte material (Type: Material)
* HCl (Type: Chemical compound)
* NaCl (Type: Chemical compound)
* Copper sulfate (Type: Chemical compound)

**Relationships**

* Cantilever -> extends from -> Substrate
* Microtip -> proximate to -> End of cantilever
* Actuator -> disposed on -> Cantilever
* Piezoelectric film -> energized by -> Actuator
* Thermal actuator -> used for -> Moving cantilever
* Magnetic film actuator -> used for -> Moving cantilever
* PZT film -> part of -> Microtip
* Pt film -> part of -> Microtip
* Au film -> part of -> Microtip
* Ti film -> part of -> Microtip
* Reservoir -> connected to -> Cantilever
* AFM equipment -> used for -> Scanning probe microscopy
* Atomic force microscope -> used for -> Scanning probe microscopy
* Dimension 3100 -> type of -> Atomic force microscope
* Digital Instruments -> manufacturer of -> Atomic force microscope
* ODT -> dispensed from -> Microtip
* MHA -> dispensed from -> Microtip
* Au surface -> probed by -> Microtip
* Gold surface -> probed by -> Microtip
* Electrolyte material -> dispensed from -> Microtip
* HCl -> part of -> Electrolyte material
* NaCl -> part of -> Electrolyte material
* Copper sulfate -> part of -> Electrolyte material
"""

entities, relationships = parse_response(response)

print("Entities:")
for entity in entities:
    print(entity)

print("\nRelationships:")
for relationship in relationships:
    print(relationship)


Entities:
('Cantilever', 'Device')
('Microtip', 'Device')
('Actuator', 'Device')
('Piezoelectric film', 'Material')
('Thermal actuator', 'Device')
('Magnetic film actuator', 'Device')
('PZT film', 'Material')
('Pt film', 'Material')
('Au film', 'Material')
('Ti film', 'Material')
('Reservoir', 'Device')
('AFM equipment', 'Device')
('Atomic force microscope', 'Device')
('Dimension 3100', 'Device')
('Digital Instruments', 'Company')
('ODT', 'Chemical compound')
('MHA', 'Chemical compound')
('Au surface', 'Material')
('Gold surface', 'Material')
('Electrolyte material', 'Material')
('HCl', 'Chemical compound')
('NaCl', 'Chemical compound')
('Copper sulfate', 'Chemical compound')

Relationships:
('Cantilever', 'extends from', 'Substrate')
('Microtip', 'proximate to', 'End of cantilever')
('Actuator', 'disposed on', 'Cantilever')
('Piezoelectric film', 'energized by', 'Actuator')
('Thermal actuator', 'used for', 'Moving cantilever')
('Magnetic film actuator', 'used for', 'Moving cantilever'

In [46]:
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Cantilever with type: Device
Creating entity: Microtip with type: Device
Creating entity: Actuator with type: Device
Creating entity: Piezoelectric film with type: Material
Creating entity: Thermal actuator with type: Device
Creating entity: Magnetic film actuator with type: Device
Creating entity: PZT film with type: Material
Creating entity: Pt film with type: Material
Creating entity: Au film with type: Material
Creating entity: Ti film with type: Material
Creating entity: Reservoir with type: Device
Creating entity: AFM equipment with type: Device
Creating entity: Atomic force microscope with type: Device
Creating entity: Dimension 3100 with type: Device
Creating entity: Digital Instruments with type: Company
Creating entity: ODT with type: Chemical_compound
Creating entity: MHA with type: Chemical_compound
Creating entity: Au surface with type: Material
Creating entity: Gold surface with type: Material
Creating entity: Electrolyte material with type: Material
Crea

In [47]:

# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Based on the extracted information, here is a summary of the patent document:

The invention relates to a micro-electromechanical systems (MEMS) device that includes a cantilever with a piezoelectric film and an actuator. The cantilever is disposed on a substrate and has a microtip at its free end. The microtip is comprised of Pt, Ti, and Au films.

The actuator is energized by the piezoelectric film to control the movement of the cantilever. The device also includes a reservoir connected to the cantilever, which dispenses an electrolyte material containing HCl, NaCl, and Copper sulfate.

The microtip can be probed using atomic force microscopy (AFM) with a Digital Instruments AFM, which has a resolution of 3100 nm. The device can be used for various applications such as scanning probe microscopy, where the microtip is used to scan surfaces like Au or Gold.

In addition, the invention includes methods for manufacturing and operating the MEMS device, including dispensing ODT and MHA fro

In [48]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

'The invention relates to a micro-electromechanical systems (MEMS) device that includes a cantilever with a piezoelectric film and an actuator. The cantilever is disposed on a substrate and has a microtip at its free end. The microtip is comprised of Pt, Ti, and Au films.The actuator is energized by the piezoelectric film to control the movement of the cantilever. The device also includes a reservoir connected to the cantilever, which dispenses an electrolyte material containing HCl, NaCl, and Copper sulfate.The microtip can be probed using atomic force microscopy (AFM) with a Digital Instruments AFM, which has a resolution of 3100 nm. The device can be used for various applications such as scanning probe microscopy, where the microtip is used to scan surfaces like Au or Gold.In addition, the invention includes methods for manufacturing and operating the MEMS device, including dispensing ODT and MHA from the microtip, and using the device for sensing and actuation. Overall, the inventi

In [49]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print("reference: \n", reference_summary)

{'rouge-1': {'r': 0.3153153153153153, 'p': 0.3465346534653465, 'f': 0.330188674256408}, 'rouge-2': {'r': 0.11538461538461539, 'p': 0.12883435582822086, 'f': 0.1217391254499477}, 'rouge-l': {'r': 0.27927927927927926, 'p': 0.3069306930693069, 'f': 0.29245282519980426}} 

reference: 
 A dispensing device has a cantilever comprising a plurality of thin films arranged relative to one another to define a microchannel in the cantilever and to define at least portions of a dispensing microtip proximate an end of the cantilever and communicated to the microchannel to receive material therefrom. The microchannel is communicated to a reservoir that supplies material to the microchannel. One or more reservoir-fed cantilevers may be formed on a semiconductor chip substrate. A sealing layer preferably is disposed on one of the first and second thin films and overlies outermost edges of the first and second thin films to seal the outermost edges against material leakage. Each cantilever includes an a

## sixth patent

In [50]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.15)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types (persons, locations, etc.) and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14990535"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

I'd be happy to help you extract entities, types, and relationships from this patent document. Here's the output:

**Entities**

* Sensor control apparatus (Type: System)
* Gas sensor (Type: Device)
* Characteristic resistor (Type: Component)
* Energizing device (Type: Component)
* Setting device (Type: Component)
* Voltage measurement device (Type: Component)
* Current value obtainment device (Type: Component)
* Current source (Type: Component)
* CPU (Type: Component)
* ROM (Type: Component)
* RAM (Type: Component)
* Signal input/output section (Type: Component)

**Relationships**

* Sensor control apparatus -> controls -> Gas sensor
* Energizing device -> supplies -> Characteristic resistor
* Setting device -> sets -> Designated current value
* Voltage measurement device -> measures -> Resistor voltage
* Current value obtainment device -> obtains -> Designated current value
* Current source -> supplies -> Operation control current
* Sensor control apparatus -> uses -> Energization de

In [52]:
import re

def parse_response(response):
    lines = response.strip().split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) \(Type: (.*)\)$", stripped_line)
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) -> (.*) -> (.*)$", stripped_line)
            if match:
                entity1 = match.group(1).strip()
                relation = match.group(2).strip()
                entity2 = match.group(3).strip()
                relationships.append((entity1, relation, entity2))
            else:
                print(f"Failed to parse relationship: {stripped_line}")

    return entities, relationships

entities, relationships = parse_response(response)

print("Entities:")
for entity in entities:
    print(entity)

print("\nRelationships:")
for relationship in relationships:
    print(relationship)


Entities:
('Sensor control apparatus', 'System')
('Gas sensor', 'Device')
('Characteristic resistor', 'Component')
('Energizing device', 'Component')
('Setting device', 'Component')
('Voltage measurement device', 'Component')
('Current value obtainment device', 'Component')
('Current source', 'Component')
('CPU', 'Component')
('ROM', 'Component')
('RAM', 'Component')
('Signal input/output section', 'Component')

Relationships:
('Sensor control apparatus', 'controls', 'Gas sensor')
('Energizing device', 'supplies', 'Characteristic resistor')
('Setting device', 'sets', 'Designated current value')
('Voltage measurement device', 'measures', 'Resistor voltage')
('Current value obtainment device', 'obtains', 'Designated current value')
('Current source', 'supplies', 'Operation control current')
('Sensor control apparatus', 'uses', 'Energization device, Setting device, Voltage measurement device, Current value obtainment device, Current source')


In [54]:
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Sensor control apparatus with type: System
Creating entity: Gas sensor with type: Device
Creating entity: Characteristic resistor with type: Component
Creating entity: Energizing device with type: Component
Creating entity: Setting device with type: Component
Creating entity: Voltage measurement device with type: Component
Creating entity: Current value obtainment device with type: Component
Creating entity: Current source with type: Component
Creating entity: CPU with type: Component
Creating entity: ROM with type: Component
Creating entity: RAM with type: Component
Creating entity: Signal input/output section with type: Component
Creating relationship: Sensor control apparatus -[:controls]-> Gas sensor
Creating relationship: Energizing device -[:supplies]-> Characteristic resistor
Creating relationship: Setting device -[:sets]-> Designated current value
Creating relationship: Voltage measurement device -[:measures]-> Resistor voltage
Creating relationship: Current va

In [55]:
# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Here is a summary of the patent document based on the provided information:

The invention relates to a gas sensor system comprising a characteristic resistor and a gas sensor, both controlled by a sensor control apparatus. The system also includes an energizing device that supplies power to the characteristic resistor.

In this system, the energizing device provides energy to the characteristic resistor, which is used to sense changes in gas concentrations detected by the gas sensor. The sensor control apparatus monitors and controls the operation of both the gas sensor and the characteristic resistor, allowing for accurate detection and measurement of gas levels.

Overall, this invention enables real-time monitoring and control of gas levels using a reliable and efficient gas sensor system.


In [56]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

'The invention relates to a gas sensor system comprising a characteristic resistor and a gas sensor, both controlled by a sensor control apparatus. The system also includes an energizing device that supplies power to the characteristic resistor.In this system, the energizing device provides energy to the characteristic resistor, which is used to sense changes in gas concentrations detected by the gas sensor. The sensor control apparatus monitors and controls the operation of both the gas sensor and the characteristic resistor, allowing for accurate detection and measurement of gas levels.Overall, this invention enables real-time monitoring and control of gas levels using a reliable and efficient gas sensor system.'

In [57]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print("refernce: \n", reference_summary)

{'rouge-1': {'r': 0.3333333333333333, 'p': 0.25862068965517243, 'f': 0.2912621310019795}, 'rouge-2': {'r': 0.10256410256410256, 'p': 0.08695652173913043, 'f': 0.09411764209273384}, 'rouge-l': {'r': 0.28888888888888886, 'p': 0.22413793103448276, 'f': 0.25242717954566884}} 

refernce: 
 A sensor control apparatus controls a gas sensor which measures the concentration of oxygen contained in exhaust gas. A current DA converter of the sensor control apparatus supplies a current having a current value designated by a control section (hereinafter referred to as the designated current value) to a label resistor having a resistance previously set so as to represent the characteristic of the gas sensor (hereinafter referred to as the characteristic resistance). Further, the control section of the sensor control apparatus sets the designated current value to a plurality of values in accordance with the characteristic resistance of the label resistor.


## 7th patent

In [60]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.15)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types (persons, locations, etc.) and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14989021"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

Based on the patent document, I've extracted the entities and relationships for you:

**Entities**

* Display control apparatus (Apparatus)
* Interface bandwidth control component (Component)
* Memory element (Element)
* Display controller (Controller)
* Image data flow (Flow)
* Algorithm (Algorithm)
* Lookup table (Table)
* Data flow measurement component (Component)
* Bandwidth configuration component (Component)

**Relationships**

* The display control apparatus -> contains -> Interface bandwidth control component
* The interface bandwidth control component -> measures -> image data flow
* The interface bandwidth control component -> configures -> bandwidth for image data flow
* The memory element -> stores -> image data
* The display controller -> reads -> image data from the memory element
* The interface bandwidth control component -> receives -> start of frame indication from the display controller
* The interface bandwidth control component -> measures -> image data flow betwe

In [62]:
import re

def parse_response(response):
    lines = response.strip().split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) \((.*)\)$", stripped_line)
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) -> (.*) -> (.*)$", stripped_line)
            if match:
                entity1 = match.group(1).strip()
                relation = match.group(2).strip()
                entity2 = match.group(3).strip()
                relationships.append((entity1, relation, entity2))
            else:
                print(f"Failed to parse relationship: {stripped_line}")

    return entities, relationships


entities, relationships = parse_response(response)

print("Entities:")
for entity in entities:
    print(entity)

print("\nRelationships:")
for relationship in relationships:
    print(relationship)


Entities:
('Display control apparatus', 'Apparatus')
('Interface bandwidth control component', 'Component')
('Memory element', 'Element')
('Display controller', 'Controller')
('Image data flow', 'Flow')
('Algorithm', 'Algorithm')
('Lookup table', 'Table')
('Data flow measurement component', 'Component')
('Bandwidth configuration component', 'Component')

Relationships:
('The display control apparatus', 'contains', 'Interface bandwidth control component')
('The interface bandwidth control component', 'measures', 'image data flow')
('The interface bandwidth control component', 'configures', 'bandwidth for image data flow')
('The memory element', 'stores', 'image data')
('The display controller', 'reads', 'image data from the memory element')
('The interface bandwidth control component', 'receives', 'start of frame indication from the display controller')
('The interface bandwidth control component', 'measures', 'image data flow between consecutive start of frame indications')
('The inter

In [70]:
relationships = [
    ('Display control apparatus', 'contains', 'Interface bandwidth control component'),
    ('Interface bandwidth control component', 'measures', 'Image data flow'),
    ('Interface bandwidth control component', 'configures', 'Image data flow'),
    ('Memory element', 'stores', 'Image data'),
    ('Display controller', 'reads', 'Memory element'),
    ('Interface bandwidth control component', 'receives', 'Display controller'),
    ('Interface bandwidth control component', 'measures', 'Image data flow'),
    ('Interface bandwidth control component', 're-configures', 'Image data flow'),
    ('Interface bandwidth control component', 'receives', 'Display update signal'),
    ('Interface bandwidth control component', 'configures', 'Image data flow'),
    ('Interface bandwidth control component', 'measures', 'Image data flow'),
    ('Interface bandwidth control component', 're-configures', 'Image data flow')
]


In [71]:
# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(
                f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}}) RETURN e",
                name=entity
            )

        # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")

            # Debugging: Check if nodes exist
            check_nodes = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                RETURN e1, e2
                """,
                entity1=entity1,
                entity2=entity2
            )
            nodes = check_nodes.single()
            if nodes is None:
                print(f"One or both entities not found: {entity1}, {entity2}")
                continue

            result = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )
            print(f"Relationship creation result: {result.single()}")

    driver.close()

# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Display control apparatus with type: Apparatus
Creating entity: Interface bandwidth control component with type: Component
Creating entity: Memory element with type: Element
Creating entity: Display controller with type: Controller
Creating entity: Image data flow with type: Flow
Creating entity: Algorithm with type: Algorithm
Creating entity: Lookup table with type: Table
Creating entity: Data flow measurement component with type: Component
Creating entity: Bandwidth configuration component with type: Component
Creating relationship: Display control apparatus -[:contains]-> Interface bandwidth control component
Relationship creation result: <Record r=<Relationship element_id='5:7169b4ad-1c40-49a1-bd35-ea26966588c3:1152939096792891426' nodes=(<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:34' labels=frozenset() properties={}>, <Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:5' labels=frozenset() properties={}>) type='contains' properties={}>>
Creatin

In [72]:
# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Based on the extracted information, here is a summary of the patent document:

The invention relates to an interface bandwidth control component that measures image data flow and configures/reconfigures it accordingly. The component receives display controller input and uses this information to manage the bandwidth of the image data flowing through the system.

In operation, the interface bandwidth control component measures the image data flow and then adjusts its configuration to optimize the flow. This may involve reconfiguring the flow in real-time to ensure that the display controller can efficiently read from a memory element and render high-quality images on a display.

The display control apparatus contains the interface bandwidth control component, which allows it to monitor and manage the image data flow. By doing so, the apparatus ensures that the display is properly controlled and that the image data is transmitted at an optimal rate.

Overall, the invention provides a solu

In [73]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

'The invention relates to an interface bandwidth control component that measures image data flow and configures/reconfigures it accordingly. The component receives display controller input and uses this information to manage the bandwidth of the image data flowing through the system.In operation, the interface bandwidth control component measures the image data flow and then adjusts its configuration to optimize the flow. This may involve reconfiguring the flow in real-time to ensure that the display controller can efficiently read from a memory element and render high-quality images on a display.The display control apparatus contains the interface bandwidth control component, which allows it to monitor and manage the image data flow. By doing so, the apparatus ensures that the display is properly controlled and that the image data is transmitted at an optimal rate.Overall, the invention provides a solution for managing image data flow in a display system, allowing for efficient and hi

In [74]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print("Reference: \n", reference_summary)

{'rouge-1': {'r': 0.5319148936170213, 'p': 0.30120481927710846, 'f': 0.3846153799988167}, 'rouge-2': {'r': 0.19047619047619047, 'p': 0.12307692307692308, 'f': 0.14953270551139852}, 'rouge-l': {'r': 0.5319148936170213, 'p': 0.30120481927710846, 'f': 0.3846153799988167}} 

Reference: 
 A display control apparatus comprising at least one memory element within which image data is stored, at least one display controller arranged to read from the, or each, memory element the image data and to output display data generated from the read image data to at least one display device. The display control apparatus further comprises at least one interface component via which the display controller is arranged to read image data from the memory element. The display control apparatus further comprises at least one interface bandwidth control component arranged to measure image data flow over the interface component from the memory element to the display controller, and configure a bandwidth for image 

## 8th patent

In [78]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.1)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types (persons, locations, etc.) and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "15000497"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

I'd be happy to help you extract entities, types, and relationships from this patent document. Here's the output:

**Entities**

* Optical-fiber-spliced portion reinforcing heating device (Device)
* Motor (Entity)
* Tension mechanism (Mechanism)
* Optical fiber (Material)
* Sleeve (Component)
* Heaters (Components)
* Clamps (Components)
* Force-applying members (Members)
* Cam mechanisms (Mechanisms)

**Relationships**

* The optical-fiber-spliced portion reinforcing heating device uses a motor.
* The tension mechanism is controlled by the cam mechanisms and force-applying members.
* The optical fiber is coated with a sleeve.
* The heaters sandwich the sleeve.
* The clamps grasp the optical fiber.
* The force-applying members apply tension to the optical fiber.
* The cam mechanisms control movement of the tension mechanism.

Let me know if you'd like me to extract any specific information or if you have further questions!


In [79]:
relationships = [
    ('Optical-fiber-spliced portion reinforcing heating device', 'uses', 'Motor'),
    ('Tension mechanism', 'is controlled by', 'Cam mechanisms'),
    ('Tension mechanism', 'is controlled by', 'Force-applying members'),
    ('Optical fiber', 'is coated with', 'Sleeve'),
    ('Heaters', 'sandwich', 'Sleeve'),
    ('Clamps', 'grasp', 'Optical fiber'),
    ('Force-applying members', 'apply tension to', 'Optical fiber'),
    ('Cam mechanisms', 'control movement of', 'Tension mechanism')
]


In [81]:
# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(
                f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}}) RETURN e",
                name=entity
            )

        # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")

            # Debugging: Check if nodes exist
            check_nodes = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                RETURN e1, e2
                """,
                entity1=entity1,
                entity2=entity2
            )
            nodes = check_nodes.single()
            if nodes is None:
                print(f"One or both entities not found: {entity1}, {entity2}")
                continue

            result = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )
            print(f"Relationship creation result: {result.single()}")

    driver.close()

# Example data
entities = [
    ('Optical-fiber-spliced portion reinforcing heating device', 'Device'),
    ('Motor', 'Entity'),
    ('Tension mechanism', 'Mechanism'),
    ('Optical fiber', 'Material'),
    ('Sleeve', 'Component'),
    ('Heaters', 'Components'),
    ('Clamps', 'Components'),
    ('Force-applying members', 'Members'),
    ('Cam mechanisms', 'Mechanisms')
]

relationships = [
    ('Optical-fiber-spliced portion reinforcing heating device', 'uses', 'Motor'),
    ('Tension mechanism', 'is controlled by', 'Cam mechanisms'),
    ('Tension mechanism', 'is controlled by', 'Force-applying members'),
    ('Optical fiber', 'is coated with', 'Sleeve'),
    ('Heaters', 'sandwich', 'Sleeve'),
    ('Clamps', 'grasp', 'Optical fiber'),
    ('Force-applying members', 'apply tension to', 'Optical fiber'),
    ('Cam mechanisms', 'control movement of', 'Tension mechanism')
]

# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)


Creating entity: Optical-fiber-spliced portion reinforcing heating device with type: Device
Creating entity: Motor with type: Entity
Creating entity: Tension mechanism with type: Mechanism
Creating entity: Optical fiber with type: Material
Creating entity: Sleeve with type: Component
Creating entity: Heaters with type: Components
Creating entity: Clamps with type: Components
Creating entity: Force-applying members with type: Members
Creating entity: Cam mechanisms with type: Mechanisms
Creating relationship: Optical-fiber-spliced portion reinforcing heating device -[:uses]-> Motor
Relationship creation result: <Record r=<Relationship element_id='5:7169b4ad-1c40-49a1-bd35-ea26966588c3:1152945693862658069' nodes=(<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:21' labels=frozenset() properties={}>, <Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:22' labels=frozenset() properties={}>) type='uses' properties={}>>
Creating relationship: Tension mechanism -[:is_controlled_b

In [82]:
# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Here is a summary of the patent document:

The present invention relates to an optical-fiber-spliced portion reinforcing heating device that efficiently maintains tension on an optical fiber. The device comprises a sleeve coated with a heater, which applies heat to the optical fiber when clamps grasp it. The heaters are sandwiched between the sleeve and the clamps.

A motor powers a cam mechanism that controls movement of a tension mechanism. The tension mechanism is controlled by both the cam mechanism and force-applying members, allowing for precise tensioning of the optical fiber. This enables reliable maintenance of the tension, thereby minimizing stress on the fiber and preventing damage or signal degradation.

The device ensures consistent heating and tensioning of the optical fiber through its innovative design, providing a reliable solution for maintaining the integrity of the fiber in high-reliability applications such as telecommunications networks.


In [83]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

'The present invention relates to an optical-fiber-spliced portion reinforcing heating device that efficiently maintains tension on an optical fiber. The device comprises a sleeve coated with a heater, which applies heat to the optical fiber when clamps grasp it. The heaters are sandwiched between the sleeve and the clamps.A motor powers a cam mechanism that controls movement of a tension mechanism. The tension mechanism is controlled by both the cam mechanism and force-applying members, allowing for precise tensioning of the optical fiber. This enables reliable maintenance of the tension, thereby minimizing stress on the fiber and preventing damage or signal degradation.The device ensures consistent heating and tensioning of the optical fiber through its innovative design, providing a reliable solution for maintaining the integrity of the fiber in high-reliability applications such as telecommunications networks.'

In [84]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print("Reference: \n", reference_summary)

{'rouge-1': {'r': 0.4027777777777778, 'p': 0.3411764705882353, 'f': 0.36942674662663805}, 'rouge-2': {'r': 0.11666666666666667, 'p': 0.11570247933884298, 'f': 0.11618256761419418}, 'rouge-l': {'r': 0.3333333333333333, 'p': 0.2823529411764706, 'f': 0.30573247911071444}} 

Reference: 
 An optical-fiber-spliced portion reinforcing heating device of the invention includes: a pair of clamps that respectively grasp a coated portion of an optical fiber, the optical fiber including a fusion-spliced portion, the fusion-spliced portion being coated with a sleeve, the coated portion being exposed from the sleeve; at least two or more heaters that are arranged to face each other so as to sandwich the sleeve; a first force-applying member that presses at least one of the paired clamps so as to apply a tension to the optical fiber; and a second force-applying member that applies a pressing force to at least one or more of the heaters via the sleeve by use of one of an elastic member and a magnetic m

## 9th Patent

In [85]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.1)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types (persons, locations, etc.) and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "14994324"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

Here are the extracted entities, types, and relationships:

**Entities**

* Apparatus (Device)
* Housing (Physical Structure)
* Touch-enabled pad (Input Device)
* Processor (Computer Component)
* Storage (Data Storage)
* Button (User Interface Element)
* Light-reactive substance (Material)
* Display (Visual Output Device)

**Relationships**

* The apparatus -> contains -> housing
* The apparatus -> contains -> touch-enabled pad
* The apparatus -> contains -> processor
* The apparatus -> contains -> storage
* The touch-enabled pad -> is coupled to -> the apparatus
* The button -> is coupled to -> the apparatus
* The light-reactive substance -> is used by -> the apparatus
* The display -> is coupled to -> the apparatus

Note that some entities may have multiple relationships, but I've only listed each entity once in the above format. Let me know if you'd like me to expand on this or clarify any specific points!


In [86]:
# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(
                f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}}) RETURN e",
                name=entity
            )

        # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")

            # Debugging: Check if nodes exist
            check_nodes = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                RETURN e1, e2
                """,
                entity1=entity1,
                entity2=entity2
            )
            nodes = check_nodes.single()
            if nodes is None:
                print(f"One or both entities not found: {entity1}, {entity2}")
                continue

            result = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )
            print(f"Relationship creation result: {result.single()}")

    driver.close()

# Example data
entities = [
    ('Apparatus', 'Device'),
    ('Housing', 'Physical Structure'),
    ('Touch-enabled pad', 'Input Device'),
    ('Processor', 'Computer Component'),
    ('Storage', 'Data Storage'),
    ('Button', 'User Interface Element'),
    ('Light-reactive substance', 'Material'),
    ('Display', 'Visual Output Device')
]

relationships = [
    ('Apparatus', 'contains', 'Housing'),
    ('Apparatus', 'contains', 'Touch-enabled pad'),
    ('Apparatus', 'contains', 'Processor'),
    ('Apparatus', 'contains', 'Storage'),
    ('Touch-enabled pad', 'is coupled to', 'Apparatus'),
    ('Button', 'is coupled to', 'Apparatus'),
    ('Light-reactive substance', 'is used by', 'Apparatus'),
    ('Display', 'is coupled to', 'Apparatus')
]
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)


Creating entity: Apparatus with type: Device
Creating entity: Housing with type: Physical_Structure
Creating entity: Touch-enabled pad with type: Input_Device
Creating entity: Processor with type: Computer_Component
Creating entity: Storage with type: Data_Storage
Creating entity: Button with type: User_Interface_Element
Creating entity: Light-reactive substance with type: Material
Creating entity: Display with type: Visual_Output_Device
Creating relationship: Apparatus -[:contains]-> Housing
Relationship creation result: <Record r=<Relationship element_id='5:7169b4ad-1c40-49a1-bd35-ea26966588c3:1152939096792891424' nodes=(<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:32' labels=frozenset() properties={}>, <Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:17' labels=frozenset() properties={}>) type='contains' properties={}>>
Creating relationship: Apparatus -[:contains]-> Touch-enabled pad
Relationship creation result: <Record r=<Relationship element_id='5:7169b4ad-1c

In [87]:
# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Here is a summary based on the provided information:

The apparatus is a touch-sensitive device that includes a storage component, housing, a touch-enabled pad, and a processor. The touch-enabled pad allows for user input through gestures or taps. A light-reactive substance is used by the apparatus to react to various inputs.

The apparatus also features a display that is connected to it. Additionally, there are buttons (at least one) that are coupled to the apparatus, allowing for further interaction with the device.

In summary, this apparatus is an interactive device that enables users to input data through touch gestures and reactions to light, with visual output on the display, and further control options available through connected buttons.


In [88]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

'The apparatus is a touch-sensitive device that includes a storage component, housing, a touch-enabled pad, and a processor. The touch-enabled pad allows for user input through gestures or taps. A light-reactive substance is used by the apparatus to react to various inputs.The apparatus also features a display that is connected to it. Additionally, there are buttons (at least one) that are coupled to the apparatus, allowing for further interaction with the device.In summary, this apparatus is an interactive device that enables users to input data through touch gestures and reactions to light, with visual output on the display, and further control options available through connected buttons.'

In [89]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print("Reference: \n", reference_summary)

{'rouge-1': {'r': 0.5862068965517241, 'p': 0.24285714285714285, 'f': 0.343434339291909}, 'rouge-2': {'r': 0.23255813953488372, 'p': 0.09615384615384616, 'f': 0.13605441762969145}, 'rouge-l': {'r': 0.5517241379310345, 'p': 0.22857142857142856, 'f': 0.3232323190898888}} 

Reference: 
 In one aspect, an apparatus includes a housing, a touch-enabled pad coupled to the housing, a processor, and a memory accessible to the processor. The memory bears instructions executable by the processor to receive input to the touch-enabled pad and to present an indication of the input on the touch-enabled pad.


## 10th Patent

In [90]:
# Initialize the model
llm = ChatOllama(model="llama3", temperature=0.1)
# Define a prompt for extracting information
prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types (persons, locations, etc.) and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""

def get_patent_data(patent_number):
    for patent in g_patents:
        if patent['patent_number'] == patent_number:
            return patent
    return None

patent_number = "15001450"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")


prompt = ChatPromptTemplate.from_template(prompt_text)

# Define a function to extract entities, their types, and relationships
def extract_entities_and_relationships(text):
    chain = prompt | llm | StrOutputParser()
    response = chain.invoke({"text": text})
    return response

# Define a function to parse the response into entities and relationships
def parse_response(response):
    lines = response.split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities:**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships:**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            match = re.match(r"^(.*)\s+\((.*)\)$", stripped_line.lstrip("-* ").rstrip().strip())
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and (stripped_line.startswith("-") or stripped_line.startswith("*")):
            relationships.append(stripped_line.lstrip("-* ").rstrip().strip())

    return entities, relationships

# Function to clean relationship types
def clean_relationship_type(relation):
    return re.sub(r'[^a-zA-Z0-9_]', '_', relation)

# Function to sanitize entity types
def sanitize_entity_type(entity_type):
    return re.sub(r'[^a-zA-Z0-9]', '_', entity_type)

# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}})", name=entity)

         # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")
            session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )

    driver.close()

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

Here are the extracted entities and relationships:

**Entities**

* Image forming apparatus (Device)
* Housing (Part of Device)
* Support (Part of Device)
* Guiding portion (Part of Device)
* Add-on device (Device)
* Sheet-post-operation device (Type of Add-on device)
* Recording medium (Material)

**Relationships**

* Image forming apparatus -> contains -> Housing
* Image forming apparatus -> contains -> Support
* Image forming apparatus -> contains -> Guiding portion
* Add-on device -> is mounted on -> Image forming apparatus
* Sheet-post-operation device -> is a type of -> Add-on device
* Recording medium -> is used by -> Image forming apparatus


In [93]:
# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(
                f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}}) RETURN e",
                name=entity
            )

        # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")

            # Debugging: Check if nodes exist
            check_nodes = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                RETURN e1, e2
                """,
                entity1=entity1,
                entity2=entity2
            )
            nodes = check_nodes.single()
            if nodes is None:
                print(f"One or both entities not found: {entity1}, {entity2}")
                continue

            result = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )
            print(f"Relationship creation result: {result.single()}")

    driver.close()

# Example data
entities = [
    ('Image forming apparatus', 'Device'),
    ('Housing', 'Part of Device'),
    ('Support', 'Part of Device'),
    ('Guiding portion', 'Part of Device'),
    ('Add-on device', 'Device'),
    ('Sheet-post-operation device', 'Type of Add-on device'),
    ('Recording medium', 'Material')
]

relationships = [
    ('Image forming apparatus', 'contains', 'Housing'),
    ('Image forming apparatus', 'contains', 'Support'),
    ('Image forming apparatus', 'contains', 'Guiding portion'),
    ('Add-on device', 'is mounted on', 'Image forming apparatus'),
    ('Sheet-post-operation device', 'is a type of', 'Add-on device'),
    ('Recording medium', 'is used by', 'Image forming apparatus')
]
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)


Creating entity: Image forming apparatus with type: Device
Creating entity: Housing with type: Part_of_Device
Creating entity: Support with type: Part_of_Device
Creating entity: Guiding portion with type: Part_of_Device
Creating entity: Add-on device with type: Device
Creating entity: Sheet-post-operation device with type: Type_of_Add_on_device
Creating entity: Recording medium with type: Material
Creating relationship: Image forming apparatus -[:contains]-> Housing
Relationship creation result: <Record r=<Relationship element_id='5:7169b4ad-1c40-49a1-bd35-ea26966588c3:1152939096792891412' nodes=(<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:20' labels=frozenset() properties={}>, <Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:2' labels=frozenset() properties={}>) type='contains' properties={}>>
Creating relationship: Image forming apparatus -[:contains]-> Support
Relationship creation result: <Record r=<Relationship element_id='5:7169b4ad-1c40-49a1-bd35-ea26966588c

In [94]:
# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

Here is a summary of the patent document:

The present invention relates to an image forming apparatus that contains a housing, guiding portion, and support. The apparatus also includes an add-on device that is mounted on the apparatus. The add-on device is a type of sheet-post-operation device.

In operation, the image forming apparatus uses a recording medium to form images. The guiding portion helps to position the recording medium correctly for imaging. The support provides structural integrity to the apparatus and may also aid in the positioning of the recording medium.

The addition of the sheet-post-operation add-on device enhances the functionality of the image forming apparatus, allowing it to perform additional tasks related to processing or manipulating sheets of material after they have been imaged.

Overall, the invention provides a versatile and efficient image forming apparatus that can be used for a variety of applications.


In [95]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

'The present invention relates to an image forming apparatus that contains a housing, guiding portion, and support. The apparatus also includes an add-on device that is mounted on the apparatus. The add-on device is a type of sheet-post-operation device.In operation, the image forming apparatus uses a recording medium to form images. The guiding portion helps to position the recording medium correctly for imaging. The support provides structural integrity to the apparatus and may also aid in the positioning of the recording medium.The addition of the sheet-post-operation add-on device enhances the functionality of the image forming apparatus, allowing it to perform additional tasks related to processing or manipulating sheets of material after they have been imaged.Overall, the invention provides a versatile and efficient image forming apparatus that can be used for a variety of applications.'

In [96]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print("Reference: \n", reference_summary)

{'rouge-1': {'r': 0.39285714285714285, 'p': 0.2894736842105263, 'f': 0.33333332844811764}, 'rouge-2': {'r': 0.09803921568627451, 'p': 0.08264462809917356, 'f': 0.08968609369100554}, 'rouge-l': {'r': 0.35714285714285715, 'p': 0.2631578947368421, 'f': 0.30303029814508725}} 

Reference: 
 An image forming apparatus includes a housing in which an image forming unit is to be mounted, a support that supports an image reading device in a state where the image reading device is separated from the housing, and a guiding portion in which a guide part, which is formed on a to-be-mounted surface of an add-on device in such a manner as to project, is to be inserted such that the guide part is guided by the guiding portion. Engagement holes with which engagement portions, which are formed on the to-be-mounted surface of the add-on device in such a manner as to project, are to be engaged such that the engagement portions are positioned by the engagement holes are formed in a mounting surface of the h

## 11th patent

In [99]:
patent_number = "14996316"
patent = get_patent_data(patent_number)
if not patent:
    raise ValueError(f"Patent number {patent_number} not found.")

prompt_text = """
You are a helpful AI assistant for extracting entities, types and relationships from patent documents.
You are not allowed produce a sentence when extracting the relationships.
Extract key entities, their types (persons, locations, etc.) and their relationships from the following text and present them in a structured format:

Text: {text}

Format:
**Entities**
- Entity: Type
- Entity: Type
- ...

**Relationships**
- Entity1 -> Relationship -> Entity2
- Entity3 -> Relationship -> Entity4
"""
prompt = ChatPromptTemplate.from_template(prompt_text)

if patent:
    description = patent.get('description', '')
    claims = patent.get('claims', '')

    if description and claims:
        text = f"Description: {description}\nClaims: {claims}"

        # Extract information
        response = extract_entities_and_relationships(text)
        print(response)

else:
    print(f"Patent with number {patent_number} not found.")

I'm happy to help you extract entities, types, and relationships from the patent document. Here's my attempt:

**Entities**

* Memory device (Type: Device)
* Memory unit (Type: Unit)
* Controller (Type: Device)
* Bus (Type: Communication medium)
* Cycle (Type: Event)
* Command cycle (Type: Subtype of Cycle)
* Address cycle (Type: Subtype of Cycle)
* Data cycle (Type: Subtype of Cycle)
* Volume select command (Type: Command)

**Relationships**

* Memory device -> contains -> Memory unit
* Controller -> communicates with -> Bus
* Bus -> carries -> Cycles (including Command cycles, Address cycles, and Data cycles)
* Memory unit -> receives -> Command cycle (including Volume select commands)
* Memory unit -> executes -> Subsequent command (following a Volume select command)
* Memory device -> shares -> Bus with other memory devices

Please let me know if this is accurate or if I've missed anything!


In [101]:
def parse_response(response):
    lines = response.strip().split("\n")
    entities = []
    relationships = []
    is_entity_section = False
    is_relationship_section = False

    for line in lines:
        stripped_line = line.strip()
        if stripped_line == "**Entities**":
            is_entity_section = True
            is_relationship_section = False
            continue
        elif stripped_line == "**Relationships**":
            is_entity_section = False
            is_relationship_section = True
            continue

        if is_entity_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) \(Type: (.*)\)$", stripped_line)
            if match:
                entity_name = match.group(1).strip()
                entity_type = match.group(2).strip()
                entities.append((entity_name, entity_type))
            else:
                print(f"Failed to parse entity: {stripped_line}")
        elif is_relationship_section and stripped_line.startswith("*"):
            match = re.match(r"^\* (.*) -> (.*) -> (.*)$", stripped_line)
            if match:
                entity1 = match.group(1).strip()
                relation = match.group(2).strip()
                entity2 = match.group(3).strip()
                relationships.append((entity1, relation, entity2))
            else:
                print(f"Failed to parse relationship: {stripped_line}")

    return entities, relationships
entities, relationships = parse_response(response)

print("Entities:")
for entity in entities:
    print(entity)

print("\nRelationships:")
for relationship in relationships:
    print(relationship)

Entities:
('Memory device', 'Device')
('Memory unit', 'Unit')
('Controller', 'Device')
('Bus', 'Communication medium')
('Cycle', 'Event')
('Command cycle', 'Subtype of Cycle')
('Address cycle', 'Subtype of Cycle')
('Data cycle', 'Subtype of Cycle')
('Volume select command', 'Command')

Relationships:
('Memory device', 'contains', 'Memory unit')
('Controller', 'communicates with', 'Bus')
('Bus', 'carries', 'Cycles (including Command cycles, Address cycles, and Data cycles)')
('Memory unit', 'receives', 'Command cycle (including Volume select commands)')
('Memory unit', 'executes', 'Subsequent command (following a Volume select command)')
('Memory device', 'shares', 'Bus with other memory devices')


In [106]:
# Example data
entities = [
    ('Memory device', 'Device'),
    ('Memory unit', 'Unit'),
    ('Controller', 'Device'),
    ('Bus', 'Communication medium'),
    ('Cycle', 'Event'),
    ('Command cycle', 'Subtype of Cycle'),
    ('Address cycle', 'Subtype of Cycle'),
    ('Data cycle', 'Subtype of Cycle'),
    ('Volume select command', 'Command')
]

relationships = [
    ('Memory device', 'contains', 'Memory unit'),
    ('Controller', 'communicates with', 'Bus'),
    ('Bus', 'carries', 'Cycle'),
    ('Memory unit', 'receives', 'Command cycle'),
    ('Memory unit', 'executes', 'Volume select command'),
    ('Memory device', 'shares', 'Bus')
]

In [107]:
# Function to create knowledge graph
def create_knowledge_graph(url, username, password, entities, relationships):
    driver = GraphDatabase.driver(url, auth=(username, password))

    with driver.session() as session:
        # Create nodes with appropriate labels
        for entity, entity_type in entities:
            sanitized_entity_type = sanitize_entity_type(entity_type)
            print(f"Creating entity: {entity} with type: {sanitized_entity_type}")
            session.run(
                f"MERGE (e:Entity:{sanitized_entity_type} {{name: $name}}) RETURN e",
                name=entity
            )

        # Create relationships
        for entity1, relation, entity2 in relationships:
            relation = clean_relationship_type(relation)
            print(f"Creating relationship: {entity1} -[:{relation}]-> {entity2}")

            # Debugging: Check if nodes exist
            check_nodes = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                RETURN e1, e2
                """,
                entity1=entity1,
                entity2=entity2
            )
            nodes = check_nodes.single()
            if nodes is None:
                print(f"One or both entities not found: {entity1}, {entity2}")
                continue

            result = session.run(
                f"""
                MATCH (e1:Entity {{name: $entity1}}), (e2:Entity {{name: $entity2}})
                MERGE (e1)-[r:{relation}]->(e2)
                RETURN r
                """,
                entity1=entity1,
                entity2=entity2
            )
            print(f"Relationship creation result: {result.single()}")

    driver.close()
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

create_knowledge_graph(url, username, password, entities, relationships)

Creating entity: Memory device with type: Device
Creating entity: Memory unit with type: Unit
Creating entity: Controller with type: Device
Creating entity: Bus with type: Communication_medium
Creating entity: Cycle with type: Event
Creating entity: Command cycle with type: Subtype_of_Cycle
Creating entity: Address cycle with type: Subtype_of_Cycle
Creating entity: Data cycle with type: Subtype_of_Cycle
Creating entity: Volume select command with type: Command
Creating relationship: Memory device -[:contains]-> Memory unit
Relationship creation result: <Record r=<Relationship element_id='5:7169b4ad-1c40-49a1-bd35-ea26966588c3:1152939096792891412' nodes=(<Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:20' labels=frozenset() properties={}>, <Node element_id='4:7169b4ad-1c40-49a1-bd35-ea26966588c3:8' labels=frozenset() properties={}>) type='contains' properties={}>>
Creating relationship: Controller -[:communicates_with]-> Bus
Relationship creation result: <Record r=<Relationship

In [7]:
# Initialize the Neo4j connection
url = "neo4j+s://cde51c3f.databases.neo4j.io"
username = "neo4j"
password = "RYpXnwJ8nXt8jNbNYDOY82_L4D75TMBJSu4rYa4AE-8"
# Create the knowledge graph in Neo4j

# Initialize the model
llm = ChatOllama(model="llama3")

# Define a prompt for generating summaries from the knowledge graph
summary_prompt_text = """
You are a helpful AI assistant for summarizing patent documents.
Generate a summary based on the following information extracted from the knowledge graph:

Entities:
{entities}

Relationships:
{relationships}

Summary:
"""

summary_prompt = ChatPromptTemplate.from_template(summary_prompt_text)

# Function to execute a Cypher query and retrieve information
def execute_cypher_query(query, url, username, password):
    driver = GraphDatabase.driver(url, auth=(username, password))
    results = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            results.append(record)
    driver.close()
    return results

# Example Cypher query to retrieve entities and relationships
cypher_query = """
MATCH (e:Entity)-[r]->(e2:Entity)
RETURN e.name AS entity1, TYPE(r) AS relationship, e2.name AS entity2
"""

# Execute the query and retrieve information
results = execute_cypher_query(cypher_query, url, username, password)
entities = set()
relationships = []

for record in results:
    entities.add(record['entity1'])
    entities.add(record['entity2'])
    relationships.append(f"{record['entity1']} -> {record['relationship']} -> {record['entity2']}")

# Convert sets to lists
entities = list(entities)
relationships = list(relationships)

# Format entities and relationships for the prompt
entities_text = "\n".join([f"- {entity}" for entity in entities])
relationships_text = "\n".join([f"- {relationship}" for relationship in relationships])

summary_input = {
    "entities": entities_text,
    "relationships": relationships_text
}

summary_chain = LLMChain(prompt=summary_prompt, llm=llm, output_parser=StrOutputParser())
summary_response = summary_chain.run(summary_input)

print(summary_response)

  warn_deprecated(
  warn_deprecated(


Based on the provided information, here is a summary of the patent document:

The invention relates to a memory device and a controller that interacts with it. The memory device contains a memory unit that receives command cycles from the outside world. When a volume select command is received, the memory unit executes this command.

The memory device shares a bus with the controller, which communicates with the bus to send commands to the memory device. The bus carries these commands in the form of a cycle. This allows for efficient and controlled access to the memory unit within the memory device.

In summary, the patent document describes a system where a controller sends commands to a memory device through a shared bus, which carries the commands as cycles. The memory device executes these commands on its memory unit, allowing for controlled access to stored data.


In [8]:
first_double_newline = summary_response.find("\n\n")
if first_double_newline != -1:
    summary_response = summary_response[first_double_newline + 2:]
summary_response = summary_response.replace("\n\n", "")
summary_response = summary_response.replace("**Summary**", "")
summary_response

'The invention relates to a memory device and a controller that interacts with it. The memory device contains a memory unit that receives command cycles from the outside world. When a volume select command is received, the memory unit executes this command.The memory device shares a bus with the controller, which communicates with the bus to send commands to the memory device. The bus carries these commands in the form of a cycle. This allows for efficient and controlled access to the memory unit within the memory device.In summary, the patent document describes a system where a controller sends commands to a memory device through a shared bus, which carries the commands as cycles. The memory device executes these commands on its memory unit, allowing for controlled access to stored data.'

In [14]:
from rouge import Rouge

generated_summary = summary_response

reference_summary = patent['abstract']

rouge = Rouge()
scores = rouge.get_scores(generated_summary, reference_summary)

for score in scores:
  print(score,"\n")
  print("Reference: \n", reference_summary)

{'rouge-1': {'r': 0.2926829268292683, 'p': 0.1791044776119403, 'f': 0.22222221751200283}, 'rouge-2': {'r': 0.13559322033898305, 'p': 0.07339449541284404, 'f': 0.09523809068098094}, 'rouge-l': {'r': 0.24390243902439024, 'p': 0.14925373134328357, 'f': 0.1851851804749658}} 

Reference: 
 Systems, devices, memory controllers, and methods for controlling memory are described. One such method includes activating a memory unit of a memory device; after activating the memory unit, providing a command to the memory device; and returning the memory unit to a previous state if the command does not indicate a target memory volume, wherein the memory unit remains active if the command indicates a target memory volume associated with the memory unit.
