In [7]:
import re
import json
from itertools import islice
from patent_model import Patent
from preprocess import json_to_patent

def clean_text(text, patterns):
    for pattern in patterns:
        text = re.sub(pattern, '', text)
    return text

# Define regex patterns
patterns_preamble = [r'^\d+\.\s*', r'comprising:$', r'^(a |an |An |A)']
patterns_component = [r'comprising:$', r'; and$', r'^(a |an |An |A)']
patterns_subcomponent = [r'^(comprising\s+|a\s+|an\s+|\d+\s+)*', r'comprising:$', r'; and$', r'^(a |an |An |A)']

# Initialize questions dictionary
questions_dict = {}

# Extracting the patent information
patent_number = '07404685'
with open(f'json_patents/{patent_number}.json', 'r') as file:
    data = json.load(file)

patent = json_to_patent(json.dumps(data))
claims = patent.claims

for claim in claims:
    preamble_key = claim["preamble"]
    preamble = clean_text(claim["preamble"], patterns_preamble)
    components = claim.get("components", [])
    
    if preamble not in questions_dict:
        questions_dict[preamble_key] = {}
        
    if not components:
        # If only a preamble exists
        if '' not in questions_dict[preamble_key]:
            questions_dict[preamble_key][''] = {
                '': [f"What is the role of {preamble}?",
                    f"How is {preamble} utilized?",
                    f"What outcomes are produced by {preamble}?",
                    f"What alternative components can substitute {preamble}?"]
            }
    else:
        for component_data in components:
            component_key = component_data["component"]
            component = clean_text(component_data["component"], patterns_component)
            subcomponents = component_data.get("subcomponents", [])
            
            if component not in questions_dict[preamble_key]:
                questions_dict[preamble_key][component_key] = {}
                
            if not subcomponents:
                if '' not in questions_dict[preamble_key][component_key]:
                    questions_dict[preamble_key][component_key][''] = [f"What is the role of the {component} of the {preamble}?",
                                                              f"How is the {component} of the {preamble} utilized?",
                                                              f"What outcomes are produces by the {component} of the {preamble}?",
                                                              f"What alternative components can substitute the {component} of the {preamble}?"]
            else:
                for subcomponent in subcomponents:
                    subcomponent_key = subcomponent
                    subcomponent = clean_text(subcomponent, patterns_subcomponent)
                    
                    if subcomponent not in questions_dict[preamble_key][component_key]:
                        questions_dict[preamble_key][component_key][subcomponent_key] = []
                    
                    # Add questions only if the list is empty
                    if not questions_dict[preamble_key][component_key][subcomponent_key]:
                        questions_dict[preamble_key][component_key][subcomponent_key].extend([
                            f"What is the role of the {subcomponent} in the {component} of the {preamble}?",
                            f"How is the {subcomponent} utilized within the {component} of the {preamble}?",
                            f"What outcomes are produced by {subcomponent} in the {component} of the {preamble}?",
                            f"What alternative components can substitute {subcomponent} in the {component} of the {preamble}?"
                        ])
                        
first_three_items = dict(islice(questions_dict.items(), 1))
print(json.dumps(first_three_items, indent=4))

{
    "1. A ring binder mechanism for retaining loose-leaf pages, the mechanism comprising:": {
        "a housing;": {
            "": [
                "What is the role of the housing; of the  ring binder mechanism for retaining loose-leaf pages, the mechanism ?",
                "How is the housing; of the  ring binder mechanism for retaining loose-leaf pages, the mechanism  utilized?",
                "What outcomes are produces by the housing; of the  ring binder mechanism for retaining loose-leaf pages, the mechanism ?",
                "What alternative components can substitute the housing; of the  ring binder mechanism for retaining loose-leaf pages, the mechanism ?"
            ]
        },
        "hinge plates supported by the housing for pivoting motion relative to the housing;": {
            "": [
                "What is the role of the hinge plates supported by the housing for pivoting motion relative to the housing; of the  ring binder mechanism for retaining loose-l

In [9]:
# pip install azure-ai-inference
import os
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

os.environ["AZURE_INFERENCE_CREDENTIAL"] = ""
api_key = os.getenv("AZURE_INFERENCE_CREDENTIAL", '')
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
    
client = ChatCompletionsClient(
    endpoint='https://Phi-3-5-MoE-instruct-gaqxj.eastus2.models.ai.azure.com',
    credential=AzureKeyCredential(api_key)
)

def structure_to_string(data):
    claim_strings = []
    for claim in data:
        claim_str = f"{claim['claim_number']}\n{claim['preamble']}\n"
        for component in claim['components']:
            claim_str += f"  {component['component']}\n"
            for subcomponent in component['subcomponents']:
                claim_str += f"    {subcomponent}\n"
        claim_strings.append(claim_str)
    return "\n".join(claim_strings)

def generate_answers(abstract, patent_title, claims, questions):
    title_example = "Apparatus for docking a printed circuit board"
    question_example = "What is the role of a guide plate selectively securable in a position adjacent to a first electronic connector, the guide plate including at least one linear slot of the apparatus?"
    answer_example = "The goal is to select and secure the electric circuit board in a designated location."
    question_example1 = "What alternative components can substitute the guide plate selectively securable in a position adjacent to a first electronic connector, the guide plate including at least one linear slot of the apparatus?"
    answer_example1 = "Alternative componets may include adjustable brackets, spring clips, guide rails, magnetic strips, or standarized slots or holes."
    #print(title_example)

    # Initialize messages with the example
    initial_messages = [
        {
            "role": "system",
            "content": "You are a patent expert and answer questions according to the doctrine of equivalents in a brief and direct manner in less than one hundred words."
        },
        {
            "role": "user",
            "content": f"{question_example}"
        },
        {
            "role": "assistant",
            "content": f"{answer_example}"
        },
        {
            "role": "user",
            "content": f"{question_example1}"
        },
        {
            "role": "assistant",
            "content": f"{answer_example1}"
        }
    ]

    answers = []
    for i, question in enumerate(questions):
        # Include the first example to follow the few-shot strategy
        messages = initial_messages + [
            {
                "role": "user",
                "content": f"The patent title is: {patent_title}. The patent abstract is: {abstract}. {question}"
            }
        ]

        payload = {
            "messages": messages,
            "max_tokens": 100, 
            "temperature": 0.5, 
            "top_p": 0.1,
            "presence_penalty": 0.5,
            "frequency_penalty": 0.5 
        }
        #print("payload", payload)
        response = client.complete(payload)
        #print("the response to the questions are:", response.choices[0].message.content)
        answers.append(response.choices[0].message.content)
    #print(json.dumps(answers, indent=4))
    return answers


def query_patent(patent, questions):
    answers = {}
    claims = patent.claims
    abstract = patent.abstract
    patent_title = patent.title

    for preamble in questions:
        if preamble not in answers:
            answers[preamble] = {}

        # Check if there are components under the preamble
        components = questions[preamble]
        if not components:
            # Directly answer if there are no components
            answers[preamble] = generate_answers(
                abstract, patent_title, structure_to_string(claims), preamble
            )
            continue
        
        for component in components:
            if component not in answers[preamble]:
                answers[preamble][component] = {}

            # Check if there are subcomponents under the component
            subcomponents = questions[preamble][component]
            if not subcomponents:
                # Directly answer if there are no subcomponents
                answers[preamble][component] = generate_answers(
                    abstract, patent_title, structure_to_string(claims), component
                )
                continue

            for subcomponent in subcomponents:
                if subcomponent not in answers[preamble][component]:
                    answers[preamble][component][subcomponent] = generate_answers(
                        abstract, patent_title, structure_to_string(claims),
                        questions[preamble][component][subcomponent]
                    )


    return answers

answers = query_patent(patent, questions_dict)
with open(f'question_feature/{patent.document_number}.json', 'w') as file:
    json.dump(answers, file, indent=4)

In [10]:
from general_patent_query import query_patent_general

patent_questions = ["What is the essential function of the product or process?",
                    "How does the product or process operate?",
                    "What results does this product or process achieve?",
                    "What ingredients, materials, or processes are alternatives that can work in the same way?"
                   ]

answers = query_patent_general(client, patent, patent_questions)

combined_data = {
    "title": patent.title,
    "abstract": patent.abstract,
    "claims": patent.claims,
    "answers": answers
}

# Write the combined data to a JSON file
with open(f'question_patent/{patent.document_number}.json', 'w') as file:
    json.dump(combined_data, file, indent=4)

print(answers)

[' The essential function of this ring binder mechanism is to securely retain loose-leaf pages by allowing the user to easily open and close rings, which pivot between a locked (closed) position for page stability and an unlocked (open) position for adding or removing pages. The mechanism employs a spring-biased control structure that automatically returns the rings to the locked position when released.', ' The ring binder mechanism operates by allowing the user to pivot the hinge plates between a closed position, where the rings form a continuous loop for retaining loose-leaf pages, and an open position, where the rings can be easily accessed for adding or removing pages. A lever, connected to a travel bar and actuated by the user, controls this motion. A torsion spring is engaged with the lever, urging it towards the closed position when released. This ensures that', ' The ring binder mechanism achieves the secure retention of loose-leaf pages, allowing for easy addition or removal o