In [None]:
# Goal: create a function that extracts metadata keys and values from a given query.
# Inputs: `query`: str, and the available metadata keys to look for: list[str]

In [None]:
import os  
from openai import AzureOpenAI 
import json
from dotenv import load_dotenv

# Run before the client is created: the client cell reads OPENAI_API_KEY and
# OPENAI_API_HOST via os.getenv, and this call is what makes values from a
# local .env file visible there (presumably none are loaded if no .env exists).
load_dotenv()

In [None]:
# Shared Azure OpenAI client used by process_query for chat completions.
# Both the key and the endpoint come from environment variables; a missing
# variable is passed through as None.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("OPENAI_API_HOST"),
    api_version="2024-07-01-preview",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [None]:
def construct_function_schema(available_metadata, available_metadata_description):
    """Build the tool definition list for the 'extract_metadata' function.

    Args:
        available_metadata: Iterable of metadata key names; each becomes an
            optional string parameter of the tool.
        available_metadata_description: Mapping from key name to a
            human-readable description. Keys without an entry get an empty
            description.

    Returns:
        A one-element list holding the 'extract_metadata' tool definition,
        in the form process_query passes as ``tools``.
    """
    # One string-typed parameter per metadata key.
    parameter_specs = {
        name: {
            "type": "string",
            "description": available_metadata_description.get(name, ""),
        }
        for name in available_metadata
    }

    extract_tool = {
        "type": "function",
        "function": {
            "name": "extract_metadata",
            "description": "Extract metadata from the user's query based on the available metadata keys and descriptions. Return the extracted metadata as key-value pairs.",
            "parameters": {
                "type": "object",
                "properties": parameter_specs,
                # Nothing is required: a query may mention any subset of keys.
                "required": [],
            },
        },
    }
    return [extract_tool]

In [None]:
def process_query(query, available_metadata, available_metadata_description, model="gpt-4o-mini"):
    """Extract metadata key/value pairs from a user query via tool calling.

    Sends the query to the chat completions API together with a system prompt
    listing the available metadata keys and an 'extract_metadata' tool whose
    parameters are those keys.

    Args:
        query: The user's free-text query.
        available_metadata: Iterable of metadata key names to look for.
        available_metadata_description: Mapping from key name to description;
            keys without an entry are listed with an empty description.
        model: Value passed as ``model`` to the chat completions call.
            NOTE(review): with AzureOpenAI this presumably has to match the
            deployment name of the resource - confirm.

    Returns:
        A dict with the merged arguments of every 'extract_metadata' tool call
        in the reply (later calls win on duplicate keys). If the reply contains
        no such call, the assistant's text reply (``message.content``) is
        returned instead.

    Raises:
        json.JSONDecodeError: If the model produced tool-call arguments that
            are not valid JSON.
    """
    # System prompt: fixed instructions followed by one "- key: description" line per key.
    system_message = (
        "You are an assistant that extracts metadata from user queries based on available metadata keys and their descriptions.\n"
        "When the user provides a query, identify any metadata present that matches the available metadata keys and return it by calling the 'extract_metadata' function. "
        "If the query does not have any metadata from the list available, then just respond to the query by being a helpful assistant.\n"
        "Available metadata keys and descriptions:\n"
    )
    system_message += "".join(
        f"- {key}: {available_metadata_description.get(key, '')}\n"
        for key in available_metadata
    )

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": query},
    ]

    # The tool schema is derived from the same keys/descriptions as the prompt.
    function_schema = construct_function_schema(available_metadata, available_metadata_description)

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=function_schema,
        tool_choice="auto",
    )
    message = response.choices[0].message

    # A reply may carry several tool calls; collect metadata from every
    # 'extract_metadata' call rather than only the first one.
    extracted = {}
    matched = False
    for tool_call in message.tool_calls or []:
        if tool_call.function.name != "extract_metadata":
            continue
        arguments = json.loads(tool_call.function.arguments)
        if isinstance(arguments, dict):
            extracted.update(arguments)
            matched = True

    if matched:
        return extracted

    # No usable metadata call: fall back to the assistant's reply explicitly
    # instead of dropping out of the function with an implicit None.
    return message.content
  

In [None]:
# Example 1: book search - the query mentions an author and a publication year.

query = "Find me books by George Orwell published after 1945"

# Each key is a metadata field to look for; the value is its description.
available_metadata_description = {
    "author": "The name of the person who wrote the book. Example: 'George Orwell', 'J.K. Rowling'",
    "publication_year": "The year the book was published. Example: '1945', '1997'",
    "genre": "The category or type of the book. Example: 'Dystopian', 'Fantasy', 'Sci-Fi'",
}

# The list of keys is taken straight from the description mapping.
available_metadata = list(available_metadata_description)

result = process_query(query, available_metadata, available_metadata_description)
print(result)

In [None]:
# Example 2: querying problem tickets for metadata.

# Metadata fields for problem tickets, each with its description.
available_metadata_description = {
    "category": (
        "The category or type of the problem ticket. Possible values include 'People', 'Partners', 'Process', and 'Software'. "
        "For example, if the ticket is related to software issues, the category would be 'Software'."
    ),
    "problem_number": (
        "An alphanumeric identifier for the problem ticket that always starts with 'PRB' followed by a sequence of digits. "
        "It can also be referred to as a ticket number, problem ticket, or problem number. "
        "Examples include 'PRB0000045393', 'PRB0000123456'. These identifiers are used to track issues within the system and are not personal data."
    ),
    "location": (
        "The location associated with the problem ticket, typically a factory where the issue originated. "
        "The list of locations is non-exhaustive with some examples like: 'ALC4/WS Factory', 'CDAT Factory', 'D1 Factory', 'PGAT Factory' "
        "For example, a ticket generated at the D1 Factory would have the location 'D1 Factory'."
    ),
}

available_metadata_prob = list(available_metadata_description)

# Active query. Alternatives to try by swapping the assignment:
# query = "Find me books by George Orwell published after 1945 that are Romantic."
# query = "What are the problem tickets with the category Software at PGAT?"
query = "What is the workaround for PRB0000045393?"

result = process_query(query, available_metadata_prob, available_metadata_description)
print(result)