# Extracting information from input in a structured manner

This notebook performs simple extraction of information about a coffee order.

The usual boilerplate to import libraries and set the model.

You'll need your OpenAI key set as the environment variable `OPENAI_API_KEY`.

In [1]:
import json
import openai
import requests
from tenacity import retry, wait_random_exponential, stop_after_attempt
from termcolor import colored

# Model name used as the default `model` argument of chat_completion_request.
GPT_MODEL = "gpt-4"

This is the usual function for calling the ChatGPT API completion endpoint. It takes a list of messages,
optionally a list of functions, and optionally an instruction to create arguments for a specified function.

In [2]:
@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request(messages, functions=None, function_call=None, model=GPT_MODEL, timeout=120):
    """POST a chat-completion request to the OpenAI REST endpoint.

    Args:
        messages: List of chat message dicts sent as the "messages" field.
        functions: Optional list of function definitions; sent as "functions"
            only when not None.
        function_call: Optional function-call directive; sent as
            "function_call" only when not None.
        model: Model name sent as the "model" field.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the notebook forever.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked here,
        so callers must inspect the payload themselves.

    Raises:
        Any exception raised while sending the request is logged and re-raised
        so the ``@retry`` decorator can make another attempt. Once the attempts
        are exhausted, tenacity surfaces the failure to the caller (as a
        ``RetryError`` with the decorator's default settings).
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + openai.api_key,
    }
    json_data = {"model": model, "messages": messages}
    if functions is not None:
        json_data["functions"] = functions
    if function_call is not None:
        json_data["function_call"] = function_call
    try:
        return requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=json_data,
            timeout=timeout,
        )
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        # Returning the exception would look like success to @retry (so no retry
        # would ever happen) and would hand callers an object without .json().
        raise

This function color codes the messages by role:

In [3]:
def pretty_print_conversation(messages):
    """Print each message of a conversation, colored according to its role.

    Assistant messages carrying a truthy "function_call" are shown with the
    function color and display the call instead of the content. Messages whose
    role is not one of system/user/assistant/function are skipped silently.
    """
    role_to_color = {
        "system": "yellow",
        "user": "green",
        "assistant": "blue",
        "function": "magenta",
    }

    for message in messages:
        role = message["role"]

        if role == "system":
            text = f"system: {message['content']}\n"
            color = role_to_color[role]
        elif role == "user":
            text = f"user: {message['content']}\n"
            color = role_to_color[role]
        elif role == "assistant":
            if message.get("function_call"):
                # A function call is rendered in the "function" color.
                text = f"assistant: {message['function_call']}\n"
                color = role_to_color['function']
            else:
                text = f"assistant: {message['content']}\n"
                color = role_to_color[role]
        elif role == "function":
            text = f"function ({message['name']}): {message['content']}\n"
            color = role_to_color[role]
        else:
            # Unknown roles produce no output.
            continue

        print(colored(text, color))

Next, we define the function arguments - the structured data we want to extract from the free text.

In [4]:
## We could send this to an automated coffee machine
def coffee(coffee_order):
    """Print the given coffee order as indented JSON."""
    order_json = json.dumps(coffee_order, indent=2)
    print(f'Ordering coffee: {order_json}')

## Lookup table from function name to the Python callable that implements it.
available_functions = {"coffee": coffee}

## Function definitions sent to ChatGPT. Each 'parameters' object is a JSON Schema;
## here a single nested "order" object holds the fields to extract.
functions = [
    {
        "name": "coffee",
        "description": " Get the coffee order from the input ",
        "parameters": {
            "type": "object",
            "properties": {
                "order": {
                    "type": "object",
                    "properties": {
                        "coffee_type": {"type": "string"},
                        "temperature": {"type": "string"},
                        "size": {"type": "string"},
                        "milks": {"type": "string"},
                        "sugar": {"type": "string"},
                        "sugar_count": {"type": "number"},
                        "syrups": {"type": "string"},
                    },
                },
            },
        },
    },
]
        

In [5]:
# Sample user utterances: a coffee order, and a question that is not a coffee order.
inputs = [
    "I'd like a tall cold latte please with 2 sugars and hazelnut syrup",
    "What's the weather in London"
]

In [6]:

# For each sample utterance: ask the model to extract structured data, print the
# conversation, and dispatch any returned function call to its Python handler.
for user_input in inputs:  # named user_input so the built-in input() is not shadowed
    messages = [
        {"role": "system", "content": "Extract user input into structured data"},
        {"role": "user", "content": user_input},
    ]
    resp = chat_completion_request(messages, functions=functions)

    response_message = None
    if isinstance(resp, Exception):
        # Guard against a helper that hands back an exception object
        # rather than an HTTP response.
        print(f'request failed: {resp}')
    else:
        try:
            payload = resp.json()
        except ValueError:
            payload = None  # body was not valid JSON
        if isinstance(payload, dict) and payload.get('choices'):
            response_message = payload['choices'][0]['message']
        else:
            # Error payloads (bad key, rate limit, ...) carry no "choices".
            print(f'unexpected API response: {payload}')

    if response_message is not None:
        messages.append(response_message)
        pretty_print_conversation(messages)

        function_call = response_message.get('function_call')
        if function_call is not None:
            to_call = function_call['name']
            handler = available_functions.get(to_call)
            if handler is None:
                # The model may name a function that was never registered.
                print(f'model requested unknown function: {to_call}')
            else:
                try:
                    f_args = json.loads(function_call['arguments'])['order']
                except (ValueError, KeyError, TypeError) as e:
                    # Arguments are model-generated text: they may be invalid
                    # JSON or lack the "order" key.
                    print(f'could not read order from function arguments: {e!r}')
                else:
                    handler(f_args)
        else:
            print('no function was returned')
    print('-----------------------------------')
    print('-----------------------------------')
    

[33msystem: Extract user input into structured data
[0m
[32muser: I'd like a tall cold latte please with 2 sugars and hazelnut syrup
[0m
[35massistant: {'name': 'coffee', 'arguments': '{\n  "order": {\n    "coffee_type": "latte",\n    "temperature": "cold",\n    "size": "tall",\n    "sugar": "yes",\n    "sugar_count": 2,\n    "syrups": "hazelnut"\n  }\n}'}
[0m
Ordering coffee: {
  "coffee_type": "latte",
  "temperature": "cold",
  "size": "tall",
  "sugar": "yes",
  "sugar_count": 2,
  "syrups": "hazelnut"
}
-----------------------------------
-----------------------------------
[33msystem: Extract user input into structured data
[0m
[32muser: What's the weather in London
[0m
[34massistant: This query does not call for a coffee order extraction, hence no structured data needs to be output.
[0m
no function was returned
-----------------------------------
-----------------------------------
