References:
* OpenAI's API: https://platform.openai.com/docs/guides/gpt/function-calling
* Functions Cookbook: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_call_functions_with_chat_models.ipynb

In [3]:
# This notebook tests the new function calling feature of GPT-3.5-turbo.

# Set up chatgpt api
import openai
import json
import os

In [127]:
GPT_KEY = os.environ['OPENAI_API_KEY']
GPT_MODEL_OLD = "gpt-3.5-turbo"
GPT_MODEL = "gpt-3.5-turbo-0613"
GPT4_MODEL = "gpt-4-0613"


# 0. The problem of unstructured text generation

LLMs ingeneral and ChatGPT in particular only aim to generate the next most feasible token. This has made it notoriously hard to incorporate LLMs with existing applications which require structured data.

In [200]:
user_message = "Hi, can I buy half a pound of salmon?"

def parser_without_functions(message):
    """
    Parse the product name, quantity, and unit from the user's message.
    """
    gpt_response = openai.ChatCompletion.create(
        model=GPT_MODEL_OLD,
        messages=[
            {"role": "system", "content": """Parse the product name, quantity, 
                and unit from the user's message. Return ONLY a JSON object 
                with the product_name, product_quantity, and unit. Do not 
                include any other information."""},
            {"role": "user", "content": message}
            ],
    )
    data = gpt_response.choices[0]["message"]["content"]
    try:
        data = json.loads(data)
    except:
        pass
    return data, gpt_response

In [37]:
for i in range(3):
    data, gpt_response = parser_without_functions(user_message)
    print(f"Output #{i}:", data, "\n====================\n")

Output #0: Sure! Here's the parsed information:

{
  "product_name": "salmon",
  "product_quantity": 0.5,
  "unit": "pound"
} 

Output #1: Sure, for what type of salmon would you like to buy half a pound? 

Output #2: Sure! Here's the extracted information in JSON format:

```
{
  "product_name": "salmon",
  "product_quantity": 0.5,
  "unit": "pound"
}
``` 



## 1. Parse information

In [198]:
describe__get_product_info = {
    "name": "get_product_info",
    "description": """Parse the product name, quantity, and unit from the 
        user's message.""",
    "parameters": {
        "type": "object",
        "properties": {
            "product_name": {
                "type": "string",
                "description": "The name of the product"
                },
            "product_quantity": {
                "type": "number", 
                "description": "The quantity of the product."
                },
            "unit": {
                "type": "string", 
                "description": "The unit of the product.", 
                "enum": ["kg", "g", "lb", "oz", "unit"]
                },
        },
        "required": ["product_name", "product_quantity", "unit"],
    }
}

In [201]:
def parser_with_functions(message):
    """
    Parse the product name, quantity, and unit from the user's message.
    """
    gpt_response = openai.ChatCompletion.create(
        model=GPT_MODEL,
        messages=[{"role": "user", "content": message}],
        functions=[describe__get_product_info],
        function_call={"name": "get_product_info"},
    )
    data_json = gpt_response["choices"][0]["message"]['function_call']['arguments']
    data = json.loads(data_json)
    return data, gpt_response

for i in range(3):
    data, gpt_response = parser_with_functions(user_message)
    print(f"Output #{i}:", data, "\n====================\n")

Output #0: {'product_name': 'salmon', 'product_quantity': 0.5, 'unit': 'lb'} 

Output #1: {'product_name': 'salmon', 'product_quantity': 0.5, 'unit': 'lb'} 

Output #2: {'product_name': 'salmon', 'product_quantity': 0.5, 'unit': 'lb'} 



In [195]:
user_message = "Please add to cart 5 apples"
for i in range(3):
    data, gpt_response = parser_with_functions(user_message)
    print(f"Output #{i}:", data, "\n====================\n")

Output #0: {'product_name': 'apples', 'product_quantity': 5, 'unit': 'unit'} 

Output #1: {'product_name': 'apples', 'product_quantity': 5, 'unit': 'unit'} 

Output #2: {'product_name': 'apples', 'product_quantity': 5, 'unit': 'unit'} 



## 2. Get structured answers from ChatGPT

In [203]:
text = """Einstein married Elsa Lowenthal on 2 June 1919, after having had 
    a relationship with her since 1912. In 1933, they emigrated to the 
    United States and stayed for the rest of their lives. In 1935, Elsa 
    Einstein was diagnosed with heart and kidney problems; she died in 
    December 1936. """
good_summary = "Elsa Einstein was diagnosed with health problems in the US."
bad_summary = "Elsa Einstein was diagnosed with health problems in Germany."

In [204]:
desc__check_contradiction = {
    "name": "check_contradiction",
    "description": "Check the contradiction between the text and the summary",
    "parameters": {
        "type": "object",
        "properties": {
            "analysis":{
                "type": "string",
                "description": """Logical analysis comparing the summary with 
                    the text content."""
            },
            "assessment": {
                "type": "string", 
                "description": """Final assessment of the contradiction 
                    between the text and the summary""",
                "enum": ["contradiction", "entailment"]
            },
        },
        "required": ["analysis", "assessment"],
    }
}

In [205]:
def is_contradiction(text, summary):
    """Check if the summary contradict the text"""
    content = f"""[Text]\n{text}\n---\n[Summary]\n{summary}"""
    gpt_response = openai.ChatCompletion.create(
        model=GPT4_MODEL,
        messages=[{"role": "user", "content": content}],
        functions=[desc__check_contradiction],
        function_call={"name":"check_contradiction"},
    )
    data_js = gpt_response.choices[0]["message"]['function_call']['arguments']
    data = json.loads(data_js)
    if data["assessment"] == "contradiction":
        return True, data
    elif data["assessment"] == "entailment":
        return False, data
    else:
        raise Exception("Invalid GPT response")

In [None]:
print("Good Summary\n")
for i in range(3):
    assessment, data = is_contradiction(text, good_summary)
    print("Analysis:", data["analysis"])
    print(f"Final ssessment #{i}. Summary contradict text:", assessment, 
          "\n====================\n")

In [129]:
print("Bad Summary\n")
for i in range(3):
    assessment, data = is_contradiction(tetxt, bad_summary)
    print("Analysis:", data["analysis"])
    print(f"Final ssessment #{i}. Summary contradict text:", assessment, 
          "\n====================\n")

Bad Summary

Analysis: The summary states that Elsa Einstein was diagnosed with health problems in Germany. In the text it is indicated that Elsa Einstein was diagnosed with heart and kidney problems, but the location of the diagnosis is not stated. Therefore, one cannot confirm or negate that the diagnosis took place in Germany based on the provided text.
Final ssessment #0. Summary contradict text: True 

Analysis: The text mentions that Elsa Einstein was diagnosed with health problems, but it does not specify the country this occurred in. The summary assumes that this happened in Germany, which might not necessarily be the case as the text also mentions that they emigrated to United States.
Final ssessment #1. Summary contradict text: True 

Analysis: The text provides detailed information about Elsa Einstein's life, from her marriage to Einstein in 1919, their emigration to the United States in 1933, as well as her diagnosis of heart and kidney problems in 1935. However, it doesn't

# Allow chatbot to use external tools

ChatGPT is powerful, however, it is still limited in many ways like not having access to the internet, limited ability to do math. Using functions, we can enable ChatGPT to use external tools to supplement its capabilities. In fact, this is probably the main use of functions that OpenAI's developer had in mind.

Can already be done with Lang Chain and similar wrappers, but the stability are not gauranteed and not considered "native support" by OpenAI

In [209]:
import sqlite3
from pprint import pprint

conn = sqlite3.connect('data/sqlite/grocery_txn.db')
conn.execute("""CREATE TABLE IF NOT EXISTS transactions (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_name TEXT NOT NULL,
    product_name TEXT NOT NULL,
    quantity INTEGER NOT NULL,
    price REAL NOT NULL,
    total REAL NOT NULL
);""")
rows = [
    ('john', 'apple', 10, 1.0, 10.0),
    ('john', 'orange', 5, 0.5, 2.5),
    ('hannah', 'apple', 5, 1.0, 5.0),
    ('hannah', 'orange', 10, 0.5, 5.0),
    ('hannah', 'banana', 5, 0.2, 1)]
conn.executemany("""
    INSERT INTO transactions (user_name, product_name, quantity, price, total)
    VALUES (?, ?, ?, ?, ?)""", 
    rows)
conn.commit()
print("id, user_name, product_name, quantity, price, total")
pprint(conn.execute("SELECT * FROM transactions").fetchall())
conn.execute(
    "SELECT user_name, sum(total) as spending FROM transactions group by 1"
    ).fetchall()

database_schema_string = """Table: transactions
    Columns: id, user_name, product_name, quantity, price, total"""

id, user_name, product_name, quantity, price, total
[(1, 'john', 'apple', 10, 1.0, 10.0),
 (2, 'john', 'orange', 5, 0.5, 2.5),
 (3, 'hannah', 'apple', 5, 1.0, 5.0),
 (4, 'hannah', 'orange', 10, 0.5, 5.0),
 (5, 'hannah', 'banana', 5, 0.2, 1.0)]


In [237]:
desc__query_database = {
    "name": "query_database",
    "description": "Generate fully-formed SQL query to answer user's questions",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": f"""The SQL query extracting relevant info to 
                    the user's question. Use the following schema:
                        {database_schema_string}""",
            }
        },
        "required": ["query"],
    },
}

def query_database(conn, query):
    try:
        results = str(conn.execute(query).fetchall())
    except Exception as e:
        results = f"Query execution failed with error: {e}"
    return results

def execute_function_call(f_call):
    if f_call["name"] == "query_database":
        q = json.loads(f_call["arguments"])["query"]
        results = query_database(conn, q)
    else:
        results = f"""Error: function {f_call["name"]} does not exist"""
    return results

In [246]:
convo = []
convo.append({"role": "system", "content": """Answer user questions by 
    generating SQL queries against the Grocery Transaction Database."""})
convo.append({"role": "user", "content": "Hi, how many apples were sold?"})

In [247]:
def generate_convo(convo, max_internal_turns=10):
    i = 0
    while ((i <= max_internal_turns) and not (
        convo[-1]["role"] == "assistant" and not convo[-1].get("function_call")
        )):
        # Get gpt response
        gpt_response = openai.ChatCompletion.create(
            model=GPT_MODEL,
            messages=convo,
            functions=[desc__query_database],
        function_call="auto",
        )
        # Extract the assistant's message and append to the conversation
        assistant_message = gpt_response["choices"][0]["message"]
        convo.append(assistant_message)
        # If the model suggested using a function call, get the result
        f_call = assistant_message.get("function_call")
        if f_call:
            results = execute_function_call(f_call)
            convo.append({
                "role": "function", "name": f_call["name"], "content": results
                })
        i += 1
    return convo


In [248]:
generate_convo(convo)    
pprint(convo)


[{'content': 'Answer user questions by \n'
             '    generating SQL queries against the Grocery Transaction '
             'Database.',
  'role': 'system'},
 {'content': 'Hi, how many apples were sold?', 'role': 'user'},
 {'content': None,
  'function_call': {'arguments': '{\n'
                                 '  "query": "SELECT SUM(quantity) AS '
                                 'total_apples FROM transactions WHERE '
                                 'product_name = \'apple\'"\n'
                                 '}',
                    'name': 'query_database'},
  'role': 'assistant'},
 {'content': '[(15,)]', 'name': 'query_database', 'role': 'function'},
 {'content': 'A total of 15 apples were sold.',
  'role': 'assistant'}]


In [249]:
# We can choose to expose only actual results and hide the function calls
# Also, we can add some color coding to the output
def pprint_convo(convo, hide_internal=True):
    CYAN, GREEN, RED, BOLD = "\033[96m", "\033[92m", "\033[31m", "\033[1m"
    col_role = {"user":CYAN, "assistant":GREEN, "system":RED, "function":RED}
    begin_role = {"user":"\n", "assistant":"", "system":"", "function":""}
    
    for mes in convo:
        role = mes["role"]
        cont = mes["content"]
        if mes["role"] not in ["user", "assistant"] or mes.get("function_call"):
            if hide_internal:
                continue
        
        print("{fm_role}{role}:\033[0m {fm_content}{cont}\033[0m".format(
            fm_role = begin_role[role] + col_role[role] + BOLD,
            role=role,
            fm_content = col_role[role],
            cont=cont
        ))


In [250]:
pprint_convo(convo)


[96m[1muser:[0m [96mHi, how many apples were sold?[0m
[92m[1massistant:[0m [92mA total of 15 apples were sold.[0m


In [251]:
convo.append({
    "role": "user", 
    "content": "Who are the top 2 buyers in number of unique products?"})
generate_convo(convo)
pprint_convo(convo)



[96m[1muser:[0m [96mHi, how many apples were sold?[0m
[92m[1massistant:[0m [92mA total of 15 apples were sold.[0m

[96m[1muser:[0m [96mWho are the top 2 buyers in number of unique products?[0m
[92m[1massistant:[0m [92mThe top 2 buyers in terms of the number of unique products purchased are:
1. hannah - purchased 3 unique products
2. john - purchased 2 unique products[0m


In [252]:
convo.append({
    "role": "user", 
    "content": "What product did Hannah bought but John didn't?"
    })
generate_convo(convo)
pprint_convo(convo)



[96m[1muser:[0m [96mHi, how many apples were sold?[0m
[92m[1massistant:[0m [92mA total of 15 apples were sold.[0m

[96m[1muser:[0m [96mWho are the top 2 buyers in number of unique products?[0m
[92m[1massistant:[0m [92mThe top 2 buyers in terms of the number of unique products purchased are:
1. hannah - purchased 3 unique products
2. john - purchased 2 unique products[0m

[96m[1muser:[0m [96mWhat product did Hannah bought but John didn't?[0m
[92m[1massistant:[0m [92mHannah bought a product that John didn't, and that product is a banana.[0m


In [253]:
pprint_convo(convo, hide_internal=False)

[31m[1msystem:[0m [31mAnswer user questions by 
    generating SQL queries against the Grocery Transaction Database.[0m

[96m[1muser:[0m [96mHi, how many apples were sold?[0m
[92m[1massistant:[0m [92mNone[0m
[31m[1mfunction:[0m [31m[(15,)][0m
[92m[1massistant:[0m [92mA total of 15 apples were sold.[0m

[96m[1muser:[0m [96mWho are the top 2 buyers in number of unique products?[0m
[92m[1massistant:[0m [92mNone[0m
[31m[1mfunction:[0m [31m[('hannah', 3), ('john', 2)][0m
[92m[1massistant:[0m [92mThe top 2 buyers in terms of the number of unique products purchased are:
1. hannah - purchased 3 unique products
2. john - purchased 2 unique products[0m

[96m[1muser:[0m [96mWhat product did Hannah bought but John didn't?[0m
[92m[1massistant:[0m [92mNone[0m
[31m[1mfunction:[0m [31m[('banana',)][0m
[92m[1massistant:[0m [92mHannah bought a product that John didn't, and that product is a banana.[0m


In [254]:
convo

[{'role': 'system',
  'content': 'Answer user questions by \n    generating SQL queries against the Grocery Transaction Database.'},
 {'role': 'user', 'content': 'Hi, how many apples were sold?'},
 <OpenAIObject at 0x14ec57220> JSON: {
   "content": null,
   "function_call": {
     "arguments": "{\n  \"query\": \"SELECT SUM(quantity) AS total_apples FROM transactions WHERE product_name = 'apple'\"\n}",
     "name": "query_database"
   },
   "role": "assistant"
 },
 {'role': 'function', 'name': 'query_database', 'content': '[(15,)]'},
 <OpenAIObject at 0x14ec57ea0> JSON: {
   "content": "A total of 15 apples were sold.",
   "role": "assistant"
 },
 {'role': 'user',
  'content': 'Who are the top 2 buyers in number of unique products?'},
 <OpenAIObject at 0x14f6d29a0> JSON: {
   "content": null,
   "function_call": {
     "arguments": "{\n  \"query\": \"SELECT user_name, COUNT(DISTINCT product_name) AS unique_products FROM transactions GROUP BY user_name ORDER BY unique_products DESC LIMI