# How to reduce hallucination

## Objectives:
1. Show how to utilize function calls to create structured data
2. Demonstrate a limitation of function calling: its tendency to hallucinate
3. Develop and test some methods to prevent hallucination

In [1]:
import openai
import pandas as pd
from datetime import datetime
import json
import os
from string import Template
import time
from ipywidgets import IntProgress
from IPython.display import display

In [2]:
# Currently 0613 model versions are required to utilize function calling (as of 6/25/23)
# Eventually you will be able to just use gpt-3.5-turbo or gpt-4
# This name is read as a global by get_model_completion for every request.
model = "gpt-3.5-turbo-0613"

In [3]:
def get_model_completion(
    messages : list, 
    functions : list = None, 
    function_call : "str | dict | None" = "none") -> "dict | str":
    """Run one chat completion and return either parsed function arguments or text.

    Parameters
    ----------
    messages : list
        Chat messages (dicts with "role" and "content") forwarded to the API.
    functions : list, optional
        Function schemas forwarded to the API. When omitted, a plain chat
        completion is requested and `function_call` is ignored.
    function_call : str | dict | None
        How the model should use `functions`:
        * "none" / "auto" are forwarded unchanged,
        * None is treated as "auto",
        * any other string is taken as a function name and wrapped as
          {"name": <string>} to force that function,
        * a dict is forwarded unchanged.

    Returns
    -------
    dict | str
        The JSON-decoded `arguments` of the function call when the reply
        contains one, otherwise the reply's `content`.

    Raises
    ------
    json.JSONDecodeError
        If the model returns function arguments that are not valid JSON.
    Any exception raised by `openai.ChatCompletion.create` propagates unchanged.
    """
    # Setting the openai api key -- I have found it is better to set here so that it doesn't time out
    openai.api_key = os.getenv('OPENAI_API_KEY')

    if functions is None:
        # Running the normal sans functions chat completion
        response = openai.ChatCompletion.create(
            model = model,
            messages = messages,
        )
    else:
        # Running chat completion when function schemas are supplied
        if function_call is None:
            function_call = 'auto'
        elif isinstance(function_call, str) and function_call not in ('auto', 'none'):
            # A bare function name has to be wrapped to force that function.
            # "none" and "auto" are API keywords and must NOT be wrapped;
            # previously the default "none" became {"name": "none"}.
            function_call = {"name" : function_call}

        # temperature is pinned to 0 only on this branch, as in the original code
        response = openai.ChatCompletion.create(
            model = model,
            messages = messages,
            functions = functions,
            function_call = function_call,
            temperature = 0
        )

    message = response["choices"][0]["message"]
    # Conditionally returning the json data or the message string depending on the format
    if message.get("function_call"):
        return json.loads(message["function_call"]["arguments"])
    return message["content"]

## Utilizing function calling to create structured data

In [4]:
# First we are going to use gpt to create a list of product reviews
categories = [
    "headphones",
    "speakers",
    "coolers",
    "RC cars"
]

base_prompt = "You are a product review writing assistant. The output you create will be used to \
test functiionality of a large language model. You are to write a review about a product with the \
following details: \
product category: ${category} \
review_sentiment: ${sentiment}"

prompt_template = Template(base_prompt)

# Every category is reviewed `rounds` times, so the progress bar has to cover
# rounds * len(categories) requests (16 here) rather than a hard-coded 15.
rounds = 4
max_count = rounds * len(categories)
f = IntProgress(min = 0, max=max_count)
display(f)

z = 1  # 1-based request counter; drives both the pauses and the sentiment rotation
reviews = []
for _ in range(rounds):
    for category in categories:
        # Pause before every third request (presumably to respect the API rate limit)
        if z % 3 == 0:
            time.sleep(60)

        # Rotate sentiments: multiples of 3 -> positive, other even numbers -> negative, rest -> neutral
        if z % 3 == 0:
            sentiment = "positive"
        elif z % 2 == 0:
            sentiment = "negative"
        else:
            sentiment = "neutral"
        
        messages = [
            {
                "role" : "user",
                "content" : prompt_template.substitute(category = category, sentiment = sentiment)
            }
        ]

        # One attempt plus one retry after a 60 second back-off. `except Exception`
        # (not a bare except) keeps a kernel interrupt from being swallowed.
        for attempt in range(2):
            try:
                review = get_model_completion(messages)
            except Exception as err:
                if attempt == 0:
                    time.sleep(60)
                else:
                    # Best effort: report the skipped review instead of dropping it silently
                    print(f"Skipping {sentiment} review for {category}: {err!r}")
            else:
                reviews.append(review)
                break

        z += 1
        f.value += 1

IntProgress(value=0, max=15)

In [5]:
# Look at one of the reviews (the first one that was generated)
print(reviews[0])

Title: A Well-Priced Pair of Headphones for Everyday Use

I recently purchased a pair of headphones from [Product Name], and overall, I have mixed feelings about my experience. While they aren't groundbreaking in terms of features or audio quality, they do offer decent performance for the price.

In terms of design, these headphones have a sleek and modern look that I find quite appealing. The build quality is satisfactory, and they feel sturdy enough to withstand regular usage. The adjustable headband and comfortably cushioned ear cups ensure a snug fit for extended listening sessions.

Although the audio quality is not exceptional, it is satisfactory for casual listening. The sound is clear and crisp, providing an immersive experience while watching movies or listening to music. However, audiophiles might find the bass response a bit underwhelming, and the overall soundstage lacks depth compared to higher-end models.

The noise isolation feature works reasonably well, effectively blo

## Setting up the function that we want to be calling with gpt

### Guidelines:
1. Make your descriptions like you are giving a set of instructions for a human agent that would be processing this data
2. Utilize tools like enum to bound the responses for fields with a list of acceptable values
3. Schema set up is explained in detail [here](https://json-schema.org/understanding-json-schema/)

In [6]:
# Create the function and parsing that we want for utilizing these product reviews
def review_processing(product : str, product_category: str, sentiment: str, summary: str):
    """Pack the extracted fields of one review into a single-row DataFrame.

    The columns are, in order: product, product_category, sentiment, summary.
    """
    fields = dict(
        product = product,
        product_category = product_category,
        sentiment = sentiment,
        summary = summary,
    )
    # from_dict(orient='index') yields one row per field; transposing turns
    # the field names into columns and leaves a single row of values.
    by_field = pd.DataFrame.from_dict(fields, orient = 'index')
    return by_field.transpose()

# Schema describing review_processing to the model: four string fields,
# all required, with sentiment restricted to a fixed set of values.
functions = [
    dict(
        name = "review_processing",
        description = "This function will intake a product review and will format the data as a pandas dataframe",
        parameters = dict(
            type = "object",
            properties = dict(
                product = dict(
                    type = "string",
                    description = "The name of the product being reviewed",
                ),
                product_category = dict(
                    type = "string",
                    description = "This is the category of the product being reviewed. Example : Headphones",
                ),
                sentiment = dict(
                    type = "string",
                    description = "This is the user tone of the reveiw",
                    enum = ["positive", "negative", "neutral"],
                ),
                summary = dict(
                    type = "string",
                    description = "A very short summary for an executive level audience of the product review",
                ),
            ),
            required = ["product", "product_category", "sentiment", "summary"],
        ),
    )
]

In [7]:
# Turn every generated review into one structured row via the forced
# review_processing function call, then assemble the rows into `df`.
max_count = len(reviews)
f = IntProgress(min = 0, max=max_count)
display(f)

# Collect the single-row frames and concatenate once at the end; calling
# pd.concat inside the loop re-copies all earlier rows on every iteration.
review_frames = []

for idx, review in enumerate(reviews):
    prompt = f"You are a review processing assistant. Your role is to take a product review delimited by triple \
    back ticks and return a structured response as defined by the function review_processing \
    ```\
    {review}\
    ```"
    messages = [
        {
            "role" : "user",
            "content" : prompt
        }
    ]
    # Pause before every third request (presumably to respect the API rate limit)
    if (idx + 1) % 3 == 0:
        time.sleep(60)

    # One attempt plus one retry after a 60 second back-off. `except Exception`
    # (not a bare except) keeps a kernel interrupt from being swallowed.
    function_params = None
    for attempt in range(2):
        try:
            function_params = get_model_completion(messages, functions = functions, function_call = "review_processing")
            break
        except Exception as err:
            if attempt == 0:
                time.sleep(60)
            else:
                print(f"Skipping review {idx}: {err!r}")

    # Only a dict of arguments can be splatted into review_processing; a plain
    # text reply (no function call) is skipped instead of raising.
    if isinstance(function_params, dict):
        review_frames.append(review_processing(**function_params))
    f.value += 1

if review_frames:
    df = pd.concat(review_frames, axis = 0).reset_index(drop = True)
else:
    # Nothing could be processed: keep `df` defined, as the original cell did
    df = pd.DataFrame()

IntProgress(value=0, max=16)

In [8]:
# Show the structured reviews as a rich table (last expression of the cell)
df

Unnamed: 0,product,product_category,sentiment,summary
0,[Product Name],Headphones,neutral,A Well-Priced Pair of Headphones for Everyday Use
1,speakers,Audio,negative,Disappointing Sound Quality and Lackluster Exp...
2,[Product Name],Coolers,positive,The Ultimate Cooler for All Your Outdoor Adven...
3,RC Car,Toys,negative,Disappointed with the Lackluster Performance o...
4,XYZ Headphones,Headphones,neutral,A Solid Choice for Everyday Use
5,Speakers,Audio,positive,Exceptional Sound and Sleek Design - A Game-Ch...
6,Cooler,Coolers,neutral,A Decent Cooler with Room for Improvement
7,RC Car,Toys,negative,Extremely Disappointed with the RC Car
8,Headphones,Headphones,positive,The XYZ headphones are a fantastic product tha...
9,Speakers,Speakers,negative,Disappointing Sound Quality


## Stage 2 -- highlighting limitations

The biggest issue that I have encountered with function calling is how likely the model is to hallucinate when it encounters incomplete information.

In [9]:
# Let's create a new function schema; the prompt used with it will only
# provide partial data, to highlight the issue.
# Note: all four fields are listed under "required", so the schema asks the
# model for an assignee and a deadline on every call.

functions = [
    {
        "name" : "product_task",
        "description" : "This function will take in datapoints about a new product task and return them \
        organized as a python dictionary",
        "parameters" : {
            "type" : "object",
            "properties" : {
                "title" : {
                    "type" : "string",
                    "description" : "This is a descriptive title of the project. \
                    Example : [Chat Bot] - Create the api endpoint"
                },
                "description" : {
                    "type" : "string",
                    "description" : "This is a list of steps for the engineering team to follow to complete this task"
                },
                # assignee is bounded to a fixed list of names via enum
                "assignee" : {
                    "type" : "string",
                    "description" : "This is the name of the person who will be completing this task",
                    "enum" : ["Nelson", "LeBron James", "Sam Altman"]
                },
                # The f-string embeds today's date once, when this cell is executed
                "deadline" : {
                    "type" : "string",
                    "description" : f"This is the date the project is to be completed by. This has to be after \
                    {datetime.now().date()}. It is to be formatted as year-month-day"
                }
            },
            "required" : ["title", "description", "assignee", "deadline"]
        }
    }
]

In [10]:
# Running a prompt that will leave out deadline and assignee on purpose.
# The prompt names product_task, the function declared in the `functions`
# schema and forced through function_call (it used to say jira_api_call).
# The stray "/" characters, which were sent to the model literally, are gone.
test_prompt = """
You are product management assistant. Your role is to chat about new features
or products that they want to produce.  You will then format that information to use the function
product_task. The user input will be delimited by triple back ticks.

```
Create a new api endpoint that will take in user input, call a large language model, and then return
the model output to a user.
```
"""

In [11]:
# Measure how often the model fills in assignee / deadline although the
# prompt contains neither. Any value returned for those fields is counted.
assignee = 0
deadline = 0
number_of_runs = 0

max_count = 50
f = IntProgress(min = 0, max=max_count)
display(f)

# The request is identical on every run, so it is built once
messages = [
    {
        "role" : "user",
        "content" : test_prompt
    }
]

for i in range(max_count):
    # Pause on every third iteration, starting with the first
    # (presumably to respect the API rate limit)
    if i % 3 == 0:
        time.sleep(60)

    try:
        output = get_model_completion(messages, functions = functions, function_call = "product_task")
    except Exception as err:
        # Failed requests are reported and left out of the statistics.
        # `except Exception` keeps a kernel interrupt from being swallowed.
        print(f"Run {i} failed and is not counted: {err!r}")
    else:
        # A plain-text reply carries no function arguments and is not counted
        if isinstance(output, dict):
            if "assignee" in output:
                assignee += 1
            if "deadline" in output:
                deadline += 1
            number_of_runs += 1
    f.value += 1

if number_of_runs:
    assignee_hallucination_percent = assignee / number_of_runs
    deadline_hallucination_percent = deadline / number_of_runs
else:
    # Every request failed: report NaN instead of raising ZeroDivisionError
    assignee_hallucination_percent = float("nan")
    deadline_hallucination_percent = float("nan")

st = "assignee_hallucination_percent: {:.1%} deadline_hallucination_percent: {:.1%}".format(assignee_hallucination_percent, deadline_hallucination_percent)
print(st)

IntProgress(value=0, max=50)

assignee_hallucination_percent: 100.0% deadline_hallucination_percent: 100.0%


## Stage 3 - Fix some of those limitations

### Guidelines:
1. The first step is to tell the model not to lie to you. Use phrases like `do not make something up`
2. If you know that fields may be left out, make them optional
3. Give the model a way out using phrases like `return -1 if unknown`
4. If you are using things like enums, make sure to include -1 in your options

In [12]:
# Updating the fields knowing that we want to prevent the model from hallucinating on assignee and deadline
# Changes in this schema: both descriptions tell the model not to guess and to
# return -1 when unknown, "-1" is an allowed enum value for assignee, and
# assignee / deadline are no longer listed under "required".

functions = [
    {
        "name" : "product_task",
        "description" : "This function will take in datapoints about a new product task and return them \
        organized as a python dictionary",
        "parameters" : {
            "type" : "object",
            "properties" : {
                "title" : {
                    "type" : "string",
                    "description" : "This is a descriptive title of the project. \
                    Example : [Chat Bot] - Create the api endpoint"
                },
                "description" : {
                    "type" : "string",
                    "description" : "This is a list of steps for the engineering team to follow to complete this task"
                },
                # "-1" is part of the enum so the escape hatch is a valid value
                "assignee" : {
                    "type" : "string",
                    "description" : "This is the name of the person who will be completing this task. Do not guess \
                    Do not make something up. If unknown return -1",
                    "enum" : ["Nelson", "LeBron James", "Sam Altman", "-1"]
                },
                # The f-string embeds today's date once, when this cell is executed
                "deadline" : {
                    "type" : "string",
                    "description" : f"This is the date the project is to be completed by. This has to be after \
                    {datetime.now().date()}. It is to be formatted as year-month-day. Do not guess. Do not make \
                    something up. If unknown return -1"
                }
            },
            # Only the fields the prompt can actually supply stay mandatory
            "required" : ["title", "description"]
        }
    }
]

In [13]:
# Measure hallucination again: a field counts as hallucinated only when it is
# present AND holds something other than the "-1" unknown marker.
assignee = 0
deadline = 0
number_of_runs = 0

max_count = 50
f = IntProgress(min = 0, max=max_count)
display(f)

# The request is identical on every run, so it is built once
messages = [
    {
        "role" : "user",
        "content" : test_prompt
    }
]

for i in range(max_count):
    # Pause on every third iteration, starting with the first
    # (presumably to respect the API rate limit)
    if i % 3 == 0:
        time.sleep(60)

    try:
        output = get_model_completion(messages, functions = functions, function_call = "product_task")
    except Exception as err:
        # Failed requests are reported and left out of the statistics.
        # `except Exception` keeps a kernel interrupt from being swallowed.
        print(f"Run {i} failed and is not counted: {err!r}")
    else:
        # A plain-text reply carries no function arguments and is not counted
        if isinstance(output, dict):
            # str() so that a numeric -1 is also recognised as the unknown marker
            if "assignee" in output and str(output["assignee"]).strip() != "-1":
                assignee += 1
            if "deadline" in output and str(output["deadline"]).strip() != "-1":
                deadline += 1
            number_of_runs += 1
    f.value += 1

if number_of_runs:
    assignee_hallucination_percent = assignee / number_of_runs
    deadline_hallucination_percent = deadline / number_of_runs
else:
    # Every request failed: report NaN instead of raising ZeroDivisionError
    assignee_hallucination_percent = float("nan")
    deadline_hallucination_percent = float("nan")

st = "assignee_hallucination_percent: {:.1%} deadline_hallucination_percent: {:.1%}".format(assignee_hallucination_percent, deadline_hallucination_percent)
print(st)

IntProgress(value=0, max=50)

assignee_hallucination_percent: 0.0% deadline_hallucination_percent: 0.0%
