# LLM Strict JSON Framework
- Created by John Tan Chong Min
- 3 Jul 2023
- Collaborators welcome

In [1]:
import os
import openai
import json
import re

#API Keys
os.environ['OPENAI_API_TOKEN'] = 'YOUR_API_KEY_HERE'
# The openai package only picks up OPENAI_API_KEY (read once, at import time), so the
# variable set above is never seen by it. Hand the key over explicitly, but do not
# override a key the library has already found in the environment.
if getattr(openai, 'api_key', None) is None:
    openai.api_key = os.environ['OPENAI_API_TOKEN']

# Normal GPT Chat Function

- Normal way to invoke OpenAI API for GPT Models
- Can be very verbose in its replies
- Not well structured

In [3]:
def chat(system_prompt, user_prompt, model = 'gpt-3.5-turbo', temperature = 0, verbose = False):
    ''' Sends one system message and one user message to the OpenAI chat API
    and returns the text of the first choice.
    When verbose is set, both prompts and the reply are printed as well. '''
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=conversation,
    )

    # only the first returned choice is used
    reply = completion['choices'][0]['message']['content']

    if verbose:
        for label, text in (('System prompt:', system_prompt),
                            ('User prompt:', user_prompt),
                            ('GPT response:', reply)):
            print(label, text)

    return reply

In [4]:
res = chat(system_prompt = "You are a friendly assistant", 
     user_prompt = "Is the number 5 even or odd?")
print(res)

The number 5 is an odd number.


# Strict Output Formatting
- Use when you want to force the function output to be in json format
- Helps a lot with minimizing unnecessary explanations of ChatGPT, and ensuring all output fields are there

In [5]:
def _build_format_prompt(output_format, list_input):
    ''' Builds the formatting instructions that are appended to the system prompt. '''
    format_text = str(output_format)

    prompt = (f"\nYou are to output the following in json format: {output_format}. \n"
              "Do not put quotation marks or escape character \\ in the output fields.")

    # if the output format contains list elements of [ or ], then add to the prompt to handle lists
    if '[' in format_text:
        prompt += "\nIf output field is a list, classify output into the best element of the list."

    # if the output format contains dynamic elements of < or >, then add to the prompt to handle dynamic elements
    if '<' in format_text:
        prompt += ("\nAny text enclosed by < and > indicates you must generate content to replace it. "
                   "Example input: Go to <location>, Example output: Go to the garden"
                   "\nAny output key containing < and > indicates you must generate the key name to replace it. "
                   "Example input: {'<location>': 'description of location'}, "
                   "Example output: {school: a place for education}")

    # if input is in a list format, ask it to generate json in a list
    if list_input:
        prompt += "\nGenerate a list of json, one json for each input element."

    return prompt


def _clean_record(record, output_format, default_category, output_value_only):
    ''' Checks one json object against output_format and post-processes its classification fields.
    Raises an Exception describing the problem if the object does not adhere to the format. '''
    # without this check, "key not in record" would silently become a substring test on a string
    if not isinstance(record, dict):
        raise Exception("Output element is not a json object")

    for key, expected in output_format.items():
        # unable to ensure accuracy of dynamic output header, so skip it
        if '<' in key or '>' in key:
            continue
        # if output field missing, raise an error
        if key not in record:
            raise Exception(f"{key} not in json output")
        # only fields whose format is a list of choices need post-processing
        if not isinstance(expected, list):
            continue

        value = record[key]
        # ensure output is not a list
        if isinstance(value, list):
            if not value:
                raise Exception(f"{key} is an empty list")
            value = value[0]
        # output the default category (if any) if GPT's answer is not one of the choices
        if value not in expected and default_category:
            value = default_category
        # if the output is a description format, get only the label
        # (non-string answers such as numbers have no label part and are kept as they are)
        if isinstance(value, str) and ':' in value:
            value = value.split(':')[0]
        record[key] = value

    # if we just want the values for the outputs
    if output_value_only:
        values = list(record.values())
        # just output without the list if there is only one element
        return values[0] if len(values) == 1 else values

    return record


def strict_output(system_prompt, user_prompt, output_format, default_category = "", output_value_only = False,
                  model = 'gpt-3.5-turbo', temperature = 0, num_tries = 2, verbose = False):
    ''' Ensures that OpenAI will always adhere to the desired output json format.
    Uses rule-based iterative feedback to ask GPT to self-correct.
    Keeps trying up to num_tries if it does not. Returns empty json if unable to after num_tries iterations.
    If output field is a list, will treat as a classification problem and output best classification category.
    Text enclosed within < > will be generated by GPT accordingly.

    Parameters:
    - system_prompt: system message; the formatting instructions are appended to it
    - user_prompt: the input; if it is a list, a list of json (one per element) is requested and returned
    - output_format: dict of output key -> description, or -> list of categories to classify into
    - default_category: if not empty, replaces any classification answer that is not in the list of categories
    - output_value_only: if True, returns only the values of each json (a single value if there is only one key)
    - model, temperature: passed to openai.ChatCompletion.create
    - num_tries: maximum number of calls to the API
    - verbose: if True, prints the prompts and the response of each try

    Returns the checked json (dict), or a list of them for list input, or {} if every try failed.
    Errors raised by the API call itself are not caught. '''

    # if the user input is in a list, we also process the output as a list of json
    list_input = isinstance(user_prompt, list)
    # the formatting instructions do not change between tries, so build them once
    output_format_prompt = _build_format_prompt(output_format, list_input)

    # start off with no error message
    error_msg = ''

    for _ in range(num_tries):

        # Use OpenAI to get a response
        response = openai.ChatCompletion.create(
          temperature = temperature,
          model=model,
          messages=[
            {"role": "system", "content": system_prompt + output_format_prompt + error_msg},
            {"role": "user", "content": str(user_prompt)}
          ]
        )

        raw = response['choices'][0]['message']['content']

        # fallback form for replies written with single quotes: turn every ' into ",
        # then restore apostrophes that sit between two word characters
        res = re.sub(r"(\w)\"(\w)", r"\1'\2", raw.replace('\'', '"'))

        if verbose:
            print('System prompt:', system_prompt + output_format_prompt + error_msg)
            print('\nUser prompt:', str(user_prompt))
            print('\nGPT response:', res)

        # try-catch block to ensure output format is adhered to
        try:
            # a reply that is already valid json must not go through the quote rewriting,
            # which would corrupt quoted phrases and trailing apostrophes inside its strings
            try:
                output = json.loads(raw)
            except json.JSONDecodeError:
                output = json.loads(res)

            if list_input:
                if not isinstance(output, list): raise Exception("Output format not in a list of json")
            else:
                output = [output]

            # check for each element in the output list, the format is correctly adhered to
            output = [_clean_record(record, output_format, default_category, output_value_only)
                      for record in output]

            return output if list_input else output[0]

        # deliberately broad: any parsing or format problem is fed back to GPT on the next try
        except Exception as e:
            error_msg = f"\n\nResult: {res}\n\nError message: {str(e)}"
            print("An exception occurred:", str(e))
            print("Current invalid json format:", res)

    return {}

## Overall Open-ended generation
- **system_prompt**: Write in whatever you want GPT to become. "You are a \<purpose in life\>"
- **user_prompt**: The user input. Later, when we use it as a function, this is the function input
- **output_format**: JSON format with the key as the output key, and the value as the output description
    - The output keys will be preserved exactly, while GPT will generate content to match the description of the value as best as possible

In [6]:
text = '''
One, two, three, four, five,
Once I caught a fish alive,
Six, seven, eight, nine, ten,
Then I let it go again.
Why did you let it go?
Because it bit my finger so.
Which finger did it bite?
This little finger on my right'''

In [7]:
# Open-ended information extraction from text
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
                    user_prompt = text,
                    output_format = {"Summary": "Summarize the text in 10 words", "Entity Caught": "name of entity caught", 
                                 "Finger Bitten": "finger which was bitten", "Numbers": "List of numbers"})
print(res)

{'Summary': 'Once caught fish alive, let go, bit finger', 'Entity Caught': 'fish', 'Finger Bitten': 'little finger', 'Numbers': ['One', 'two', 'three', 'four', 'five', 'Six', 'seven', 'eight', 'nine', 'ten']}


## List-based constraining of outputs

- You can constrain the output of a field by using a list of categories. Then GPT will treat it as a classification problem and return one of the categories
    - Example input text: "I am so elated!"
    - Example output_format: {"Sentiment": ["happy", "sad", "neutral"]}
    - Example output: {"Sentiment": "happy"}

In [31]:
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
                    user_prompt = "I am so elated",
                    output_format = {"Sentiment": ["happy", "sad", "neutral"]})
print(res)

{'Sentiment': 'happy'}


In [10]:
# We want to constrain some output fields to within a fixed list of categories
# If GPT answers with something outside the list and no default_category is set, its proposed answer is returned as it is
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
                    user_prompt = text,
                    output_format = {"Summary": "Summarize the text in 10 words", "Entity Caught": ["living", "non-living"], 
                                 "Finger Bitten": ["left", "middle"], "Numbers": "List of numbers"})
print(res)

{'Summary': 'Once caught fish alive, let go, bit finger right', 'Entity Caught': 'living', 'Finger Bitten': 'right', 'Numbers': ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']}


In [32]:
# We want to constrain some output fields to within a fixed list of categories
# To flag answers that are not found in the list, assign some text to default_category; it replaces any such answer
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
            user_prompt = text,
            output_format = {"Summary": "Summarize the text in 10 words", "Entity Caught": ["living", "non-living"], 
                                 "Finger Bitten": ["left", "middle"], "Numbers": "List of numbers"},
            default_category = "Unable to Classify")
print(res)

{'Summary': 'Once caught fish alive, let go, bit finger right', 'Entity Caught': 'living', 'Finger Bitten': 'Unable to Classify', 'Numbers': ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']}


## List-based label constraining of output

- You can also constrain the output of a field to label names, by defining the list in the following format {Label Name}: {Label Description}
    - Example input text: "I am so elated!"
    - Example output format: {"Sentiment": ["A: happy", "B: sad", "C: neutral"]}
    - Example output: {"Sentiment": "A"}

In [30]:
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
              user_prompt = "I am so elated",
              output_format = {"Sentiment": ["A: happy", "B: sad", "C: neutral"]})
print(res)

{'Sentiment': 'A'}


In [33]:
# If we are only interested in the label of the classification, we can specify the list as <Label Name>: <Label Description>
# This forces the output into one label, regardless of whether it agrees with the categories or not
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
              user_prompt = text,
              output_format = {"Summary": "Summarize the text in 10 words", "Entity Caught": ["living", "non-living"], 
                                 "Finger Bitten": ["A: left", "B: middle"], "Numbers": "List of numbers"})
print(res)

{'Summary': 'Once caught a fish alive, let go, bit finger', 'Entity Caught': 'living', 'Finger Bitten': 'A', 'Numbers': ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']}


In [17]:
# If we are only interested in the label of the classification, we can specify the list as <Label Name>: <Label Description>
# To flag an answer that does not exactly match one of the listed options, assign some text to default_category
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
              user_prompt = text,
              output_format = {"Summary": "Summarize the text in 10 words", "Entity Caught": ["living", "non-living"], 
                                 "Finger Bitten": ["A: left", "B: middle"], "Numbers": "List of numbers"},
              default_category = "Unable to Classify")
print(res)

{'Summary': 'Once caught a fish alive, let go, bit finger', 'Entity Caught': 'living', 'Finger Bitten': 'Unable to Classify', 'Numbers': ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']}


# Dynamic output format
- Used when we want to constrain the output in a largely fixed format but allow for some flexibility in some areas
- Flexible areas are enclosed with <> alongside areas which are fixed
    - Example: "\<entity\> bit my \<entity\>" means that we want GPT to replace the two \<entity\> tags, but preserve "bit my" exactly
    - Example output: "Fish bit my finger"
- <> can also be applied to the keys of json, but we will not be doing strict output checks on those fields since it will be dynamically generated
- When <> is applied to the output key, GPT can generate the key name according to context

In [20]:
# Dynamic output format: GPT is asked to generate content for any text enclosed by < >
# Keys containing < > are generated by GPT as well, so they are not checked in the output
res = strict_output(system_prompt = 'You are a friendly assistant meant to extract information from text', 
              user_prompt = text,
              output_format = {"Summary": "<entity> nibbled on my <entity>", "Entity Caught": ["living", "non-living"], 
                                "<location of injury>": "injury was caused by <entity>"})
print(res)

{'Summary': 'Fish nibbled on my finger', 'Entity Caught': 'living', 'finger': 'injury was caused by fish'}


## Chain-of-thought prompting via output format

- You can also perform chain-of-thought prompting by ordering the json fields in the right way
- Example
    - Day planner has much better output if broad plan is generated first before detailed plan
    - We can also prompt the model for thoughts, action, observation (ReAct framework) as part of json output
    - We can also prompt the model for reflection (RefleXion framework), and even combine the two together!

In [21]:
res = strict_output(system_prompt = 'You are a day planner, meant to schedule the events of the day with some constraints. Output an hourly-based schedule.', 
              user_prompt = '''To-do-list:
- 3 hour webinar on LLMs
- 3 hour board games session
- 1 hour for lunch and dinner
- 2 hour Netflix watching
- 2 hour badminton session

Constraints:
- Badminton session is the first thing of the day
- Lunch between 11am to 1pm
- Dinner between 6pm to 8pm
- Awake only from 9am to 10pm''',
              output_format = {"Detailed Plan": "Hourly-based schedule in a list"})
print(res)

{'Detailed Plan': ['9am - 11am: Badminton session', '11am - 12pm: Lunch', '12pm - 3pm: Webinar on LLMs', '3pm - 5pm: Board games session', '5pm - 6pm: Dinner', '6pm - 8pm: Netflix watching', '8pm - 10pm: Free time']}


In [22]:
res = strict_output(system_prompt = 'You are a day planner, meant to schedule the events of the day with some constraints. Output an hourly-based schedule.', 
              user_prompt = '''To-do-list:
- 3 hour webinar on LLMs
- 3 hour board games session
- 1 hour for lunch and dinner
- 2 hour Netflix watching
- 2 hour badminton session

Constraints:
- Badminton session is the first thing of the day
- Lunch between 11am to 1pm
- Dinner between 6pm to 8pm
- Awake only from 9am to 10pm''',
              output_format = {"Broad Plan": "Thoughts on how to achieve the desired schedule taking into account constraints", 
                               "Detailed Plan": "Hourly-based schedule in a list"})
print(res)

{'Broad Plan': 'Start the day with a 2 hour badminton session. Then, attend the 3 hour webinar on LLMs. Take a break for lunch from 11am to 1pm. After lunch, have a 3 hour board games session. Take another break for dinner from 6pm to 8pm. Finally, relax with 2 hours of Netflix watching before going to bed at 10pm.', 'Detailed Plan': ['9am - 11am: Badminton session', '11am - 1pm: Lunch', '1pm - 4pm: Webinar on LLMs', '4pm - 7pm: Board games session', '7pm - 8pm: Dinner', '8pm - 10pm: Netflix watching']}


In [23]:
# Robot Action Selector!
# Usually the observation will be returned by the environment (or tool use), but here we will get GPT to just imagine it
# we can just use the observation as the current state, and get the robot to continue generating the actions!
# Start from a fresh dict, so that this cell does not depend on (or modify) the result of an earlier cell
res = {'New State': "Robot at Area A, John at Area B, can of coke at Area C"}
actions = []

# written line by line so that the exact whitespace of the prompt is visible
robot_system_prompt = (
    "You are a robot. User will give your your current state and your task. \n"
    "You are to choose an action to bring you closer to accomplishing the task.\n"
    "Constraints:\n"
    "- You pick up objects in the area automatically when you go into the area\n"
    "- You need to pass the object to another person explicitly within the same area"
)

for step in range(4):
    # the "New State" of the previous step is fed back in as the current state
    res = strict_output(system_prompt = robot_system_prompt,
                  user_prompt = f"Current State: {res['New State']}, Past Actions: {actions}\n"
                                "Task: Give a can of coke to John.",
                  output_format = {"Current State": "describe the current state",
                                   "Thoughts": "Thoughts on how to achieve the task from current state", 
                                   "Action": ["move to <area>: move to <area>", 
                                              "pass <object> to <person>: usable only when robot has <object> and is at same location as <person>",
                                              "end: when nothing else is needed"], 
                                   "Narration of robot performing one action": "describe what the robot did from current state",
                                   "New State": "generate the locations of robot, person and coke are at after performing action from current state",
                                   "Reflection": "reflect on what has been done well, what has been done wrong",
                                   "Task completed": ["yes", "no"]})
    print(res)

    # strict_output returns {} when it could not get a valid json; there is no state to continue from
    if not res: break

    actions.append(res['Action'])
    # Exit if task is completed
    if res['Task completed'] == 'yes': break

{'Current State': 'Robot at Area A, John at Area B, can of coke at Area C', 'Thoughts': 'To give a can of coke to John, I need to pick up the can of coke from Area C and then move to Area B where John is located.', 'Action': 'move to Area C', 'Narration of robot performing one action': 'The robot moves to Area C to pick up the can of coke.', 'New State': 'Robot at Area C, John at Area B, can of coke at Robot', 'Reflection': 'The robot successfully picked up the can of coke from Area C.', 'Task completed': 'no'}
{'Current State': 'Robot at Area C, John at Area B, can of coke at Robot', 'Thoughts': 'To give a can of coke to John, I need to move to Area B where John is located and then pass the can of coke to him.', 'Action': 'move to Area B', 'Narration of robot performing one action': 'The robot moves to Area B.', 'New State': 'Robot at Area B, John at Area B, can of coke at Robot', 'Reflection': 'Moving to the correct area is a good step towards completing the task. However, I still ne

# Handling Input as a List
- In order to save tokens, we may want to process multiple input items using the same output_format schema
- We can then pass in a list into user_prompt to get the function to output a list of json
- There will be one json in the output for each element of the input list

In [34]:
## We can get a list of json for each element in user_prompt
out = strict_output(system_prompt = "You are to classify the user sentiments.",
                   user_prompt = ["This is such a beautiful day", "My heart is aching", "Time is passing by so slowly"],
                   output_format = {"Sentiment": ["sad", "boring", "happy", "unknown"]})
print(out)

[{'Sentiment': 'happy'}, {'Sentiment': 'sad'}, {'Sentiment': 'boring'}]


In [35]:
## We can get a list of json for each element in user_prompt
## Each json can contain multiple elements
out = strict_output(system_prompt = "You are to classify the user sentiments.",
                   user_prompt = ["This is such a beautiful day", "My heart is aching", "Time is passing by so slowly"],
                   output_format = {"Sentiment": ["A: sad", "B: boring", "C: happy", "D: unknown"],
                                   "<main entity>": "Definition of entity"})
print(out)

[{'Sentiment': 'C', 'beautiful day': 'a day that is pleasing to the senses'}, {'Sentiment': 'A', 'heart': 'the organ in the body that pumps blood'}, {'Sentiment': 'B', 'time': 'the indefinite continued progress of existence and events in the past, present, and future regarded as a whole'}]


In [36]:
## Check that value-only output also works when the input is a list with multiple items
## We can get a list of values by setting output_value_only to True
## Note: This is just post-processing, output of GPT will still be a json
out = strict_output(system_prompt = "You are to classify the user sentiments.",
                   user_prompt = ["This is such a beautiful day", "My heart is aching", "Time is passing by so slowly"],
                   output_format = {"Sentiment": ["A: sad", "B: boring", "C: happy", "D: unknown"]},
                   output_value_only = True)
print(out)

['C', 'A', 'B']
