# StrictJSON Generator
- Generates StrictJSON code from a user-supplied task and inputs, then runs it
- Created by John Tan Chong Min
- 25 Sep 2024

In [1]:
from taskgen import *

In [2]:
import os
# Placeholder value: substitute your own key before running, and do not commit a real key.
# NOTE(review): presumably this is what the OpenAI() client created in llm() picks up - confirm.
os.environ['OPENAI_API_KEY'] = '<YOUR API KEY HERE>'

In [4]:
def llm(system_prompt: str, user_prompt: str) -> str:
    ''' Sends one system prompt and one user prompt to an OpenAI chat model and
    returns the text of the first choice.

    OpenAI is used purely as an illustration; swap in your own LLM as long as
    the function keeps the (system_prompt, user_prompt) -> str shape. '''
    # keep every import the LLM needs inside this function
    from openai import OpenAI

    # the conversation: system instruction first, then the user's input
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # replace this call with your own LLM if desired
    completion = OpenAI().chat.completions.create(
        model='gpt-4o-mini',
        temperature=0,
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [98]:
# Reference text describing strict_json; it is interpolated verbatim into the
# code-generation prompt built by create_strict_json.
# This is a raw string on purpose: the text contains "\<", "\>" (an invalid
# escape sequence in a normal string literal) and "\n" inside the example
# outputs, which must stay as a literal backslash-n so each example remains a
# single line instead of being split by real newlines.
documentation = r"""# Features:
# 1. Basic Generation

- **system_prompt**: Write in whatever you want the LLM to become. "You are a \<purpose in life\>"
- **user_prompt**: The user input. Later, when we use it as a function, this is the function input
- **output_format**: JSON of output variables in a dictionary, with the key as the output key, and the value as the output description
    - The output keys will be preserved exactly, while the LLM will generate content to match the description of the value as best as possible
- **llm**: The llm you want to use. Takes in `system_prompt` and `user_prompt` and outputs the LLM-generated string

#### Example LLM Definition
```python
def llm(system_prompt: str, user_prompt: str) -> str:
    ''' Here, we use OpenAI for illustration, you can change it to your own LLM '''
    # ensure your LLM imports are all within this function
    from openai import OpenAI
    
    # define your own LLM here
    client = OpenAI()
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        temperature = 0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    return response.choices[0].message.content
```

#### Example Usage
```python
res = strict_json(system_prompt = 'You are a classifier',
                    user_prompt = 'It is a beautiful and sunny day',
                    output_format = {'Sentiment': 'Type of Sentiment',
                                    'Adjectives': 'Array of adjectives',
                                    'Words': 'Number of words'},
                    llm = llm)
                                    
print(res)
```

#### Example Output
```{'Sentiment': 'Positive', 'Adjectives': ['beautiful', 'sunny'], 'Words': 7}```

## 2. Advanced Generation
- More advanced demonstration involving code that would typically break ```json.loads()```

#### Example Usage
```python
res = strict_json(system_prompt = 'You are a code generator, generating code to fulfil a task',
                    user_prompt = 'Given array p, output a function named func_sum to return its sum',
                    output_format = {'Elaboration': 'How you would do it',
                                     'C': 'Code',
                                    'Python': 'Code'},
                    llm = llm)
                                    
print(res)
```

#### Example Output
```{'Elaboration': 'Use a loop to iterate through each element in the array and add it to a running total.', ```

```'C': 'int func_sum(int p[], int size) {\n    int sum = 0;\n    for (int i = 0; i < size; i++) {\n        sum += p[i];\n    }\n    return sum;\n}', ```

```'Python': 'def func_sum(p):\n    sum = 0\n    for num in p:\n        sum += num\n    return sum'}```

## 3. Type forcing output variables
- Generally, ```strict_json``` will infer the data type automatically for you for the output fields
- However, if you would like very specific data types, you can do data forcing using ```type: <data_type>``` at the last part of the output field description
- ```<data_type>``` must be of the form `int`, `float`, `str`, `dict`, `list`, `array`, `code`, `Dict[]`, `List[]`, `Array[]`, `Enum[]`, `bool` for type checking to work
- `code` removes all unicode escape characters that might interfere with normal code running
- The `Enum` and `List` are not case sensitive, so `enum` and `list` works just as well
- For `Enum[list_of_category_names]`, it is best to give an "Other" category in case the LLM fails to classify correctly with the other options.
- If `list` or `List[]` is not formatted correctly in LLM's output, we will correct it by asking the LLM to list out the elements line by line
- For `dict`,  we can further check whether keys are present using `Dict[list_of_key_names]`
- Other types will first be forced by rule-based conversion, any further errors will be fed into LLM's error feedback mechanism
- If `<data_type>` is not the specified data types, it can still be useful to shape the output for the LLM. However, no type checking will be done.
- Note: LLM understands the word `Array` better than `List` since `Array` is the official JSON object type, so in the backend, any type with the word `List` will be converted to `Array`.

### LLM-based checks
- If you would like the LLM to ensure that the type is being met, use `type: ensure <requirement>`
- This will run a LLM to check if the requirement is met. If requirement is not met, the LLM will generate what needs to be done to meet the requirement, which will be fed into the error-correcting loop of `strict_json`

#### Example Usage 1
```python
res = strict_json(system_prompt = 'You are a classifier',
                    user_prompt = 'It is a beautiful and sunny day',
                    output_format = {'Sentiment': 'Type of Sentiment, type: Enum["Pos", "Neg", "Other"]',
                                    'Adjectives': 'Array of adjectives, type: List[str]',
                                    'Words': 'Number of words, type: int',
                                    'In English': 'Whether sentence is in English, type: bool'},
                  llm = llm)
                                    
print(res)
```

#### Example Output 1
```{'Sentiment': 'Pos', 'Adjectives': ['beautiful', 'sunny'], 'Words': 7, 'In English': True}```

#### Example Usage 2
```python
res = strict_json(system_prompt = 'You are an expert at organising birthday parties',
                    user_prompt = 'Give me some information on how to organise a birthday',
                    output_format = {'Famous Quote about Age': 'type: ensure quote contains the word age',
                                    'Lucky draw numbers': '3 numbers from 1-50, type: List[int]',
                                    'Sample venues': 'Describe two venues, type: List[Dict["Venue", "Description"]]'},
                    llm = llm)

print(res)
```

#### Example Output 2
`Using LLM to check "The secret of staying young is to live honestly, eat slowly, and lie about your age. - Lucille Ball" to see if it adheres to "quote contains the word age" Requirement Met: True`


```{'Famous Quote about Age': 'The secret of staying young is to live honestly, eat slowly, and lie about your age. - Lucille Ball',```
```'Lucky draw numbers': [7, 21, 35],```

```'Sample venues': [{'Venue': 'Beachside Resort', 'Description': 'A beautiful resort with stunning views of the beach. Perfect for a summer birthday party.'}, {'Venue': 'Indoor Trampoline Park', 'Description': 'An exciting venue with trampolines and fun activities. Ideal for an active and energetic birthday celebration.'}]}```
"""

In [99]:
def create_strict_json(task, inputs = None, max_tries = 3):
    ''' Creates code to use strict_json or Function given a task and the inputs,
    then runs the generated code.

    The LLM is asked to write a function ``my_function(inputs: Dict) -> Dict``.
    The generated code is executed and ``my_function(inputs)`` is called. If
    anything raises, the failing code and its error message are fed back into
    the next prompt, for up to ``max_tries`` attempts.

    Inputs:
    - task: description of what the generated code should do
    - inputs: dictionary passed to the generated ``my_function``; defaults to an empty dictionary
    - max_tries: maximum number of generate-and-run attempts (default 3)

    Returns a tuple of (Code generated, whether code ran, code output).
    When every attempt fails, the third element is the last error message
    instead of the code output. With ``max_tries <= 0`` nothing is generated
    and ``("", False, "")`` is returned.

    WARNING: this executes LLM-generated code with ``exec``; only use it with
    tasks and inputs you trust. '''

    from typing import List, Dict

    # avoid sharing one mutable default dictionary across calls
    if inputs is None:
        inputs = {}

    # feedback carried from one attempt to the next; it must live outside the
    # loop, otherwise the prompt never sees the previous code and error
    code = ""
    error = ""

    for _ in range(max_tries):
        res2 = strict_json(f"""Given the task, write out code using strict_json to fulfil it.
    Previous Code: ```{code}```
    Previous Error: ```{error}```

    Example strict_json code: 
    ```res = strict_json(system_prompt = 'You are a classifier',
        user_prompt = 'It is a beautiful and sunny day',
        output_format = {{'Sentiment': '''Type of Sentiment, type: Enum["Pos", "Neg", "Other"]''',
                        'Adjectives': '''Array of adjectives, type: List[str]''',
                        'Words': '''Number of words, type: int''',
                        'In English': '''Whether sentence is in English, type: bool'''}},
        llm = llm)```
        
    strict_json documentation: ```{documentation}```""",
        f"Task: {task}, Inputs: {inputs}",
        output_format = {"Code": "Python function beginning with ```def my_function(inputs: Dict) -> Dict:\n``` which uses strict_json, type: code"},
        llm = llm)

        # remember this attempt so a failure can be reported to the next one
        code = res2['Code']

        try:
            # Run the generated code in its own namespace: a copy of this
            # module's globals (so strict_json and llm resolve) plus the typing
            # names used in the requested signature. An explicit dictionary is
            # used because repeated locals() calls are not guaranteed to return
            # the same mapping (Python 3.13+ returns independent snapshots).
            namespace = dict(globals())
            namespace.update(List = List, Dict = Dict)
            exec(code, namespace)
            output = namespace["my_function"](inputs)

        except Exception as e:
            # deliberately broad: any failure of the generated code is feedback
            error = str(e)

        else:
            return code, True, output

    return code, False, error

In [100]:
# Demo: generate and run code that pulls people and objects out of a sentence,
# then show the generated code followed by what it returned.
code, compile_correct, result = create_strict_json(
    task="Extract out all people and all objects from a sentence",
    inputs={"Sentence": "John ate Mary's sandwich"},
)
print(code)
print(result)

def my_function(inputs: Dict) -> Dict:
    res = strict_json(system_prompt = "You are a named entity recognizer, extracting people and objects from a sentence",
        user_prompt = inputs["Sentence"],
        output_format = {
            "People": "Array of names of people, type: List[str]",
            "Objects": "Array of objects mentioned, type: List[str]"
        },
        llm = llm)
    return res
{'People': ['John', 'Mary'], 'Objects': ['sandwich']}


In [101]:
# Demo: generate and run code that writes a poem around a given word,
# then show the generated code followed by what it returned.
code, compile_correct, result = create_strict_json(
    task="Generates a poem with the given word",
    inputs={"word": "sandwich"},
)
print(code)
print(result)

def my_function(inputs: Dict) -> Dict:
    res = strict_json(system_prompt = "You are a poet, generating a poem based on a given word",
        user_prompt = "Generate a poem with the word sandwich",
        output_format = {"Poem": "A creative poem that includes the word sandwich, type: str"},
        llm = llm)
    return res
{'Poem': 'In the kitchen, a canvas awaits,  \nLayers of flavor, a dance on my plate.  \nBread, soft and warm, cradles delight,  \nA sandwich of dreams, a savory bite.  \n\nLettuce whispers, crisp and green,  \nTomatoes blush, a vibrant sheen.  \nCheese melts gently, a golden embrace,  \nEach ingredient finds its rightful place.  \n\nTurkey or ham, a choice to be made,  \nWith mustard and mayo, a masterpiece laid.  \nPickles add crunch, a zesty surprise,  \nIn this simple meal, a feast for the eyes.  \n\nSo here’s to the sandwich, humble yet grand,  \nA comfort, a joy, crafted by hand.  \nIn every bite, a story unfolds,  \nOf flavors and memories, a treasure to h

In [102]:
# Demo: generate and run code that extracts numbers written as digits or words,
# then show the generated code followed by what it returned.
code, compile_correct, result = create_strict_json(
    task="Extract all numbers, including numbers from words",
    inputs={"Text": "forty-six is 34 more than 12"},
)
print(code)
print(result)

def my_function(inputs: Dict) -> Dict:
    res = strict_json(system_prompt = "You are a number extractor, extracting all numbers from text including numbers from words",
        user_prompt = inputs["Text"],
        output_format = {"Numbers": "Array of numbers extracted from the text, type: Array[int]"},
        llm = llm)
    return res
{'Numbers': [46, 34, 12]}


In [104]:
# Demo: same number extraction, but also asking for each number's word form and parity;
# shows the generated code followed by what it returned.
code, compile_correct, result = create_strict_json(
    task='''Extract all numbers, including numbers from words. 
                                    Output the numbers, as well as the word form of the numbers, and whether they are even or odd''',
    inputs={"Text": "forty-six is 34 more than 12"},
)
print(code)
print(result)

def my_function(inputs: Dict) -> Dict:
    res = strict_json(system_prompt = "You are a number extractor, extracting all numbers from text along with their word forms and whether they are even or odd",
        user_prompt = "forty-six is 34 more than 12",
        output_format = {
            "Numbers": "Array of numbers extracted, type: Array[int]",
            "Word Forms": "Array of word forms of the numbers, type: Array[str]",
            "Even or Odd": "Array indicating whether each number is even or odd, type: Array[str]"
        },
        llm = llm)
    return res
{'Numbers': [46, 34, 12], 'Word Forms': ['forty-six', 'thirty-four', 'twelve'], 'Even or Odd': ['even', 'even', 'even']}
