In [1]:
import guidance
import json 
from openai import OpenAI
import os 
from dotenv import load_dotenv

import grammar_guide as gg

# Load environment variables from a local .env file before any cell reads
# os.environ (presumably this is where OPENAI_API_KEY comes from; confirm).
load_dotenv()

True

In [5]:
# OpenAI client used by the generation callback; the key is read from the
# environment rather than hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Hugging Face model id handed to the draft model further down.
draft_model_name_or_path = "HuggingFaceTB/SmolLM-135M"

In [13]:
def openai_generate(s: str, prompt: str, max_new_tokens: int) -> str:
    """Request a chat completion from ``gpt-4o-mini`` and return its text.

    Args:
        s: Text generated so far. When non-empty it is appended to the
            conversation as an assistant message after the system prompt.
        prompt: Instruction text, sent as the system message.
        max_new_tokens: Forwarded to the API as ``max_tokens``.

    Returns:
        The message content of the first returned choice, or an empty
        string when the API returns no content.
    """
    messages = [{"role": "system", "content": prompt}]
    if s:
        messages.append({"role": "assistant", "content": s})

    chat_completion = client.chat.completions.create(
        messages=messages,
        model="gpt-4o-mini",
        max_tokens=max_new_tokens,
    )

    content = chat_completion.choices[0].message.content
    # `content` is optional in the OpenAI SDK; normalise None to "" so the
    # declared `-> str` contract always holds for the caller.
    return content if content is not None else ""

In [14]:
from string import Template
 
num_json_keys = 6 # The number of JSON keys we want to generate (not telling GPT this!)

# Read the grammar template inside a context manager so the file handle is
# closed deterministically, and pin the encoding so the result does not depend
# on the platform default.
with open("../examples/benchmarks/json.lark", encoding="utf-8") as grammar_file:
    grammar_template = Template(grammar_file.read())

# safe_substitute leaves any placeholder other than NUM_REPEATS untouched
# instead of raising.
lark_gramar_str = grammar_template.safe_substitute(
    NUM_REPEATS=str(num_json_keys - 1)
)

# openai_generate is the main model callback; the small local Transformers
# model is passed as the draft model.
res = gg.guide(
    model=openai_generate,
    parser=gg.load_parser(lark_gramar_str),
    prompt="Continue the below JSON object with only string values.",
    draft_model=guidance.models.Transformers(
        draft_model_name_or_path, echo=False
    ),
    max_new_tokens=50,
    max_grammar_corrections=3,
    verbose=True,
)
print(res.process_time_seconds)

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
[33mMade a draft_gen correction...[39m


3.367475748062134


In [15]:
# Round-trip the response through the JSON parser so it is printed with
# 4-space indentation.
parsed_response = json.loads(res.response)
print(json.dumps(parsed_response, indent=4))

{
    "name": "John Doe",
    "email": "johndoe@example.com",
    "address": "123 Main St, Anytown, USA",
    "phone": "555-1234",
    "occupation": "Engineer  "
}


In [16]:
# Pretty-print the list form of the result (the "Original" / "Corrected" /
# "Selected Candidate" / "Type" records shown below).
result_records = res.to_list()
print(json.dumps(result_records, indent=4))

[
    {
        "Original": "{  \"name\": \"John Doe\",\n  \"email\": \"johndoe@example.com\",\n  \"address\": \"123 Main St, Anytown, USA\",\n  \"phone\": \"555-1234\",\n  \"occupation\": \"",
        "Corrected": "{  \"name\": \"John Doe\",\n  \"email\": \"johndoe@example.com\",\n  \"address\": \"123 Main St, Anytown, USA\",\n  \"phone\": \"555-1234\",\n  \"occupation\": \"Engineer",
        "Selected Candidate": "Engineer",
        "Type": "draft_gen"
    },
    {
        "Original": "{  \"name\": \"John Doe\",\n  \"email\": \"johndoe@example.com\",\n  \"address\": \"123 Main St, Anytown, USA\",\n  \"phone\": \"555-1234\",\n  \"occupation\": \"Engineer  \"company\": \"Tech Solutions Inc.\",\n  \"hobbies\": \"Reading, Hiking, Coding\",\n  \"favorite_color\": \"Blue\",\n  \"marital_status\": \"Single\",\n  \"bio\": \"A passionate engineer with a love for",
        "Corrected": "{  \"name\": \"John Doe\",\n  \"email\": \"johndoe@example.com\",\n  \"address\": \"123 Main St, Anytown, US

GPT-4o is generally good at following instructions, so unlike in `transformers-gg.ipynb`, we don't need to backtrack and correct an invalid numeric JSON value. Instead, we can just parse until we reach our intended 6 JSON keys and then insert a closing brace `}`.