In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import guidance
from textwrap import dedent
import json 

from grammar_guide import guide, load_parser

In [2]:
model_name_or_path = "HuggingFaceTB/SmolLM-135M"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
parser = load_parser(lark_grammar_filepath="../grammars/json.lark")

In [5]:
from transformers import pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_new_tokens=20,
    return_full_text=True
)
prompt = dedent("""
Here is a really long, nested JSON that extracts fields from this sentence:\n\nMy name is Joseph Smith, and I work at Apple. I'm 32 years old, and my interests include kayaking, skiing, snowboarding, and woodworking.\n\n```json\n
""")
res = guide(
    model=lambda x: pipe(x)[0]['generated_text'].lstrip(prompt),
    parser=parser,
    seed_str="""{"name":""",
    prompt=prompt,
    draft_model=guidance.models.Transformers(
        model_name_or_path, echo=False
    ),
    max_grammar_corrections=20,
    verbose=False,
)
print(res.process_time_seconds)
print(len(tokenizer(res.response)['input_ids']) / res.process_time_seconds)
print(res.response)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


3.8636739253997803
11.388124580271679
{"name": "Joseph Smith, 32", "age": 32, "job": "K" , "location": "Apple", "city": "San Francisco", "state": "CA"}


In [4]:
res = guide(
    model,
    tokenizer=tokenizer,
    parser=parser,
    seed_str="""{"name":""",
    prompt=dedent("""
    Here is a really long, nested JSO that extracts fields from this sentence:\n\nMy name is Joseph Smith, and I work at Apple. I'm 32 years old, and my interests include kayaking, skiing, snowboarding, and woodworking.\n\n```json\n
    """),
    draft_model=guidance.models.Transformers(
        model_name_or_path, echo=False
    ),
    stop_at=['```'],
    max_grammar_corrections=20,
    verbose=False,
    max_new_tokens=20,
    temperature=0.0,
)
print(res.process_time_seconds)
print(len(tokenizer(res.response)['input_ids']) / res.process_time_seconds)

4.000513792037964
20.747335046111086


In [5]:
try:
    print(json.dumps(json.loads(res.response), indent = 4))
except:
    print(res.response)

{
    "name": "Joseph Smith, 32 years old, and my interests include kayaking, skiing, snow",
    "age": 32,
    "occupation": "skiing, skiing, snowboarding, and woodworking",
    "job": "johnson",
    "job_title": "johnson",
    "job_description": "johnson"
}
