In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import guidance
from textwrap import dedent
import json 

from grammar_guide import guide, load_parser

In [2]:
model_name_or_path = "HuggingFaceTB/SmolLM-360M"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
parser = load_parser(lark_grammar_filepath="../grammars/json.lark")

tokenizer_config.json:   0%|          | 0.00/3.69k [00:00<?, ?B/s]

In [3]:
from transformers import pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_new_tokens=20,
    return_full_text=True
)
prompt = dedent("""
Here is a really long, nested JSON that extracts fields from this sentence:\n\nMy name is Joseph Smith, and I work at Apple. I'm 32 years old, and my interests include kayaking, skiing, snowboarding, and woodworking.\n\n```json\n
""")
res = guide(
    model=lambda x: pipe(x)[0]['generated_text'].lstrip(prompt),
    parser=parser,
    seed_str="""{"name":""",
    prompt=prompt,
    draft_model=guidance.models.Transformers(
        model_name_or_path, echo=False
    ),
    max_grammar_corrections=20,
    verbose=False,
)
print(res.process_time_seconds)
print(len(tokenizer(res.response)['input_ids']) / res.process_time_seconds)
print(res.response)

  Referenced from: <31D2ED80-D446-353A-885A-F2032D05B554> /Users/a720504/miniconda3/envs/blendsql/lib/python3.9/site-packages/torchvision/image.so
  Expected in:     <709C1DF5-D253-3C66-87E2-C99FD3A259DF> /Users/a720504/miniconda3/envs/blendsql/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib
  warn(f"Failed to load image Python extension: {e}")
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`

11.954229831695557
5.437405915323661
{"name": "Joseph Smith", "age": 32, "interests": ["skiing", "hunting", "snowboarding", "woodworking"], "work": "Apple", "email": "<EMAIL>", "phone": "+1 415 555 5555"}


In [4]:
res = guide(
    model,
    tokenizer=tokenizer,
    parser=parser,
    seed_str="""{"name":""",
    prompt=dedent("""
    Here is a really long, nested JSO that extracts fields from this sentence:\n\nMy name is Joseph Smith, and I work at Apple. I'm 32 years old, and my interests include kayaking, skiing, snowboarding, and woodworking.\n\n```json\n
    """),
    draft_model=guidance.models.Transformers(
        model_name_or_path, echo=False
    ),
    stop_at=['```'],
    max_grammar_corrections=20,
    verbose=False,
    max_new_tokens=20,
    temperature=0.0,
)
print(res.process_time_seconds)
print(len(tokenizer(res.response)['input_ids']) / res.process_time_seconds)

8.229501962661743
6.318729886198504


In [5]:
try:
    print(json.dumps(json.loads(res.response), indent = 4))
except:
    print(res.response)

{
    "name": "Joseph Smith",
    "age": 32,
    "interests": [
        "skiing",
        "snowboarding",
        "woodworking"
    ],
    "work": "Apple",
    "email": "<EMAIL>",
    "phone": "502-555-5555"
}
