In [2]:
import torch
import torch.nn.functional as F
from gpt_model import GPT, GPTConfig
import tiktoken

# Force CPU usage
device = "cpu"
print(f"Using device: {device}")

# Load the model
checkpoint_path = "log124M_10B/model_19072.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)
config = checkpoint['config']
model = GPT(config)
model.load_state_dict(checkpoint['model'])
model.to(device)
model.eval()

# Set up the tokenizer
enc = tiktoken.get_encoding("gpt2")

Using device: cpu


  checkpoint = torch.load(checkpoint_path, map_location=device)


In [5]:
import torch
import torch.nn.functional as F
from gpt_json import generate_json

numeric_tokens = [i for i in range(enc.n_vocab) if enc.decode([i]).isdigit()]
period_token = enc.encode(".")[0]
comma_token = enc.encode(",")[0]
numeric_tokens.append(period_token)
numeric_tokens.append(comma_token)
nt = torch.tensor(numeric_tokens, dtype=torch.long).unsqueeze(0).to(device)

# Example usage:
schema = {
        "name": {"type": "string"},
        "age": {"type": "number"}
}

prompt_GPT = f"Generate an example person caled Tom!!!! and output result in the following JSON schema format:\n{schema}\n"
print(prompt_GPT)
result_GPT = generate_json(prompt_GPT, 
                           schema, 
                           model, 
                           enc, 
                           device, 
                           nt, 
                           period_token, 
                           comma_token,
                           seed=0)
print(result_GPT)

Generate an example person caled Tom!!!! and output result in the following JSON schema format:
{'name': {'type': 'string'}, 'age': {'type': 'number'}}



TypeError: generate_json() got an unexpected keyword argument 'seed'

In [4]:
## Test Jsonformer
from transformers import AutoModelForCausalLM, AutoTokenizer

print("Loading model and tokenizer...")
model_name = "databricks/dolly-v2-3b"

# Specify CPU as the device
device = torch.device("cpu")

# Load the model to CPU
model_json = AutoModelForCausalLM.from_pretrained(
    model_name, 
    use_cache=True, 
    device_map="cpu",  # This ensures CPU usage
    torch_dtype=torch.float32  # Use float32 for CPU
)

# Explicitly move the model to CPU (though it should already be there)
model_json = model_json.to(device)

# Load the tokenizer (tokenizer is not affected by device choice)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, use_cache=True)

print("Loaded model and tokenizer")

# Verify that the model is on CPU
print(f"Model is on: {next(model_json.parameters()).device}")

  from .autonotebook import tqdm as notebook_tqdm


Loading model and tokenizer...
Loaded model and tokenizer
Model is on: cpu




In [5]:
from jsonformer.format import highlight_values
from jsonformer.main import Jsonformer


person = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "number"},
        "is_student": {"type": "boolean"},
    }
}

builder = Jsonformer(
    model=model_json,
    tokenizer=tokenizer,
    json_schema=person,
    prompt="Generate details of a person who is a student",
    debug=False,
)

print("Generating...")
output = builder()

highlight_values(output)


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generating...
{
  name: [32m"John Doe"[0m,
  age: [32m30.0[0m,
  is_student: [32mTrue[0m
}


In [6]:
## Jsonformer
print(f"Jsonformer: {isinstance(output, dict)}")

Jsonformer: True


In [7]:
## GPT-2
import json
json_string = result_GPT[len(prompt_GPT):]
print(f"GPT-2: {json_string}")
json.loads(result_GPT[len(prompt_GPT):])
# It works!!

GPT-2: {"name": "name","age": 201}


{'name': 'name', 'age': 201}

### Post-mortem
- Really bad LLMs (i.e. in the way GPT-2 is worse than GPT-3) are better for testing since they are less likely to "obey" JSON schema, or any other structure. So your code has to be more robust to account for it. For example, GPT-4 might not need any of the additional code I wrote to handle the LLMs' output.
- You can enforce any output structure in the same way I have done here.
- Batch processing output could is more complicated as you may have the output sequences building at different rates. 