In [1]:

import json
import os
from typing import Optional
from typing import List, Optional, Literal

import instructor
from groq import Groq
from pydantic import BaseModel


# Create the raw Groq client (it is wrapped with instructor in the next cell).
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret is stored in the notebook. Any key that was ever committed in this
# file must be treated as leaked and revoked.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before running this notebook."
    )
client = Groq(api_key=api_key)


### INSTRUCTOR BASED APPROACH

In [2]:
# Wrap the raw Groq client with instructor so that `response_model=` can be
# passed to `client.chat.completions.create`.
# The guard makes this cell safe to re-run: once `client` has been wrapped it
# is no longer a `Groq` instance, so it is not passed to `from_groq` again.
if isinstance(client, Groq):
    client = instructor.from_groq(client)

# Minimal user schema. Later in the notebook it is passed to
# `outlines.generate.json` as the structure the generated JSON must follow.
class User(BaseModel):
    name: str
    age: int
    # Optional; defaults to None when no email is provided.
    email: Optional[str] = None

In [3]:
# Structured form of a customer support ticket. Used as the `response_model`
# for the ticket-extraction completion further down.
class SupportTicket(BaseModel):
    # All identifiers are declared as strings.
    ticket_id: str
    order_id: str
    user_id: str
    # Plain string; no date format is enforced by this model.
    date: str
    description: str
    # Restricted to exactly these three values.
    category: Literal["delivery", "payment", "other"]
    # Both fields below are optional and default to None.
    priority: Optional[Literal["low", "medium", "high"]] = None
    status: Optional[Literal["open", "closed", "in progress"]] = None
    
    

In [7]:
from utils import UsageTracker

# Remove any hooks already registered for "completion:response" so that
# re-running this cell does not cause duplicate writes to the usage tracker.
client.clear("completion:response")

# Create a fresh tracker for the completion hook to write usage into.
tracker = UsageTracker()

# Custom instructor hook: forwards the usage of each completion response
# to the tracker.
def log_completion_kwargs(*args, **kwargs):
    """Record the usage attached to a completion response in ``tracker``.

    The response object is taken from the first positional argument; any
    further positional or keyword arguments are ignored.
    """
    response = args[0]
    usage_record = response.usage
    tracker.track(usage_record)

# Register the hook with instructor so that it runs each time the server
# returns a chat completion to us.
client.on("completion:response", log_completion_kwargs)

In [18]:
from typing import Literal

# Toy schema used as the `response_model` for the essay request below.
class Complicated(BaseModel):
    # `a` must be exactly one of "cat", "dog", or "hat".
    a: Literal["cat", "dog", "hat"]
    # `b` and `c` have no defaults, so both must be present.
    b: int
    c: bool


In [21]:
# Reset the tracker first so the numbers reported afterwards cover only
# this request.
tracker.clear()

# Ask for an essay while requiring the reply to fit the `Complicated` schema.
# `response_model` (rather than `response_format`) is what instructor uses to
# return a parsed model instance; `max_retries` caps the number of retries.
completion = client.chat.completions.create(
    model="gemma2-9b-it",
    messages=[
        {"role": "system", "content": """Don't give me what I want"""},
        {"role": "user", "content": """Write me a short essay on Dolly Parton.."""},
    ],
    temperature=0.6,
    max_completion_tokens=1024,
    response_model=Complicated,
    max_retries=4,
)

print(completion)  # A Complicated instance

a='cat' b=1 c=True


In [22]:
# Report what the tracker recorded for the completion above.
print("Input tokens:  ", tracker.input_tokens)
print("Output tokens: ", tracker.output_tokens)
print("Total tokens:  ", sum(tracker.total_tokens))
# NOTE(review): this prints the number of entries the tracker recorded, which
# presumably is one per completion response (attempts, not retries) -- a
# request that succeeds on the first try prints 1. Confirm against
# UsageTracker and consider relabeling.
print("Num retries:   ", len(tracker.output_tokens))

Input tokens:   [955]
Output tokens:  [95]
Total tokens:   1050
Num retries:    1


In [35]:

# Extract a structured SupportTicket from a free-text customer complaint.
# The system prompt is built from adjacent string literals: a "/" inside a
# string is not a line continuation, and a triple-quoted block would also send
# the source indentation to the model, so each line is spelled out explicitly.
completion = client.chat.completions.create(
    model="gemma2-9b-it",
    messages=[
        {
            "role": "system",
            "content": (
                "You are a support ticket assistant working in a food delivery company. "
                "You will be given a description of a support ticket raised by a user. "
                "Your task is to extract the following information from the description:\n"
                "\n"
                "1. ticket_id\n"
                "2. order_id\n"
                "3. user_id\n"
                "4. date\n"
                "5. description (create a summary of the user's ticket)\n"
                "6. category (e.g. delivery, payment, etc.)\n"
                "7. priority (e.g. low, medium, high)\n"
                "8. status (e.g. open, closed, in progress)\n"
                "\n"
                "DO NOT include any additional information."
            ),
        },
        {
            "role": "user",
            "content": """ I ordered a chicken biriyani and beef. but beef is missing. order was placed on 2023-01-01. ticket_id is 1232399, order_id is 12312399, user_id is 12322399"""
        },
    ],
    temperature=0.6,
    max_completion_tokens=1024,
    response_model=SupportTicket,  # Use response_model instead of response_format
)

print(completion)  # A SupportTicket instance

ticket_id='1232399' order_id='12312399' user_id='12322399' date='2023-01-01' description='User ordered chicken biriyani and beef, but beef was missing from the order.' category='delivery' priority='high' status='open'


### outlines

In [25]:
import outlines

# Load SmolLM2-135M-Instruct via the transformers backend. The weights are
# downloaded from HuggingFace if they are not already available locally,
# and the model is then held in memory.
model = outlines.models.transformers("HuggingFaceTB/SmolLM2-135M-Instruct")

In [28]:
from outlines_utils import track_logits

# Build a JSON generator for the `User` schema that uses the greedy sampler.
generator = outlines.generate.json(model, User, sampler=outlines.samplers.greedy())

# Attach tooling that tracks token probabilities as they are generated.
track_logits(generator)

<outlines.generate.api.SequenceGeneratorAdapter at 0x17a943640>

In [32]:
from outlines_utils import template

# Show the fully templated chat prompt for a sample request so the exact text
# can be inspected.
print(
    template(model, "Give me a user with a name and an age.", system_prompt="You create users.")
)

<|im_start|>system
You create users.<|im_end|>
<|im_start|>user
Give me a user with a name and an age.<|im_end|>
<|im_start|>assistant



In [33]:
# Drop logits tracked during earlier generations so only this run is recorded.
generator.logits_processor.clear()

# Template the request as a chat prompt and run the generator on it.
person = generator(
    template(model, "Give me a person with a name and an age.", system_prompt="You create users.")
)

# Display the generated object as the cell output.
person



User(name='John', age=30, email=None)