### Working on Structured Outputs with the Instructor Library

In [1]:
import openai
import instructor
from qdrant_client import QdrantClient
from pydantic import BaseModel, Field

### Basic example

In [5]:
# Question sent to the model by the request cells further down.
# Assembled line by line; the leading and trailing newlines are part of the value.
prompt = (
    "\n"
    "You are a helpful assistant that can answer questions and help with tasks.\n"
    "\n"
    "Question: What is the capital of the India?\n"
)


In [6]:
# Plain chat completion through the module-level OpenAI client:
# the answer is read back as the text content of the first choice.
response = openai.chat.completions.create(
    messages=[dict(role="user", content=prompt)],
    model="gpt-4o-mini",
)

print(response.choices[0].message.content)

The capital of India is New Delhi.


In [7]:
# Add instructor library to the mix

# Response schema for the structured request: a city name and its country.
# Documented with comments only; the field descriptions below are runtime strings.
class City(BaseModel):
    name: str = Field(
        description="The name of the city",
    )
    country: str = Field(
        description="The country the city is in",
    )

# Wrap a regular OpenAI client with instructor so `create` accepts `response_model`.
instructor_prompt = instructor.from_openai(openai.OpenAI())

# Same prompt as the plain request, but the result comes back as a `City` object.
response = instructor_prompt.chat.completions.create(
    response_model=City,
    model="gpt-4o-mini",
    messages=[dict(role="user", content=prompt)],
)

# Bare last expression so the notebook displays the parsed object.
response

City(name='New Delhi', country='India')

### Complex Example - inducing chain of thought

In [14]:
from typing import Optional, List
from pydantic import field_validator

# A user record: a name, an age, and an optional email address.
# Both validators raise ValueError, which pydantic reports as a validation error.
class User(BaseModel):
    name: str = Field(description="The name of the user")
    age: int = Field(description="The age of the user")
    # Defaults to None when no email is supplied.
    email: Optional[str] = Field(None, description="The email of the user")

    @field_validator("name")
    @classmethod
    def name_must_be_uppercase(cls, v: str) -> str:
        """Accept the name only if ``str.isupper()`` is true for it.

        Args:
            v: The candidate value for ``name``.

        Returns:
            The value unchanged.

        Raises:
            ValueError: If ``v.isupper()`` is false.
        """
        if not v.isupper():
            raise ValueError("Name must be uppercase")
        return v

    @field_validator("age")
    @classmethod
    def age_must_be_positive(cls, v: int) -> int:
        """Accept the age only if it is strictly greater than zero.

        Args:
            v: The candidate value for ``age``.

        Returns:
            The value unchanged.

        Raises:
            ValueError: If ``v`` is zero or negative.
        """
        if v <= 0:
            raise ValueError("Age must be positive")
        return v
    
 
# Top-level response schema: a free-text reasoning field declared first,
# followed by the list of extracted `User` records. Both fields are required.
class UserExtraction(BaseModel):
    chain_of_thought: str = Field(
        description="Think step-by-step about which users to extract and why.",
    )
    users: List[User] = Field(
        description="The user object",
    )






In [19]:
import warnings

data = "KRISHNA (25) and KASHVI (10) from accounting. krishna's email is krishna@work.com"

# Hide UserWarning messages attributed to pydantic modules; the filter stays
# installed for the remainder of the kernel session.
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
print(f"--- Input: '{data}' ---\n")

# `create_partial` streams the response object as it is being built.
# `max_retries=3` is intended to let the model correct validation errors
# (such as lowercase names).
# NOTE(review): confirm that retries and field validators actually apply to
# streamed partial responses in the installed instructor version.
extraction_stream = instructor_prompt.chat.completions.create_partial(
    model="gpt-4o",
    response_model=UserExtraction,
    messages=[
        {"role": "user", "content": f"Extract users from this text: {data}"},
    ],
    max_retries=3,
)

print("-" * 100)

# Drain the stream and keep only the last chunk it yields.
final_result = None
for chunk in extraction_stream:
    final_result = chunk

# An empty stream would otherwise fail below with an opaque AttributeError on None.
if final_result is None:
    raise RuntimeError("The extraction stream did not yield any result.")

print(final_result.model_dump_json(indent=2))

--- Input: 'KRISHNA (25) and KASHVI (10) from accounting. krishna's email is krishna@work.com' ---

----------------------------------------------------------------------------------------------------
{
  "chain_of_thought": "The text provides information about two users, KRISHNA and KASHVI. KRISHNA is 25 years old and has an email address specified. KASHVI is 10 years old but no email is provided or implied for KASHVI. Both are from the accounting department.",
  "users": [
    {
      "name": "KRISHNA",
      "age": 25,
      "email": "krishna@work.com"
    },
    {
      "name": "KASHVI",
      "age": 10,
      "email": null
    }
  ]
}
