# Pydantic Basics

In [9]:
from pydantic import BaseModel, Field, ValidationError, EmailStr
from typing import Optional
from datetime import date
import json

In [10]:
class UserInput(BaseModel):
    name: str
    email: EmailStr
    query: str
    order_id: Optional[int] = Field(default=None, description="Order ID", ge=1, le=1000000)
    purchase_date: Optional[date] = Field(default=None, description="Date of purchase transactions")



In [13]:
def validate_user_input(input_data):
    try:
        # Attempt to create a UserInput model instance from user input data
        user_input = UserInput(**input_data)
        print(f"✅ Valid user input created:")
        print(f"{user_input.model_dump_json(indent=2)}")
        return user_input
    except ValidationError as e:
        # Capture and display validation errors in a readable format
        print(f"❌ Validation error occurred:")
        for error in e.errors():
            print(f"  - {error['loc'][0]}: {error['msg']}")
        return None

In [11]:
user_input = UserInput(name="Rohit Kumar",
    email="rohitkumar@example.com",
    query="What is the capital of India?")

print(user_input)

name='Rohit Kumar' email='rohitkumar@example.com' query='What is the capital of India?' order_id=None purchase_date=None


In [12]:
# Puttting invalid email format
user_input = UserInput(name="Rohit Kumar",
    email="rohitkumar_example.com",
    query="What is the capital of India?")

print(user_input)

ValidationError: 1 validation error for UserInput
email
  value is not a valid email address: An email address must have an @-sign. [type=value_error, input_value='rohitkumar_example.com', input_type=str]

In [14]:
input_data = {
    "name": "Rohit Kumar",
    "email": "rohitkumar@example.com",
    "query": f"""I bought a laptop carrying case and it turned out to be 
             the wrong size. I need to return it.""",
    "order_id": 12345,
    "purchase_date": date(2025, 12, 31)
}

# Validate the user input data
user_input = validate_user_input(input_data)

✅ Valid user input created:
{
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I bought a laptop carrying case and it turned out to be \n             the wrong size. I need to return it.",
  "order_id": 12345,
  "purchase_date": "2025-12-31"
}


In [16]:
# It will ignore the system_message as it is not a part of the UserInput model
input_data = {
    "name": "Rohit Kumar",
    "email": "rohitkumar@example.com",
    "query": f"""I bought a laptop carrying case and it turned out to be 
             the wrong size. I need to return it.""",
    "order_id": 12345,
    "system_message": "logging status regarding order processing...",
    "purchase_date": date(2025, 12, 31)
}

# Validate the user input data
user_input = validate_user_input(input_data)

✅ Valid user input created:
{
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I bought a laptop carrying case and it turned out to be \n             the wrong size. I need to return it.",
  "order_id": 12345,
  "purchase_date": "2025-12-31"
}


In [None]:
# Date as string which is automatic handled by pydantic
input_data = {
    "name": "Joe User",
    "email": "joe.user@example.com",
    "query": f"""I bought a laptop carrying case and it turned out to be 
             the wrong size. I need to return it.""",
    "order_id": 12345,
    "purchase_date": "2025-12-31"
}

user_input = validate_user_input(input_data)

✅ Valid user input created:
{
  "name": "Joe User",
  "email": "joe.user@example.com",
  "query": "I bought a laptop carrying case and it turned out to be \n             the wrong size. I need to return it.",
  "order_id": 12345,
  "purchase_date": "2025-12-31"
}


In [19]:

print(user_input)

name='Joe User' email='joe.user@example.com' query='I bought a laptop carrying case and it turned out to be \n             the wrong size. I need to return it.' order_id=12345 purchase_date=datetime.date(2025, 12, 31)


# Prompting for structured output and setting up retrying logic

In [22]:
from pydantic import BaseModel, Field, ValidationError, EmailStr
from typing import List, Literal,Optional
from datetime import date
from dotenv import load_dotenv
import openai
import json


In [24]:
load_dotenv()
client = openai.OpenAI()

In [23]:
user_input_json = '''
{
    "name": "Rohit Kumar",
    "email": "rohitkumar@example.com",
    "query": "I forgot my password.",
    "order_number": 12345,
    "purchase_date": null
}
'''

In [25]:
user_input = UserInput.model_validate_json(user_input_json)

In [26]:

class CustomerQuery(UserInput):
    priority: str = Field(..., description="Priority level: low, medium, high")
    category: Literal['refund_request', 'information_request', 'other'] = Field(..., description="Query category")
    is_complaint: bool = Field(..., description="Whether this is a complaint")
    tags: List[str] = Field(..., description="Relevant keyword tags")

In [27]:
# Create a prompt with generic example data to guide LLM.
example_response_structure = f"""{{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}}"""

In [28]:
# Create prompt with user data and expected JSON structure
prompt = f"""
Please analyze this user query\n {user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{example_response_structure}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.
"""

print(prompt)


Please analyze this user query
 {
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.



In [29]:
# Define a function to call the LLM
def call_llm(prompt, model="gpt-4o"):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

In [30]:
# Get response from LLM
response_content = call_llm(prompt)
print(response_content)

```json
{
    "name": "Rohit Kumar",
    "email": "rohitkumar@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "low",
    "category": "account_issue",
    "is_complaint": False,
    "tags": ["password", "support", "account"]
}
```


In [31]:
# Attempt to parse the response into CustomerQuery model
valid_data = CustomerQuery.model_validate_json(response_content)

ValidationError: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

In [33]:
# Define a function to validate an LLM response
def validate_with_model(data_model, llm_response):
    try:
        validated_data = data_model.model_validate_json(llm_response)
        print("data validation successful!")
        print(validated_data.model_dump_json(indent=2))
        return validated_data, None
    except ValidationError as e:
        print(f"error validating data: {e}")
        error_message = (
            f"This response generated a validation error: {e}."
        )
        return None, error_message

In [34]:
# Test your validation function with the LLM response
validated_data, validation_error = validate_with_model(
    CustomerQuery, response_content
)

error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid


In [35]:
# Define a function to create a retry prompt with error feedback
def create_retry_prompt(
    original_prompt, original_response, error_message
):
    retry_prompt = f"""
This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>
{original_prompt}
</original_prompt>

Here is the original llm_response:
<llm_response>
{original_response}
</llm_response>

This response generated an error: 
<error_message>
{error_message}
</error_message>

Compare the error message and the llm_response and identify what 
needs to be fixed or removed
in the llm_response to resolve this error. 

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON string.
"""
    return retry_prompt

In [36]:
# Create a retry prompt for validation errors
validation_retry_prompt = create_retry_prompt(
    original_prompt=prompt,
    original_response=response_content,
    error_message=validation_error
)

print(validation_retry_prompt)


This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>

Please analyze this user query
 {
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.

</original_prompt>

Here is the original llm_response:
<llm_response>
```json
{
    "name": "Rohit Kumar",
    "email": "rohitkumar@

In [37]:
# Call the LLM with the validation retry prompt
validation_retry_response = call_llm(validation_retry_prompt)
print(validation_retry_response)

```json
{
    "name": "Rohit Kumar",
    "email": "rohitkumar@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "low",
    "category": "account_issue",
    "is_complaint": false,
    "tags": ["password", "support", "account"]
}
```


In [38]:
# Attempt to validate retry response from LLM
validated_data, validation_error = validate_with_model(
    CustomerQuery, validation_retry_response
)

error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid


In [39]:
# Create a second retry prompt for validation errors
second_validation_retry_prompt = create_retry_prompt(
    original_prompt=validation_retry_prompt,
    original_response=validation_retry_response,
    error_message=validation_error
)

print(second_validation_retry_prompt)


This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>

This is a request to fix an error in the structure of an llm_response.
Here is the original request:
<original_prompt>

Please analyze this user query
 {
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null
}:

Return your analysis as a JSON object matching this exact structure 
and data types:
{
    name="Example User",
    email="user@example.com",
    query="I ordered a new computer monitor and it arrived with the screen cracked. I need to exchange it for a new one.",
    order_id=12345,
    purchase_date="2025-12-31",
    priority="medium",
    category="refund_request",
    is_complaint=True,
    tags=["monitor", "support", "exchange"] 
}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.

</original_p

In [40]:
# Call the LLM with the second validation retry prompt
second_validation_retry_response = call_llm(
    second_validation_retry_prompt
)
print(second_validation_retry_response)

{
    "name": "Rohit Kumar",
    "email": "rohitkumar@example.com",
    "query": "I forgot my password.",
    "order_id": null,
    "purchase_date": null,
    "priority": "low",
    "category": "account_issue",
    "is_complaint": false,
    "tags": ["password", "support", "account"]
}


In [41]:
# Define a function to automatically retry an LLM call multiple times
def validate_llm_response(
    prompt, data_model, n_retry=5, model="gpt-4o"
):
    # Initial LLM call
    response_content = call_llm(prompt, model=model)
    current_prompt = prompt

    # Try to validate with the model
    # attempt: 0=initial, 1=first retry, ...
    for attempt in range(n_retry + 1):

        validated_data, validation_error = validate_with_model(
            data_model, response_content
        )

        if validation_error:
            if attempt < n_retry:
                print(f"retry {attempt} of {n_retry} failed, trying again...")
            else:
                print(f"Max retries reached. Last error: {validation_error}")
                return None, (
                    f"Max retries reached. Last error: {validation_error}"
                )

            validation_retry_prompt = create_retry_prompt(
                original_prompt=current_prompt,
                original_response=response_content,
                error_message=validation_error
            )
            response_content = call_llm(
                validation_retry_prompt, model=model
            )
            current_prompt = validation_retry_prompt
            continue

        # If you get here, both parsing and validation succeeded
        return validated_data, None

In [42]:
# Test your complete solution with the original prompt
validated_data, error = validate_llm_response(
    prompt, CustomerQuery
)

error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 0 of 5 failed, trying again...
error validating data: 1 validation error for CustomerQuery
category
  Input should be 'refund_request', 'information_request' or 'other' [type=literal_error, input_value='account_issue', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/literal_error
retry 1 of 5 failed, trying again...
error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "name": ...rt", "account"]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 2 of 5 failed, trying again...
data validatio

In [43]:
# Investigate the model_json_schema for CustomerQuery
data_model_schema = json.dumps(
    CustomerQuery.model_json_schema(), indent=2
)
print(data_model_schema)

{
  "properties": {
    "name": {
      "title": "Name",
      "type": "string"
    },
    "email": {
      "format": "email",
      "title": "Email",
      "type": "string"
    },
    "query": {
      "title": "Query",
      "type": "string"
    },
    "order_id": {
      "anyOf": [
        {
          "maximum": 1000000,
          "minimum": 1,
          "type": "integer"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Order ID",
      "title": "Order Id"
    },
    "purchase_date": {
      "anyOf": [
        {
          "format": "date",
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Date of purchase transactions",
      "title": "Purchase Date"
    },
    "priority": {
      "description": "Priority level: low, medium, high",
      "title": "Priority",
      "type": "string"
    },
    "category": {
      "description": "Query categ

In [46]:
user_input.model_dump_json(indent=2)

'{\n  "name": "Rohit Kumar",\n  "email": "rohitkumar@example.com",\n  "query": "I forgot my password.",\n  "order_id": null,\n  "purchase_date": null\n}'

In [44]:
# model_json_schema() is has the rich information about the model
# Create new prompt with user input and model_json_schema
prompt = f"""
Please analyze this user query\n {user_input.model_dump_json(indent=2)}:

Return your analysis as a JSON object matching the following schema:
{data_model_schema}

Respond ONLY with valid JSON. Do not include any explanations or 
other text or formatting before or after the JSON object.
"""

In [45]:
# Run your validate_llm_response function with the new prompt
final_analysis, error = validate_llm_response(
    prompt, CustomerQuery
)

error validating data: 1 validation error for CustomerQuery
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n  "name": "R...   "login"\n  ]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
retry 0 of 5 failed, trying again...
data validation successful!
{
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null,
  "priority": "high",
  "category": "information_request",
  "is_complaint": false,
  "tags": [
    "password",
    "account_access",
    "login"
  ]
}


# Using Pydantic Models for Structured LLM Output

In [48]:
import instructor
import anthropic

In [49]:
print(user_input_json)


{
    "name": "Rohit Kumar",
    "email": "rohitkumar@example.com",
    "query": "I forgot my password.",
    "order_number": 12345,
    "purchase_date": null
}



In [50]:
# Validate using UserInput model

user_input = UserInput.model_validate_json(user_input_json)

print(user_input)

name='Rohit Kumar' email='rohitkumar@example.com' query='I forgot my password.' order_id=None purchase_date=None


In [56]:
prompt = (
    f"Analyze the following customer query {user_input} "
    f"and provide a structured response."
)

In [None]:
load_dotenv()

# Using instructor
anthropic_client = instructor.from_anthropic(anthropic.Anthropic())

response = anthropic_client.messages.create(
    model="claude-3-7-sonnet-latest",  
    max_tokens=1024,
    messages=[
        {
            "role": "user", 
            "content": prompt
        }
    ],
    response_model=CustomerQuery  
)

In [58]:
print(type(response))
print(response.model_dump_json(indent=2))

<class '__main__.CustomerQuery'>
{
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null,
  "priority": "medium",
  "category": "information_request",
  "is_complaint": false,
  "tags": [
    "password",
    "account",
    "login"
  ]
}


In [60]:
# Without using instructor
from openai import OpenAI
openai_client = OpenAI()
response = openai_client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[{"role": "user", "content": prompt}],
    response_format=CustomerQuery
)
response_content = response.choices[0].message.content
print(type(response_content))
print(response_content)

<class 'str'>
{"name":"Rohit Kumar","email":"rohitkumar@example.com","query":"I forgot my password.","order_id":null,"purchase_date":null,"priority":"medium","category":"information_request","is_complaint":false,"tags":["account","password","login"]}


In [61]:
# Above is valid json but not necessary datamodel

In [62]:
# Validate the repsonse you got from the LLM
valid_data = CustomerQuery.model_validate_json(
    response_content
)
print(type(valid_data))
print(valid_data.model_dump_json(indent=2))

<class '__main__.CustomerQuery'>
{
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null,
  "priority": "medium",
  "category": "information_request",
  "is_complaint": false,
  "tags": [
    "account",
    "password",
    "login"
  ]
}


In [63]:
# Try the responses API from OpenAI
response = openai_client.responses.parse(
    model="gpt-4o",
    input=[{"role": "user", "content": prompt}],
    text_format=CustomerQuery
)

print(type(response))

<class 'openai.types.responses.parsed_response.ParsedResponse[CustomerQuery]'>


In [64]:
# Investigate class inheritance structure of the OpenAI response
def print_class_inheritence(llm_response):
    for cls in type(llm_response).mro():
        print(f"{cls.__module__}.{cls.__name__}")

print_class_inheritence(response)

openai.types.responses.parsed_response.ParsedResponse[CustomerQuery]
openai.types.responses.parsed_response.ParsedResponse
openai.types.responses.response.Response
openai._models.GenericModel
openai._compat.GenericModel
openai.BaseModel
pydantic.main.BaseModel
typing.Generic
builtins.object


In [65]:
# Print the response type and content 
print(type(response.output_parsed))
print(response.output_parsed.model_dump_json(indent=2))

<class '__main__.CustomerQuery'>
{
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null,
  "priority": "medium",
  "category": "information_request",
  "is_complaint": false,
  "tags": [
    "password",
    "account_access",
    "login_issue"
  ]
}


In [70]:
# Try out the Pydantic AI package for defining an agent and getting a structured response
from pydantic_ai import Agent
import nest_asyncio
nest_asyncio.apply()

agent = Agent(
    model="google-gla:gemini-2.0-flash",
    output_type=CustomerQuery,
)

response = agent.run_sync(prompt)

In [71]:
# Print out the repsonse type and content
print(type(response.output))
print(response.output.model_dump_json(indent=2))

<class '__main__.CustomerQuery'>
{
  "name": "Rohit Kumar",
  "email": "rohitkumar@example.com",
  "query": "I forgot my password.",
  "order_id": null,
  "purchase_date": null,
  "priority": "low",
  "category": "information_request",
  "is_complaint": false,
  "tags": [
    "password reset"
  ]
}
