# Pydantic Parser for Structured Output

Reference: [LangChain how-to: prompting and parsing model outputs directly](https://python.langchain.com/docs/how_to/structured_output/#prompting-and-parsing-model-outputs-directly)

In [1]:
from typing import List, Literal, Union

from langchain_core.output_parsers import PydanticOutputParser
# from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from langchain_core.exceptions import OutputParserException
from pydantic import BaseModel, Field
from langchain_core.prompts import PromptTemplate

from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

# Chat model used by every chain in this notebook.
llm = ChatOllama(
    model="llama3.2",
    temperature=0,
)


In [9]:
# nested model example

class Person(BaseModel):
    """Information about a person."""

    # NOTE: the class docstring and the Field descriptions are copied into the
    # JSON schema that is sent to the model, so their wording is part of the prompt.
    # Neither field has a default value.
    name: str = Field(description="The name of the person")
    height_in_meters: float = Field(
        description="The height of the person expressed in meters.",
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    # Top-level container: one Person entry per individual; no default is given.
    people: list[Person]


# Parser that turns the model's reply into a People instance
people_parser = PydanticOutputParser(pydantic_object=People)

# Render the schema instructions once: they are injected into the system
# message below and printed at the end of the cell.
people_format_instructions = people_parser.get_format_instructions()

# Chat prompt: the system message carries the formatting rules, the human
# message carries the user's query.
people_messages = [
    (
        "system",
        "Answer the user query. Wrap the output in `json` tags\n{format_instructions}",
    ),
    ("human", "{query}"),
]
people_prompt = ChatPromptTemplate.from_messages(people_messages).partial(
    format_instructions=people_format_instructions
)

# Inspect the output formatting instructions
print(people_format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Person": {"description": "Information about a person.", "properties": {"name": {"description": "The name of the person", "title": "Name", "type": "string"}, "height_in_meters": {"description": "The height of the person expressed in meters.", "title": "Height In Meters", "type": "number"}}, "required": ["name", "height_in_meters"], "title": "Person", "type": "object"}}, "description": "Identifying information about all people in a text.", "properties": {"people": {"items": {"$ref": "#/$defs/Person"}, "title": "People", "type": "arra

In [10]:
# Inspect the fully rendered prompt that is submitted to the LLM

people_query = "Anna is 23 years old and she is 6 feet tall"

# Bind the text to the template's only open variable, "query".
# (`people_query` is the name defined above; a bare `query` is not defined in
# this notebook and would raise NameError on a fresh kernel.)
print(people_prompt.invoke({"query": people_query}).to_string())

System: Answer the user query. Wrap the output in `json` tags
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Person": {"description": "Information about a person.", "properties": {"name": {"description": "The name of the person", "title": "Name", "type": "string"}, "height_in_meters": {"description": "The height of the person expressed in meters.", "title": "Height In Meters", "type": "number"}}, "required": ["name", "height_in_meters"], "title": "Person", "type": "object"}}, "description": "Identifying information about all people in a text.", "properties": {"people": {"items"

In [11]:
# Pipeline: fill the prompt -> call the LLM -> parse the reply into People
people_chain = people_prompt | llm | people_parser

# Keep the call as the cell's last expression so the parsed result is displayed
people_inputs = {"query": people_query}
people_chain.invoke(people_inputs)

People(people=[Person(name='Anna', height_in_meters=1.73)])

# Union of Multiple Output Classes

In [2]:
# First, let's define our base models for different types of responses
class MovieReview(BaseModel):
    # Review of a film. Documented with comments rather than a docstring:
    # pydantic copies a model docstring into the JSON schema "description",
    # which would change the format instructions sent to the LLM.

    # Fixed tag marking this payload as a movie review
    media_type: Literal["movie"] = "movie"
    title: str = Field(description="Title of the movie")
    # ge/le restrict the score to the inclusive range 1..5
    rating: int = Field(
        ge=1,
        le=5,
        description="Rating from 1-5",
    )
    review: str = Field(description="Brief review text")

class BookReview(BaseModel):
    # Review of a book. Documented with comments rather than a docstring:
    # pydantic copies a model docstring into the JSON schema "description",
    # which would change the format instructions sent to the LLM.

    # Fixed tag marking this payload as a book review
    media_type: Literal["book"] = "book"
    title: str = Field(description="Title of the book")
    author: str = Field(description="Author of the book")
    # ge/le restrict the score to the inclusive range 1..5
    rating: int = Field(
        ge=1,
        le=5,
        description="Rating from 1-5",
    )
    review: str = Field(description="Brief review text")

class GameReview(BaseModel):
    # Review of a video game. Documented with comments rather than a docstring:
    # pydantic copies a model docstring into the JSON schema "description",
    # which would change the format instructions sent to the LLM.

    # Fixed tag marking this payload as a game review
    media_type: Literal["game"] = "game"
    title: str = Field(description="Title of the game")
    platform: str = Field(
        description="Gaming platform (e.g., PC, PS5, Xbox)",
    )
    # ge/le restrict the score to the inclusive range 1..5
    rating: int = Field(
        ge=1,
        le=5,
        description="Rating from 1-5",
    )
    review: str = Field(description="Brief review text")

# Wrapper model (not a base class) whose single field holds exactly one of
# the three review types
class ReviewUnion(BaseModel):
    # `media_type` is the discriminator: its literal value ("movie", "book" or
    # "game") determines which review model the payload is validated against.
    review: Union[MovieReview, BookReview, GameReview] = Field(
        discriminator="media_type"
    )


In [5]:

# Prompt template shared by the movie-only and the multi-type review chains.
# Spelled out piece by piece so the leading and trailing blank lines are
# visible; {item} and {format_instructions} are filled in later.
multi_review_template = (
    "\n"
    "System: Provide a succinct and concise review for the following entertainment item: {item}\n"
    "\n"
    "{format_instructions}\n"
    "\n"
)


In [6]:
# Parser bound to the single MovieReview schema
movie_parser = PydanticOutputParser(pydantic_object=MovieReview)

# Pre-fill the schema instructions so that only {item} stays open at call time
movie_format_instructions = movie_parser.get_format_instructions()
movie_prompt = PromptTemplate(
    input_variables=["item"],
    partial_variables={"format_instructions": movie_format_instructions},
    template=multi_review_template,
)

# Pipeline: fill the prompt -> call the LLM -> parse the reply into MovieReview
movie_chain = movie_prompt | llm | movie_parser
def get_movie_review(item_description: str) -> "MovieReview | None":
    """Run the movie-review chain for one item.

    Args:
        item_description: Free-text description of the item; it fills the
            ``{item}`` slot of the prompt.

    Returns:
        The value produced by ``movie_chain`` (parsed by ``movie_parser``), or
        ``None`` when an ``OutputParserException`` was raised; in that case the
        error is printed.
    """
    payload = {"item": item_description}
    try:
        return movie_chain.invoke(payload)
    except OutputParserException as parse_error:
        # Report and swallow parse failures so a loop over items keeps going
        print(f"Parsing error: {parse_error}")
    return None

# Show the MovieReview schema instructions that are injected into the prompt
movie_instructions_text = movie_parser.get_format_instructions()
print(movie_instructions_text)


The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"media_type": {"const": "movie", "default": "movie", "enum": ["movie"], "title": "Media Type", "type": "string"}, "title": {"description": "Title of the movie", "title": "Title", "type": "string"}, "rating": {"description": "Rating from 1-5", "maximum": 5, "minimum": 1, "title": "Rating", "type": "integer"}, "review": {"description": "Brief review text", "title": "Review", "type": "string"}}, "required": ["title", "rating", "review"]}
```


In [7]:
# Parser for the wrapper model, so one chain can return any of the three
# review types
multiple_review_parser = PydanticOutputParser(pydantic_object=ReviewUnion)

# Same template as the movie-only prompt; only the injected schema differs
multi_review_format_instructions = multiple_review_parser.get_format_instructions()
multi_review_prompt = PromptTemplate(
    input_variables=["item"],
    partial_variables={"format_instructions": multi_review_format_instructions},
    template=multi_review_template,
)

# Pipeline: fill the prompt -> call the LLM -> parse the reply into ReviewUnion
multi_review_chain = multi_review_prompt | llm | multiple_review_parser

# Function to process reviews
def get_review(item_description: str) -> "ReviewUnion | None":
    """Run the multi-review chain for one item.

    Args:
        item_description: Free-text description of the item; it fills the
            ``{item}`` slot of the prompt.

    Returns:
        The value produced by ``multi_review_chain`` (parsed by
        ``multiple_review_parser``), or ``None`` when an
        ``OutputParserException`` was raised; in that case the error is printed.
    """
    payload = {"item": item_description}
    try:
        return multi_review_chain.invoke(payload)
    except OutputParserException as parse_error:
        # Report and swallow parse failures so a loop over items keeps going
        print(f"Parsing error: {parse_error}")
    return None

# Show the ReviewUnion schema instructions that are injected into the prompt
union_instructions_text = multiple_review_parser.get_format_instructions()
print(union_instructions_text)


The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"BookReview": {"properties": {"media_type": {"const": "book", "default": "book", "enum": ["book"], "title": "Media Type", "type": "string"}, "title": {"description": "Title of the book", "title": "Title", "type": "string"}, "author": {"description": "Author of the book", "title": "Author", "type": "string"}, "rating": {"description": "Rating from 1-5", "maximum": 5, "minimum": 1, "title": "Rating", "type": "integer"}, "review": {"description": "Brief review text", "title": "Review", "type": "string"}}, "required": ["title", "author"

In [8]:
# Items to review: one film, one book, one game
items = [
    "The Matrix (1999 sci-fi film)",
    "The Hunger Games by Suzanne Collins",
    "Red Dead Redemption 2 for PS4",
]

# Render both prompt variants for every item so they can be compared
for item in items:
    # Name the template variable explicitly instead of passing the bare string
    prompt_inputs = {"item": item}

    print(f"\nMulti-review prompt for: {item}")
    multi_prompt_value = multi_review_prompt.invoke(prompt_inputs)
    print(multi_prompt_value.to_string())

    print(f"\nMovie-review prompt for: {item}")
    movie_prompt_value = movie_prompt.invoke(prompt_inputs)
    print(movie_prompt_value.to_string())


Multi-review prompt for: The Matrix (1999 sci-fi film)

System: Provide a succinct and concise review for the following entertainment item: The Matrix (1999 sci-fi film)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"BookReview": {"properties": {"media_type": {"const": "book", "default": "book", "enum": ["book"], "title": "Media Type", "type": "string"}, "title": {"description": "Title of the book", "title": "Title", "type": "string"}, "author": {"description": "Author of the book", "title": "Author", "type": "string"}, "rating": {"description": "Rating from 1-5", "maximum": 

In [9]:
# Parse by the MovieReview schema
# Every item (film, book and game) goes through the movie-only chain
for item in items:
    print(f"\nProcessing review for: {item}")
    review = get_movie_review(item)

    # get_movie_review returns None after it has printed a parsing error
    if review is None:
        continue

    # movie_parser is bound to MovieReview, so a successful result never has
    # the book-only `author` or game-only `platform` fields; the former
    # isinstance(BookReview / GameReview) branches could not run and are gone.
    print(f"Media Type: {review.media_type}")
    print(f"Title: {review.title}")
    print(f"Rating: {review.rating}/5")
    print(f"Review: {review.review}")




Processing review for: The Matrix (1999 sci-fi film)
Parsing error: Failed to parse MovieReview from completion {"properties": {"media_type": {"const": "movie", "default": "movie", "enum": ["movie"], "title": "Media Type", "type": "string"}, "title": {"description": "The Matrix (1999 sci-fi film)", "title": "The Matrix", "type": "string"}, "rating": {"description": "A thought-provoking and visually stunning film that explores the nature of reality.", "maximum": 5, "minimum": 1, "title": "Rating", "type": "integer"}, "review": {"description": "A classic sci-fi film with innovative special effects and a gripping storyline.", "title": "Review", "type": "string"}}}. Got: 3 validation errors for MovieReview
title
  Field required [type=missing, input_value={'properties': {'media_ty...ew', 'type': 'string'}}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/missing
rating
  Field required [type=missing, input_value={'properties': {'media_ty...ew', 'type'

In [10]:
# Parse by the union of the three review schemas
for item in items:
    print(f"\nProcessing review for: {item}")
    review = get_review(item)
    if not review:
        # Parsing failed; get_review has already printed the error
        continue

    # Unwrap the concrete review held by the ReviewUnion wrapper
    the_review = review.review
    print(f"Media Type: {the_review.media_type}")
    print(f"Title: {the_review.title}")

    # Print type-specific fields: `platform` for games, `author` for books
    if isinstance(the_review, GameReview):
        print(f"Platform: {the_review.platform}")
    elif isinstance(the_review, BookReview):
        print(f"Author: {the_review.author}")

    print(f"Rating: {the_review.rating}/5")
    print(f"Review: {the_review.review}")




Processing review for: The Matrix (1999 sci-fi film)
Parsing error: Failed to parse ReviewUnion from completion null. Got: 1 validation error for ReviewUnion
  Input should be a valid dictionary or instance of ReviewUnion [type=model_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.9/v/model_type

Processing review for: The Hunger Games by Suzanne Collins
Parsing error: Failed to parse ReviewUnion from completion null. Got: 1 validation error for ReviewUnion
  Input should be a valid dictionary or instance of ReviewUnion [type=model_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.9/v/model_type

Processing review for: Red Dead Redemption 2 for PS4
Parsing error: Failed to parse ReviewUnion from completion {"$defs": {"GameReview": {"properties": {"media_type": {"const": "game", "default": "game", "enum": ["game"], "title": "Media Type", "type": "string"}, "title": {"