# Structured Output

Models can be asked to provide their response in a format matching a given schema. This is useful for ensuring the output can be easily parsed in a structured format and used in subsequent processing. LangChain supports multiple schema types and methods for enforcing structured output.

## Pydantic

Pydantic provides rich support for field validation, descriptions and nested structures. 

In [None]:
import getpass
import os

from langchain.chat_models import init_chat_model

# Prompt only when the key is missing: assigning "" unconditionally would
# replace a key already exported in the environment with an empty value, and
# typing the key into the notebook source risks committing a secret.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# Chat model shared by the examples in this notebook.
model = init_chat_model("gpt-4.1")
model

ChatOpenAI(profile={'max_input_tokens': 1047576, 'max_output_tokens': 32768, 'image_inputs': True, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True, 'structured_output': True, 'image_url_inputs': True, 'pdf_inputs': True, 'pdf_tool_message': True, 'image_tool_message': True, 'tool_choice': True}, client=<openai.resources.chat.completions.completions.Completions object at 0x10d670910>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x10ee015d0>, root_client=<openai.OpenAI object at 0x10e30f700>, root_async_client=<openai.AsyncOpenAI object at 0x10ee01510>, model_name='gpt-4.1', model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [9]:
from pydantic import BaseModel, Field

class Movie(BaseModel):
    # Schema for the flat movie example. Each field carries a description via
    # Field(...). There must be no trailing comma after Field(...):
    # `title: str = Field(...),` assigns a one-element tuple as the default,
    # so the Field metadata (and its description) is no longer applied.
    title: str = Field(description="The title of the movie")
    year: int = Field(description="Movie's release year")
    director: str = Field(description="Director of the movie")
    cast: str = Field(description="About the cast of the movie")
    rating: int = Field(description="Rating of the movie out of 10")

In [10]:
# Bind the Movie schema to the chat model; the wrapped object is what later
# cells invoke to get Movie-shaped results.
modelwithstructuredoutput = model.with_structured_output(Movie)
# Display the wrapped runnable (shown as a RunnableBinding in the output).
modelwithstructuredoutput



RunnableBinding(bound=ChatOpenAI(profile={'max_input_tokens': 1047576, 'max_output_tokens': 32768, 'image_inputs': True, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True, 'structured_output': True, 'image_url_inputs': True, 'pdf_inputs': True, 'pdf_tool_message': True, 'image_tool_message': True, 'tool_choice': True}, client=<openai.resources.chat.completions.completions.Completions object at 0x10d670910>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x10ee015d0>, root_client=<openai.OpenAI object at 0x10e30f700>, root_async_client=<openai.AsyncOpenAI object at 0x10ee01510>, model_name='gpt-4.1', model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True), kwargs={'response_format': <class '__main__.Movie'>, 'ls_structured_output_format': {'kwargs': {'method': 'json_schema', 'strict': None}, 'schema': {'type': 'fu

In [13]:
# The recorded result below is a populated Movie instance rather than free text.
modelwithstructuredoutput.invoke("Tell me about the movie Oppenheimer")

Movie(title='Oppenheimer', year=2023, director='Christopher Nolan', cast='Cillian Murphy, Emily Blunt, Matt Damon, Robert Downey Jr., Florence Pugh, Josh Hartnett, Rami Malek, Kenneth Branagh', rating=8)

In [14]:
# Baseline for comparison: the unwrapped model returns an AIMessage whose
# content is free-form text.
model.invoke("Tell me about the movie Oppenheimer")

AIMessage(content='**Oppenheimer** is a 2023 biographical epic drama film written and directed by **Christopher Nolan**. The movie is based on the 2005 biography **"American Prometheus: The Triumph and Tragedy of J. Robert Oppenheimer"** by Kai Bird and Martin J. Sherwin. It stars **Cillian Murphy** as J. Robert Oppenheimer, the theoretical physicist who is best known as the "father of the atomic bomb" for his role in leading the Manhattan Project—the World War II effort to develop nuclear weapons.\n\n**Plot Synopsis:**\nThe film delves into Oppenheimer\'s life, focusing particularly on his pivotal contribution to the creation of the atomic bomb, the scientific and ethical dilemmas involved, as well as the political fallout and personal consequences he faced after the bombings of Hiroshima and Nagasaki. The narrative also explores Oppenheimer\'s relationships with colleagues, such as General Leslie Groves (played by Matt Damon), and his complex personal life, including his marriage to 

### Nested Structured Output

In [21]:
from pydantic import BaseModel, Field

class Actor(BaseModel):
    # One cast member; used as the element type of MovieDetails.cast.
    name: str
    age: int
    role: str  # in the recorded output this holds the character played


class MovieDetails(BaseModel):
    # Movie schema whose `cast` field nests a list of Actor models.
    title: str
    year: int
    director: str
    cast: list[Actor]  # nested structure: each entry is an Actor
    rating: int
    # Only a description is supplied (no default value); the value may be None.
    budget: float | None = Field(description="Budget of the movie in millions CAD")

# Bind the nested schema; the recorded result is a MovieDetails instance whose
# `cast` is a list of Actor objects.
model_with_nested_structure = model.with_structured_output(MovieDetails)
model_with_nested_structure.invoke("Tell me about the movie Oppenheimer")

MovieDetails(title='Oppenheimer', year=2023, director='Christopher Nolan', cast=[Actor(name='Cillian Murphy', age=47, role='J. Robert Oppenheimer'), Actor(name='Emily Blunt', age=41, role='Katherine Oppenheimer'), Actor(name='Robert Downey Jr.', age=59, role='Lewis Strauss'), Actor(name='Matt Damon', age=53, role='Leslie Groves')], rating=8, budget=100.0)

## TypedDict

TypedDict provides a way to define a dictionary type with a fixed set of keys, each with a value of a declared type. At runtime a TypedDict value is a plain `dict`; the declared types are only checked by static type checkers. It is ideal when we want to enforce a specific structure for a dictionary and don't need runtime validation.

In [18]:
from typing_extensions import Annotated, TypedDict

class MovieDict(TypedDict):
    # TypedDict counterpart of the Movie schema. Each field is written as
    # Annotated[type, default, description]; `...` in the default slot means
    # "no default" (LangChain's convention for TypedDict schemas).
    title: Annotated[str, ..., "The title of the movie"]
    year: Annotated[int, ..., "Movie's release year"]
    director: Annotated[str, ..., "Director of the movie"]
    cast: Annotated[str, ..., "About the cast of the movie"]
    rating: Annotated[int, ...,"Rating of the movie out of 10"]

In [19]:
# With a TypedDict schema the recorded result is a plain dict keyed by the
# MovieDict field names.
model_with_typeddict = model.with_structured_output(MovieDict)
model_with_typeddict.invoke("Tell me about the movie Oppenheimer")

{'title': 'Oppenheimer',
 'year': 2023,
 'director': 'Christopher Nolan',
 'cast': 'Cillian Murphy, Emily Blunt, Matt Damon, Robert Downey Jr., Florence Pugh, Benny Safdie, Josh Hartnett, Rami Malek, Kenneth Branagh',
 'rating': 9}

### Nested Structured Output

In [None]:
from typing_extensions import Annotated, TypedDict

class Actor(TypedDict):
    # One cast member; element type of MovieDetails["cast"].
    name: str
    age: int
    role: str


class MovieDetails(TypedDict):
    # Movie schema whose `cast` field nests a list of Actor dicts.
    title: str
    year: int
    director: str
    cast: list[Actor]
    rating: int
    # A TypedDict field cannot be given a default value, and pydantic's
    # Field(...) is not imported in this cell. The description therefore goes
    # into the Annotated metadata as (type, default, description), with `...`
    # meaning "no default".
    budget: Annotated[float | None, ..., "Budget of the movie in millions CAD"]

# Bind the nested TypedDict schema; the recorded result is a plain dict whose
# 'cast' value is a list of dicts.
model_with_nested_structure = model.with_structured_output(MovieDetails)
model_with_nested_structure.invoke("Tell me about the movie Oppenheimer")

{'title': 'Oppenheimer',
 'year': 2023,
 'director': 'Christopher Nolan',
 'cast': [{'name': 'Cillian Murphy',
   'age': 46,
   'role': 'J. Robert Oppenheimer'},
  {'name': 'Emily Blunt', 'age': 39, 'role': "Katherine 'Kitty' Oppenheimer"},
  {'name': 'Matt Damon', 'age': 52, 'role': 'Leslie Groves'},
  {'name': 'Robert Downey Jr.', 'age': 58, 'role': 'Lewis Strauss'}],
 'rating': 8,
 'budget': 100000000}

In [23]:
# runtime validation pydantic
from pydantic import BaseModel

class User(BaseModel):
    # Minimal model used to demonstrate pydantic's runtime validation.
    name: str
    age: int

# Deliberate failure: "twenty" cannot be parsed as an integer, so constructing
# the model raises instead of storing the bad value.
User(name="Alice", age="twenty")
# ❌ ValidationError


ValidationError: 1 validation error for User
age
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='twenty', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/int_parsing

In [24]:
# no runtime validation

from typing import TypedDict

class User(TypedDict):
    # Same two fields as the pydantic User, declared as a TypedDict instead.
    name: str
    age: int

data = {"name": "Alice", "age": "twenty"}  # ❌ wrong type
user: User = data  # No runtime error: the annotation is not enforced when the code runs


In [25]:
# Capability metadata of the plain chat model (token limits, supported
# input/output modalities, tool calling, structured output).
model.profile

{'max_input_tokens': 1047576,
 'max_output_tokens': 32768,
 'image_inputs': True,
 'audio_inputs': False,
 'video_inputs': False,
 'image_outputs': False,
 'audio_outputs': False,
 'video_outputs': False,
 'reasoning_output': False,
 'tool_calling': True,
 'structured_output': True,
 'image_url_inputs': True,
 'pdf_inputs': True,
 'pdf_tool_message': True,
 'image_tool_message': True,
 'tool_choice': True}

In [None]:
# NOTE(review): this cell has no recorded output. Per the author's note, the
# wrapped runnable does not expose `.profile` — confirm the exact error raised.
modelwithstructuredoutput.profile # can't access profile

## Data Classes

A dataclass is a simple way to create a class that is primarily used to store data, although there is no restriction on the functionality of the class. You can create a dataclass using the `@dataclass` decorator.

### Pydantic

In [34]:
from langchain.agents import create_agent
from pydantic import BaseModel, Field

class ContactInfo(BaseModel):
    """Contact information of a person"""
    # Schema passed to the agent as `response_format`; each field is a string
    # with its own description.
    name: str = Field(description="The name of the person")
    email: str = Field(description="The email of the person")
    phone: str = Field(description="The phone number of the person")


# Agent whose final answer is returned in the ContactInfo schema.
agent = create_agent(model="gpt-5", response_format=ContactInfo)

# Single user turn containing the raw text to extract from.
result = agent.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": "Extract contact info from : John Doe, Email: johndoe@example.com, Phone: 123-456-7890",
            }
        ]
    }
)

# Display the full agent state (the message history is shown in the output).
result

{'messages': [HumanMessage(content='Extract contact info from : John Doe, Email: johndoe@example.com, Phone: 123-456-7890', additional_kwargs={}, response_metadata={}, id='72d578cc-4bc8-4922-be9a-bf61598391bf'),
  AIMessage(content='{"name":"John Doe","email":"johndoe@example.com","phone":"123-456-7890"}', additional_kwargs={'parsed': None, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 869, 'prompt_tokens': 208, 'total_tokens': 1077, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 832, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-Cr8bIzMyjrTcMSiCFFwuhN2C6pYY0', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019b5c52-a52a-7ec0-8d8c-36cbe31a7582-0', usage_metadata={'input_tokens': 208, 'output_tokens': 869, 'tota

In [35]:
# The parsed object sits under the "structured_response" key of the agent
# result; here it is a ContactInfo instance.
result["structured_response"]

ContactInfo(name='John Doe', email='johndoe@example.com', phone='123-456-7890')

### TypedDict

In [None]:
from langchain.agents import create_agent
from typing_extensions import TypedDict

class ContactInfo(TypedDict):
    """Contact information of a person"""
    # NOTE(review): unlike Field(description=...), the `#` comments below do
    # not exist at runtime, so they presumably never reach the schema sent to
    # the model — confirm whether per-field descriptions are wanted here.
    name: str  # The name of the person
    email: str  # The email of the person
    phone: str  # The phone number of the person


# Agent whose final answer is returned in the ContactInfo schema.
agent = create_agent(model="gpt-5", response_format=ContactInfo)

# Single user turn containing the raw text to extract from.
result = agent.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": "Extract contact info from : John Doe, Email: johndoe@example.com, Phone: 123-456-7890",
            }
        ]
    }
)

# Display the full agent state (the message history is shown in the output).
result

{'messages': [HumanMessage(content='Extract contact info from : John Doe, Email: johndoe@example.com, Phone: 123-456-7890', additional_kwargs={}, response_metadata={}, id='29db1e44-0759-4b53-9214-10cefa60a970'),
  AIMessage(content='{"name":"John Doe","email":"johndoe@example.com","phone":"123-456-7890"}', additional_kwargs={'parsed': None, 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 421, 'prompt_tokens': 183, 'total_tokens': 604, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 384, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-Cr8hS4u1wN6GcIVI2GIvkfgD9WNDG', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019b5c58-75f7-7d03-bf09-19eee707a469-0', usage_metadata={'input_tokens': 183, 'output_tokens': 421, 'total

In [None]:
from dataclasses import dataclass
from langchain.agents import create_agent
from typing import TypedDict

# A dataclass must be a plain class. Deriving from TypedDict makes calling
# ContactInfo(...) build an ordinary dict, so the generated __init__ and
# attribute access would never apply.
@dataclass
class ContactInfo:
    """Contact information of a person"""

    name: str
    email: str
    phone: str