In [1]:
!pip install langchain langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.18-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.0-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB

### Using Pydantic with LangChain

In [4]:
import os

from langchain.chat_models import ChatOpenAI
from pydantic import BaseModel, Field, ValidationError

In [42]:
# Chat model shared by every cell below.
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being written into the notebook, so the secret is never saved or committed
# with the file. A KeyError is raised here if the variable is not set.
# temperature=0 requests the least random sampling from the model.
llm = ChatOpenAI(temperature=0, api_key=os.environ["OPENAI_API_KEY"])

### 1️⃣ Validating LLM Inputs with Pydantic

In [6]:
# Baseline call: a raw string is sent to the model with no validation step.
llm.invoke("What is AI?")

AIMessage(content='AI, or artificial intelligence, refers to the simulation of human intelligence processes by machines, especially computer systems. These processes include learning, reasoning, problem-solving, perception, and language understanding. AI technologies are used in a wide range of applications, such as speech recognition, image recognition, natural language processing, and autonomous vehicles.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 65, 'prompt_tokens': 11, 'total_tokens': 76, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f755b80c-a9c8-4f34-880f-c0ca3dab020c-0')

In [7]:
class pydantic_prompt(BaseModel):
  # Input schema for an LLM request; constructing an instance validates both
  # fields and raises a ValidationError when a constraint is violated.

  # Required prompt text, between 5 and 50 characters long.
  prompt: str = Field(..., min_length=5, max_length=50, description="Input text for the model")
  # Required response-length limit, an integer from 10 to 500 inclusive.
  max_tokens: int = Field(..., ge=10, le=500, description="Limit response length")

In [8]:
# Build a request from well-formed values; construction runs the field checks
# declared on pydantic_prompt.
validated_input = pydantic_prompt(prompt="What is AI?", max_tokens=100)

In [9]:
# Show the validated model instance as the cell output.
validated_input

pydantic_prompt(prompt='What is AI?', max_tokens=100)

In [10]:
# Send the validated prompt to the model. The validated max_tokens value is
# forwarded as a call-time parameter so the response-length limit declared on
# pydantic_prompt is actually applied to the request, not just checked.
llm.invoke(validated_input.prompt, max_tokens=validated_input.max_tokens)

AIMessage(content='AI, or artificial intelligence, refers to the simulation of human intelligence processes by machines, especially computer systems. These processes include learning, reasoning, problem-solving, perception, and language understanding. AI technologies are used in a wide range of applications, such as speech recognition, image recognition, natural language processing, and autonomous vehicles.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 65, 'prompt_tokens': 11, 'total_tokens': 76, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-b87ab530-d56a-4f56-94ce-b7ee426ba021-0')

In [11]:
# Demonstrate rejection of invalid input: "AI?" has 3 characters, which is
# below the min_length=5 constraint on pydantic_prompt.prompt.
# The error is the point of this cell, so it is caught and printed rather than
# left uncaught; an uncaught exception would stop "Restart & Run All" here.
try:
    pydantic_prompt(prompt="AI?", max_tokens=100)
except ValidationError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValidationError: 1 validation error for pydantic_prompt
prompt
  String should have at least 5 characters [type=string_too_short, input_value='AI?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/string_too_short

### 2️⃣ Validating LLM Outputs with Pydantic

In [13]:
class LLMResponse(BaseModel):
    # Schema used to validate a response payload; all three fields are required.
    text: str  # response text
    model: str  # model identifier, e.g. "gpt-4" in the example payload
    tokens_used: int  # token count reported for the response

In [14]:
# Hand-written example payload standing in for a model response; it is
# validated against LLMResponse in the next cell.
response_data = dict(
    text="AI stands for Artificial Intelligence.",
    model="gpt-4",
    tokens_used=35,
)

In [15]:
# Validate the payload against LLMResponse; a ValidationError is raised if a
# field is missing or cannot be validated as its declared type.
validated_response = LLMResponse(**response_data)

In [16]:
# print() shows the model's str() form: space-separated field=value pairs.
print(validated_response)

text='AI stands for Artificial Intelligence.' model='gpt-4' tokens_used=35


### 4️⃣ Generating Structured Output from LLMs

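LangChain's `PydanticOutputParser` works with a Pydantic model to enforce structured LLM output: it generates format instructions to include in the prompt and parses the model's reply into an instance of that Pydantic model.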

In [17]:
from langchain.output_parsers import PydanticOutputParser

In [18]:
from langchain.prompts import PromptTemplate
from pydantic import BaseModel

In [19]:
# Expected output format: one question/answer pair that the LLM reply must match.
# NOTE: documented with comments rather than a class docstring on purpose.
# Pydantic copies a model docstring into the JSON schema "description", which
# would likely alter the format instructions generated from this class.
class FAQ(BaseModel):
    question: str  # the FAQ question text
    answer: str  # the answer to that question

In [20]:
# Output parser bound to FAQ: it supplies the format instructions for the
# prompt and parses the model's text reply into a FAQ instance.
parser = PydanticOutputParser(pydantic_object=FAQ)

In [21]:
# Prompt template with two placeholders:
#   {topic}               - supplied by the caller at format/invoke time
#   {format_instructions} - pre-filled once with the parser's JSON-schema
#                           instructions, so callers only pass the topic
prompt = PromptTemplate(
    template="Generate a FAQ entry for: {topic}\n{format_instructions}",
    input_variables=["topic"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [22]:
# Render the template for one topic, producing the exact text sent to the model.
formatted_prompt = prompt.format(topic="Artificial Intelligence")

In [24]:
# Inspect the rendered prompt, including the injected format instructions.
print(formatted_prompt)

Generate a FAQ entry for: Artificial Intelligence
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"question": {"title": "Question", "type": "string"}, "answer": {"title": "Answer", "type": "string"}}, "required": ["question", "answer"]}
```


In [26]:
# Send the rendered prompt to the model; the reply is expected to be JSON
# that follows the FAQ schema described in the prompt.
formatted_response = llm.invoke(formatted_prompt)

In [28]:
# Raw text of the reply (a JSON string), before any parsing or validation.
formatted_response.content

'{\n  "question": "What is artificial intelligence?",\n  "answer": "Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think and act like humans. These machines are designed to mimic human cognitive functions such as learning, problem-solving, perception, and decision-making."\n}'

In [30]:
# Parse the JSON text into a FAQ instance using the parser built from FAQ.
parsed_faq = parser.parse(formatted_response.content)


In [31]:
# Show the parsed FAQ object as the cell output.
parsed_faq

FAQ(question='What is artificial intelligence?', answer='Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think and act like humans. These machines are designed to mimic human cognitive functions such as learning, problem-solving, perception, and decision-making.')

In [37]:
# Compose the prompt template and the model into a single runnable.
# Without the parser, invoking it returns the model's raw AIMessage.
chain = prompt | llm

In [39]:
# Run the prompt -> model pipeline; only the template's "topic" variable is
# needed because the format instructions were pre-filled on the template.
chain.invoke({"topic": "Artificial Intelligence"})

AIMessage(content='{\n  "question": "What is artificial intelligence?",\n  "answer": "Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think and act like humans. These machines are designed to mimic human cognitive functions such as learning, problem-solving, perception, and decision-making."\n}', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 65, 'prompt_tokens': 177, 'total_tokens': 242, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a6dbccec-28bc-4e74-9bc6-0b067f1881c3-0')

In [40]:
# Append the parser so the pipeline returns a FAQ instance instead of an AIMessage.
# NOTE(review): this rebinds `chain`, replacing the prompt -> model pipeline
# defined earlier; cells that use `chain` depend on which definition ran last.
chain = prompt | llm | parser

In [41]:
# Run the full prompt -> model -> parser pipeline; the result is a FAQ instance.
chain.invoke({"topic": "Artificial Intelligence"})

FAQ(question='What is artificial intelligence?', answer='Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think and act like humans. These machines are designed to mimic human cognitive functions such as learning, problem-solving, perception, and decision-making.')

### 5️⃣ Pydantic with LangChain Agents & Tools

LangChain Agents use Tools (functions) to execute tasks dynamically. Pydantic helps validate tool inputs and outputs.

### Key Takeaways

- ✅ Validate LLM inputs before sending requests
- ✅ Enforce structured responses from AI models
- ✅ Ensure consistency in AI pipelines (retrieval, memory, agents)
- ✅ Prevent API errors in LangChain tools & agents