Pydantic is a data validation library for Python.
1. Works with Python type annotations. But
rather than being used only for static type checking,
the annotations are actively used at runtime for
data validation and conversion.
2. Provides built-in methods to
serialize/deserialize models to/from JSON,
dictionaries, etc.
3. LangChain leverages Pydantic to create the JSON
Schema that describes a function.

For example, a normal class in Python is created like this:


class User:
    """Plain Python user record; the type hints are not enforced at runtime."""

    def __init__(self, name: str, age: int, email: str):
        # Store the arguments exactly as given -- no validation or conversion.
        self.name, self.age, self.email = name, age, email


But with the pydantic library it is like this:

from pydantic import BaseModel

class User(BaseModel):
    # Fields are declared as annotated class attributes; BaseModel generates
    # the constructor and checks each value against its annotation.
    name: str
    age: int
    email: str
    


In [1]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
# find_dotenv() locates a .env file and load_dotenv() loads its variables
# into the process environment; the return value is deliberately discarded.
_ = load_dotenv(find_dotenv()) # read local .env file
# Indexing os.environ raises KeyError if OPENAI_API_KEY is still not defined.
openai.api_key = os.environ['OPENAI_API_KEY']

In [2]:
from typing import List
from pydantic import BaseModel, Field

In [4]:
# Plain Python user class, used as the baseline for the pydantic comparison.
class User:
    """User record whose annotations are hints only; nothing is validated."""

    def __init__(
        self,
        name: str,
        age: int,
        email: str,
    ):
        # Values are stored untouched, whatever their actual type is.
        self.name, self.age, self.email = name, age, email


In [5]:
# Build a user with well-typed values.
foo = User(name="Ananay", age=20, email="ananay@gmail.com")

In [6]:
# User defines no __repr__, so the default object repr is displayed.
foo

<__main__.User at 0x100e0e8>

In [7]:
# Attributes set in __init__ are read back unchanged.
foo.name

'Ananay'

In [10]:
# Even though `age` is annotated as int, a plain class performs no validation,
# so a string is accepted without any error.
foo = User(name="Ananay", age="bar", email="ananay@gmail.com")

In [11]:
# The invalid string was stored as-is.
foo.age

'bar'

In [12]:
# The same user record, now declared as a pydantic model.
class pUser(BaseModel):
    # Each annotated attribute becomes a validated field of the model.
    name: str
    age: int
    email: str

In [13]:
# `email` is declared as a plain `str`, so any string passes; pydantic's
# EmailStr type would be needed to validate the address format.
foo_p = pUser(name="Ananay", age=20, email="Anandkadaddy")

In [14]:
# BaseModel provides a readable repr listing every field and its value.
foo_p

pUser(name='Ananay', age=20, email='Anandkadaddy')

In [None]:
# This raises pydantic's ValidationError: "tyagi" cannot be parsed as an int,
# so no model instance is created.
foo_p = pUser(name="Ananay", age="tyagi", email="Anandkadaddy")

In [21]:
# Another advantage: pydantic models can be nested inside one another.
class Class(BaseModel):
    # Every element of the list must itself be a pUser.
    students: List[pUser]

In [22]:
# A class containing a single student.
obj = Class(
    students=[
        pUser(name="Ananay", age=20, email="ananay@gmail.com"),
    ],
)

In [23]:
# The repr shows the nested pUser models inside the list.
obj

Class(students=[pUser(name='Ananay', age=20, email='ananay@gmail.com')])

Pydantic to OpenAI function definition

In [24]:
# A description of the function (the class docstring) is required, while
# descriptions for the individual arguments are optional.
class WeatherSearch(BaseModel):
    """Call this with the airport code to get the weather at that airport"""

    airport_code: str = Field(description="airport code to get the weather for")

In [25]:
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

In [27]:
# Turn the pydantic model into an OpenAI function definition.
weather_function = convert_pydantic_to_openai_function(WeatherSearch)

In [29]:
# Inspect the result: the class name, its docstring and its fields have become
# the function's name, description and a JSON-schema style `parameters` object.
weather_function

{'name': 'WeatherSearch',
 'description': 'Call this with the airport code to get the weather at that airport',
 'parameters': {'properties': {'airport_code': {'description': 'airport code to get the weather for',
    'type': 'string'}},
  'required': ['airport_code'],
  'type': 'object'}}

In [30]:
from langchain.chat_models import ChatOpenAI

In [31]:
# Chat model with default settings; the recorded responses in this notebook
# report model_name 'gpt-3.5-turbo'.
model = ChatOpenAI()

  model = ChatOpenAI()


In [35]:
# Pass the function definitions for this single call only.
model.invoke(
    "what is the weather in DL today ?",
    functions=[weather_function],
)

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"airport_code":"DL"}', 'name': 'WeatherSearch'}}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 71, 'total_tokens': 88, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'function_call', 'logprobs': None}, id='run-9e2a18a9-3a91-4216-9d7f-b0987dca1337-0')

So we can basically bind the function to the model, so that it does not have to be passed on every call

In [36]:
# bind() returns a runnable that always sends `functions` along with the call.
model_with_functionn = model.bind(functions=[weather_function])

In [37]:
# The function definitions are already bound, so only the prompt is passed.
model_with_functionn.invoke("whats the weather like in dl today")

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"airport_code":"DL"}', 'name': 'WeatherSearch'}}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 71, 'total_tokens': 88, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'function_call', 'logprobs': None}, id='run-e6ebc0d4-c123-4986-8d6c-d1b0bf0466b2-0')

Using in a chain
We can use the model bound to a function in a chain, just as we normally would

In [43]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

In [44]:
# Two-message chat prompt: a fixed system instruction plus the user's text,
# which is substituted for the {input} placeholder when the prompt is invoked.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant"),
    ("user", "{input}"),
])
# NOTE(review): output_parser is created here, but the chain built in the next
# cell is only `prompt | model_with_functionn`; the parser is never piped in.
output_parser = StrOutputParser()

In [47]:
# Pipe the formatted prompt into the function-bound model.
chain = prompt | model_with_functionn

In [48]:
# The dict key must match the {input} placeholder used in the prompt.
chain.invoke({"input": "Whats the weather today like in RSNR"})

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"airport_code":"RSNR"}', 'name': 'WeatherSearch'}}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 78, 'total_tokens': 96, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'function_call', 'logprobs': None}, id='run-3e87053f-a668-402a-99f7-2548cc17eaca-0')

Using Multiple Functions 

In [49]:
class ArtistSearch(BaseModel):
    """Call this to get the names of songs of a particular artist"""

    # Both fields have no default, so the model must supply each of them.
    artist_name: str = Field(description="name of the artist to get the songs for")
    n: int = Field(description="number of the songs")

In [50]:
# OpenAI function definitions for every model the LLM may choose between.
functions = [
    convert_pydantic_to_openai_function(schema)
    for schema in (ArtistSearch, WeatherSearch)
]

In [53]:
# Bind both function definitions so every call can pick either of them.
model_with_functions = model.bind(functions=functions)

The model decides which to choose, according to our query

In [54]:
# A weather question: the recorded response calls WeatherSearch.
model_with_functions.invoke("what is the weather like in DL ?")

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"airport_code":"DL"}', 'name': 'WeatherSearch'}}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 122, 'total_tokens': 139, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'function_call', 'logprobs': None}, id='run-8aa0193b-e71c-4507-81ac-8999d865ee3f-0')

In [55]:
# A music question: the recorded response calls ArtistSearch with n=3.
model_with_functions.invoke("Give me the 3 songs of Arijit Singh")

AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"artist_name":"Arijit Singh","n":3}', 'name': 'ArtistSearch'}}, response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 125, 'total_tokens': 149, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'function_call', 'logprobs': None}, id='run-8f917653-0589-49fb-ac62-688f89d022eb-0')

In [57]:
# Neither function fits this question: the recorded response is plain text
# with finish_reason 'stop' and no function_call.
model_with_functions.invoke("what is the date today ?")

AIMessage(content="I'm sorry, but I don't have access to real-time information such as the current date. You can check the date on your device or by searching online. If you need any other assistance or information, feel free to ask!", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 49, 'prompt_tokens': 120, 'total_tokens': 169, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c0161f39-b973-45e7-80cf-aaebe579fc51-0')