# Pydantic Output Parser

In [1]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, PromptTemplate, ChatPromptTemplate
import textwrap

In [2]:
# Connection settings for the Ollama server and the model it should serve.
base_url = "http://localhost:11434"
model = "llama3.2:3b"

# Chat model shared by every chain in this notebook.
# NOTE(review): num_predict presumably limits the number of generated tokens —
# confirm 256 is enough for the JSON answers requested further down.
llm = ChatOllama(
    model=model,
    base_url=base_url,
    num_predict=256,
    temperature=0.5,
)

In [3]:
from typing import Optional
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser

In [4]:
# Structured representation of a joke. The Field descriptions below are the
# texts that show up in the JSON schema printed by the parsers in this notebook.
# NOTE(review): documented with `#` comments on purpose — a class docstring
# would presumably be copied into the generated schema / tool description and
# change what is sent to the model; confirm before adding one.
class Joke(BaseModel):
    setup: str = Field(description="the setup of the joke")
    punchline: str = Field(description="the punchline of the joke")
    # Optional field: falls back to None when no rating is supplied.
    # The 1-10 range is stated only in the description; no ge/le bound is declared.
    rating: Optional[int] = Field(
        default=None,
        description="the rating of the joke is from 1 to 10",
    )

In [5]:
# Output parser bound to the Joke model; used below both for its format
# instructions and as the last step of the chain.
parser = PydanticOutputParser(pydantic_object=Joke)
parser  # bare expression so the notebook displays the parser's repr

PydanticOutputParser(pydantic_object=<class '__main__.Joke'>)

In [6]:
# Show the instructions the parser contributes to the prompt, re-wrapped so
# the long schema text fits the default line width.
format_instructions = parser.get_format_instructions()
print(textwrap.fill(format_instructions))

The output should be formatted as a JSON instance that conforms to the
JSON schema below.  As an example, for the schema {"properties":
{"foo": {"title": "Foo", "description": "a list of strings", "type":
"array", "items": {"type": "string"}}}, "required": ["foo"]} the
object {"foo": ["bar", "baz"]} is a well-formatted instance of the
schema. The object {"properties": {"foo": ["bar", "baz"]}} is not
well-formatted.  Here is the output schema: ``` {"properties":
{"setup": {"description": "the setup of the joke", "title": "Setup",
"type": "string"}, "punchline": {"description": "the punchline of the
joke", "title": "Punchline", "type": "string"}, "rating": {"anyOf":
[{"type": "integer"}, {"type": "null"}], "default": null,
"description": "the rating of the joke is from 1 to 10", "title":
"Rating"}}, "required": ["setup", "punchline"]} ```


In [7]:
# Prompt for the Pydantic-parser section.
# `format_instruction` is pre-filled from the parser (partial variable), so the
# caller only has to supply `query` at invoke time.
# Fix: the instruction sentence sent to the model read "yout" instead of "your".
prompt = PromptTemplate(
    template="""
        Answer the user query with a joke. Here is your formatting instruction.
        {format_instruction}

        query: {query}
        answer:""",
    input_variables=['query'],
    partial_variables={'format_instruction': parser.get_format_instructions()}
    )

prompt  # display the fully configured template

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={'format_instruction': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"setup": {"description": "the setup of the joke", "title": "Setup", "type": "string"}, "punchline": {"description": "the punchline of the joke", "title": "Punchline", "type": "string"}, "rating": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "the rating of the joke is from 1 to 10", "title": "Rating"}}, "required": ["setup", "punchline"]}\n```'}, template='\n        Answer the user quer

In [8]:
# Compose prompt and model only; no output parser yet, so invoking this chain
# yields the model's raw reply.
chain = prompt | llm

In [9]:
# Run the parser-less chain and print the text of the reply as-is.
query_input = {'query': 'Hi, I have a cat and a dog.'}
output = chain.invoke(query_input)
print(output.content)

{"properties": {"setup": {"description": "Why did your cat and dog go to the vet?", "title": "Why did your cat and dog go to the vet?", "type": "string"}, "punchline": {"description": "Because they were feeling ruff and paws-itive!", "title": "Punchline", "type": "string"}, "rating": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "the rating of the joke is from 1 to 10", "title": "Rating"}}}


In [10]:
# Same prompt and model, now with the parser appended as the final step so the
# printed result is the parsed object rather than raw text.
chain = prompt | llm | parser
query_input = {'query': 'Hi, I have a cat and a dog.'}
output = chain.invoke(query_input)
print(output)

setup='Why did the cat and the dog go to therapy?' punchline='Because they were having a ruff relationship!' rating=8


## Parsing with `with_structured_output`

In [11]:
# Baseline: call the model directly with a plain string and print the
# unstructured text it returns.
joke_request = 'Tell me a joke about programmers'
output = llm.invoke(joke_request)
print(output.content)

Why do programmers prefer dark mode?

Because light attracts bugs.


In [12]:
# Wrap the model so its replies are returned according to the Joke schema
# instead of as free text.
structure_llm = llm.with_structured_output(Joke)

In [13]:
# Display the wrapped runnable to inspect what with_structured_output built.
structure_llm

RunnableBinding(bound=ChatOllama(model='llama3.2:3b', num_predict=256, temperature=0.5, base_url='http://localhost:11434'), kwargs={'tools': [{'type': 'function', 'function': {'name': 'Joke', 'description': '', 'parameters': {'properties': {'setup': {'description': 'the setup of the joke', 'type': 'string'}, 'punchline': {'description': 'the punchline of the joke', 'type': 'string'}, 'rating': {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'the rating of the joke is from 1 to 10'}}, 'required': ['setup', 'punchline'], 'type': 'object'}}}], 'tool_choice': 'any'}, config={}, config_factories=[])
| PydanticToolsParser(first_tool_only=True, tools=[<class '__main__.Joke'>])

In [14]:
# Same request as the plain-LLM call, sent through the schema-bound model;
# the printed value shows setup / punchline / rating fields.
joke_request = 'Tell me a joke about programmers'
output = structure_llm.invoke(joke_request)
print(output)

setup='Why do programmers prefer dark mode?' punchline='Because light attracts bugs.' rating=8


# JSON Output Parser

In [15]:
from langchain_core.output_parsers import JsonOutputParser

In [17]:
# JSON parser that reuses the Joke model for its schema; print the
# instructions it would add to a prompt.
json_parser = JsonOutputParser(pydantic_object=Joke)
json_format_instructions = json_parser.get_format_instructions()
print(json_format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"setup": {"description": "the setup of the joke", "title": "Setup", "type": "string"}, "punchline": {"description": "the punchline of the joke", "title": "Punchline", "type": "string"}, "rating": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "the rating of the joke is from 1 to 10", "title": "Rating"}}, "required": ["setup", "punchline"]}
```


In [18]:
# Prompt + chain for the JSON-parser section.
# Fix: the format instructions are now taken from `json_parser`, the parser
# that actually terminates this chain. The original read them from the global
# `parser`, which other cells rebind to different parser types, so running the
# cells out of order could inject the wrong instructions.
# Fix: "yout" -> "your" in the instruction sentence sent to the model.
prompt = PromptTemplate(
    template="""
        Answer the user query with a joke. Here is your formatting instruction.
        {format_instruction}

        query: {query}
        answer:""",
    input_variables=['query'],
    partial_variables={'format_instruction': json_parser.get_format_instructions()}
    )

# The JSON parser returns a plain dict rather than a Joke instance.
chain = prompt | llm | json_parser
output = chain.invoke({'query': 'Hi, I have a cat and a dog.'})
print(output)

{'properties': {'setup': {'description': 'Why did the cat join a band?', 'title': 'Setup', 'type': 'string'}, 'punchline': {'description': 'Because it wanted to be the purr-cussionist!', 'title': 'Punchline', 'type': 'string'}, 'rating': {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'the rating of the joke is from 1 to 10', 'title': 'Rating'}}}


In [21]:
# The recorded reply wrapped the fields under a top-level 'properties' key,
# so unwrap it when present.
# Fix: fall back to the dict itself when the key is missing. The original
# assigned None in that case (well-formed reply, or this cell run twice),
# which made the following `.items()` loop fail.
output = output.get('properties', output)

In [24]:
# Print every entry of the parsed result, one "name value" pair per line.
for field_name in output:
    print(field_name, output[field_name])

setup {'description': 'Why did the cat join a band?', 'title': 'Setup', 'type': 'string'}
punchline {'description': 'Because it wanted to be the purr-cussionist!', 'title': 'Punchline', 'type': 'string'}
rating {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'the rating of the joke is from 1 to 10', 'title': 'Rating'}


# CSV output parser

In [25]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser

In [32]:
# Parser for comma-separated answers.
# Note: this rebinds the global `parser` that earlier cells used for the
# Pydantic parser.
parser = CommaSeparatedListOutputParser()
csv_format_instructions = parser.get_format_instructions()
print(csv_format_instructions)

Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [33]:
# Prompt for the comma-separated-list section.
# Fix: the instruction sentence was copy-pasted from the joke prompts and asked
# for "a joke", contradicting both the list format instructions and the
# keyword-generation query used with it. It now asks for a list of items.
# Fix: "yout" -> "your".
prompt = PromptTemplate(
    template="""
        Answer the user query with a list of items. Here is your formatting instruction.
        {format_instruction}

        query: {query}
        answer:""",
    input_variables=['query'],
    partial_variables={'format_instruction': parser.get_format_instructions()}
    )

In [34]:
# Ask for SEO keywords; the list parser turns the comma-separated reply into
# a Python list of strings.
chain = prompt | llm | parser
seo_query = {'query': 'generate a website seo keywords. I have content about eletric guitar instruments, tools and guitar effects'}
output = chain.invoke(seo_query)
print(output)

['electric guitar', 'electric guitar effects', 'guitar pedals', 'amp accessories', 'music gear', 'instrument accessories', 'effects processing', 'sound enhancement', 'music equipment', 'rock music', 'blues', 'jazz', 'guitarists', 'music technology']


# Datetime output parsing

In [35]:
from langchain.output_parsers import DatetimeOutputParser

In [36]:
# Parser for datetime answers; rebinds the global `parser` once more.
parser = DatetimeOutputParser()
datetime_format_instructions = parser.get_format_instructions()
print(datetime_format_instructions)

Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.

Examples: 1191-06-28T19:12:16.278038Z, 0172-01-25T10:05:34.337591Z, 1818-01-15T00:19:41.688835Z

Return ONLY this string, no other words!


In [37]:
# Prompt for the datetime section; `format_instruction` is pre-filled from the
# datetime parser, so only `query` is supplied at invoke time.
# Fix: "yout" -> "your" in the instruction sentence sent to the model.
prompt = PromptTemplate(
    template="""
        Answer the user query with a datetime. Here is your formatting instruction.
        {format_instruction}

        query: {query}
        answer:""",
    input_variables=['query'],
    partial_variables={'format_instruction': parser.get_format_instructions()}
    )

In [38]:
# Build the datetime chain and ask a date question.
# NOTE(review): the prompt does not contain the current date, so "next" is
# left to the model's own guess — consider passing today's date in the query.
chain = prompt | llm | parser
holiday_query = {'query': 'when is the next hollyday?'}
output = chain.invoke(holiday_query)
print(output)

2023-12-25 00:00:00


In [39]:
# Historical-date question through the same datetime chain.
discovery_query = {'query': 'when america got discovered?'}
output = chain.invoke(discovery_query)
print(output)

1492-10-12 04:00:00


In [41]:
# Another historical-date question through the datetime chain.
# NOTE(review): the parser only checks the format of the reply, not its
# factual accuracy — the recorded answer (1958-05-14) appears to be off by a
# decade from the commonly cited 1948 date; verify before relying on it.
israel_query = {'query': 'when Israel turns to became a nation again?'}
output = chain.invoke(israel_query)
print(output)

1958-05-14 04:15:00
