# Parsing Output

Often when connecting to outputs of LLMs we need the output in specific format

In [30]:
import os
from langchain_openai import OpenAI, ChatOpenAI
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate

from langchain.output_parsers import  CommaSeparatedListOutputParser, DatetimeOutputParser, PydanticOutputParser
from pydantic import BaseModel, Field

api_key = os.getenv("OPENAI_API_KEY")

In [47]:
# Initialize the model
model = ChatOpenAI(model="gpt-4o", openai_api_key=api_key)

### List Parsing

In [12]:
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [13]:
# Now to create a proper prompt using this
reply = "one, two, three"
output_parser.parse(reply)

['one', 'two', 'three']

In [15]:
human_template = '{request}{format_instructions}'
human_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

chat_prompt.format_prompt(request="give me 5 characteristics of pet dogs. ", format_instructions=output_parser.get_format_instructions())

ChatPromptValue(messages=[HumanMessage(content='give me 5 characteristics of pet dogs. Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`', additional_kwargs={}, response_metadata={})])

In [16]:
# Pass to the model
request = chat_prompt.format_prompt(request="give me 5 characteristics of pet dogs. ", format_instructions=output_parser.get_format_instructions()).to_messages()

response = model(request)
print(response.content)

  response = model(request)


loyal, playful, social, protective, affectionate


In [18]:
# We can convert to desired output - output in the form of a list
output_parser.parse(response.content)

['loyal', 'playful', 'social', 'protective', 'affectionate']

### Datetime Parser

In [20]:
output_parser = DatetimeOutputParser()
print(output_parser.get_format_instructions())

Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.

Examples: 0422-02-11T16:24:19.265732Z, 1887-08-16T21:49:20.340637Z, 1964-10-13T16:26:34.139412Z

Return ONLY this string, no other words!


In [22]:
template_text = "{request}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(template_text)
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [23]:
# Construct the prompt and pass to the model
print(chat_prompt.format(request="When was the 13th Ammendment ratified in the US?", format_instructions=output_parser.get_format_instructions()))

Human: When was the 13th Ammendment ratified in the US?
Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.

Examples: 0544-04-24T03:32:17.788149Z, 0533-03-09T09:58:27.144409Z, 0037-12-31T19:48:45.559557Z

Return ONLY this string, no other words!


In [27]:
request = chat_prompt.format_prompt(request="When was the 13th Ammendment ratified in the US?", format_instructions=output_parser.get_format_instructions()).to_messages()
response = model(request, temperature = 0)
print(response.content)

1865-12-06T00:00:00.000000Z


In [28]:
output_parser.parse(response.content)

datetime.datetime(1865, 12, 6, 0, 0)

### Pydantic JSON Parser

In [31]:
# DEFINE THE JSON STRUCTURE
class Scientist(BaseModel):

    name: str = Field(description="Name of a Scientist")
    discoveries: list = Field(description="Python list of discoveries")

In [32]:
query = 'Name any famous Scientist and list of his/her discoveries'

In [33]:
parser = PydanticOutputParser(pydantic_object=Scientist)
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "Name of a Scientist", "title": "Name", "type": "string"}, "discoveries": {"description": "Python list of discoveries", "items": {}, "title": "Discoveries", "type": "array"}}, "required": ["name", "discoveries"]}
```


In [45]:
# Construct the Prompt

prompt = PromptTemplate(
    template="Answer the query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

Answer the query.
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "Name of a Scientist", "title": "Name", "type": "string"}, "discoveries": {"description": "Python list of discoveries", "items": {}, "title": "Discoveries", "type": "array"}}, "required": ["name", "discoveries"]}
```
Tell me about any famous Scientist



In [57]:
# Pass to the model

#my_query="Tell me about any famous Scientist"
chain = prompt | model | parser
response = chain.invoke({"query": "Tell me about any famous Scientist"})

print(response.json)

<bound method BaseModel.json of Scientist(name='Albert Einstein', discoveries=['Theory of Relativity', 'Photoelectric Effect', 'Brownian Motion', 'Mass-Energy Equivalence (E=mc^2)'])>
