# LangChain - Parsing

In [1]:
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, AIMessagePromptTemplate

In [2]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

In [3]:
# Read the OpenAI API key from a text file two directories above the working directory.
# The context manager closes the file handle as soon as the read is done, and
# strip() removes surrounding whitespace (typically the trailing newline an editor
# appends) so it does not end up inside the key handed to the client.
with open('../../api_key.txt') as key_file:
    api_key = key_file.read().strip()

In [4]:
# Chat model client used by the examples below, created with the key read in the previous cell.
model = ChatOpenAI(openai_api_key=api_key)

## Comma Separated List Output Parser

In [5]:
from langchain.output_parsers import CommaSeparatedListOutputParser

In [6]:
output_parser = CommaSeparatedListOutputParser()

### Understanding Syntax

In [7]:
output_parser.get_format_instructions()

'Your response should be a list of comma separated values, eg: `foo, bar, baz`'

In [8]:
# Hand-written stand-in for a model reply, used to compare the two parsing approaches below.
reply = "red, green, blue"

In [9]:
# Plain str.split(',') keeps the space that follows each comma, as the output shows.
reply.split(',')

['red', ' green', ' blue']

In [10]:
# The list parser returns the same items without the leading spaces left by str.split(',').
output_parser.parse(reply)

['red', 'green', 'blue']

In [11]:
# Two slots: the user's request, then the parser's format instructions on the next line.
human_template = "{request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [12]:
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

### Example 1

In [13]:
# Fill both template slots and convert the prompt into the message list passed to the model.
model_request = chat_prompt.format_prompt(
    request="give me 5 characteristics of dogs",
    format_instructions=output_parser.get_format_instructions()).to_messages()

In [14]:
result = model(model_request)

In [15]:
print(result.content)

loyal, playful, protective, social, intelligent


In [16]:
output_parser.parse(result.content)

['loyal', 'playful', 'protective', 'social', 'intelligent']

### Example 2

In [17]:
model_request = chat_prompt.format_prompt(
    request="write me a blog post about dogs",
    format_instructions=output_parser.get_format_instructions()).to_messages()

In [18]:
result = model(model_request)

In [19]:
print(result.content)

The Joy of Dogs, Unconditional Love, Adventure Companions, Stress Relievers, Loyal Friends, Constant Entertainment, Exercise Motivators, Heartwarming Greetings, Emotional Support, Furry Family Members


In [20]:
output_parser.parse(result.content)

['The Joy of Dogs',
 'Unconditional Love',
 'Adventure Companions',
 'Stress Relievers',
 'Loyal Friends',
 'Constant Entertainment',
 'Exercise Motivators',
 'Heartwarming Greetings',
 'Emotional Support',
 'Furry Family Members']

## Date Time Parser

In [21]:
from langchain.output_parsers import DatetimeOutputParser

In [22]:
# Rebinds output_parser: from here on it is a DatetimeOutputParser, not the
# CommaSeparatedListOutputParser used in the previous section.
output_parser = DatetimeOutputParser()

In [23]:
output_parser.get_format_instructions()

'Write a datetime string that matches the \n            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 0782-12-09T12:07:16.136402Z, 1503-01-15T05:11:40.144188Z, 1521-05-22T12:51:47.854401Z'

In [24]:
human_template = "{request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [25]:
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

In [26]:
model_request = chat_prompt.format_prompt(
    request="What date was the 13th Amendment ratified in the US?",
    format_instructions=output_parser.get_format_instructions()).to_messages()

In [27]:
# temperature=0 is passed on this call; the earlier list examples called the model without it.
result = model(model_request, temperature=0)

In [28]:
print(result.content)

The 13th Amendment was ratified in the US on December 6, 1865.

The datetime string that matches the given pattern is: "1865-12-06T00:00:00.000000Z"


In [29]:
# The reply printed above wraps the timestamp in extra prose, so the datetime
# parser is expected to reject it. The failure is the point of this cell, so the
# parser's exception is caught and printed instead of being left uncaught, which
# would stop a top-to-bottom run of the notebook at this cell.
from langchain.schema import OutputParserException

try:
    parsed = output_parser.parse(result.content)
except OutputParserException as exc:
    # Same "<ExceptionName>: <message>" shape as the uncaught error would show.
    print(f"{type(exc).__name__}: {exc}")
else:
    # If the model happens to return a clean timestamp, show the parsed value.
    print(repr(parsed))

OutputParserException: Could not parse datetime string: The 13th Amendment was ratified in the US on December 6, 1865.

The datetime string that matches the given pattern is: "1865-12-06T00:00:00.000000Z"

### Fixing the Output

#### Method 1: System Prompt

In [30]:
# System instruction telling the model to answer with datetime patterns only;
# the reply printed further down no longer contains the extra prose seen earlier.
system_template = "You always reply to questions only in datetime patterns."
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [31]:
human_template = "{request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [32]:
# Build the chat prompt from the system instruction followed by the human request.
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

In [33]:
model_request = chat_prompt.format_prompt(
    request="What date was the 13th Amendment ratified in the US?",
    format_instructions=output_parser.get_format_instructions()).to_messages()

In [34]:
result = model(model_request, temperature=0)

In [35]:
print(result.content)

1865-12-06T00:00:00.000000Z


In [36]:
output_parser.parse(result.content)

datetime.datetime(1865, 12, 6, 0, 0)

#### Method 2: Output Fixing Parser

In [37]:
from langchain.output_parsers import OutputFixingParser

In [38]:
system_template = "You always reply to questions only in datetime patterns."
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [39]:
human_template = "{request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [40]:
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

In [41]:
model_request = chat_prompt.format_prompt(
    request="What date was the 13th Amendment ratified in the US?",
    format_instructions=output_parser.get_format_instructions()).to_messages()

In [42]:
result = model(model_request, temperature=0)

In [43]:
print(result.content)

1865-12-06T00:00:00.000000Z


In [44]:
# NOTE(review): despite the name, this reply already matches the expected
# pattern (see the value printed by the previous cell), so it is not actually
# malformed input for the parser.
misformatted = result.content
misformatted

'1865-12-06T00:00:00.000000Z'

In [45]:
# Wrap the datetime parser in an OutputFixingParser that is given the chat model.
# NOTE(review): presumably the model is only consulted when the wrapped parser
# fails; since the input here is already well-formed, that repair path is likely
# not exercised by this cell — confirm, and consider feeding it the prose-wrapped
# reply from the failing example instead.
output_fixing_parser = OutputFixingParser.from_llm(parser=output_parser, llm=model)
output_fixing_parser.parse(misformatted)

datetime.datetime(1865, 12, 6, 0, 0)

## Pydantic Parser

In [46]:
from langchain.output_parsers import PydanticOutputParser

In [47]:
from pydantic import BaseModel, Field

In [48]:
# Target schema for the Pydantic output parser: a scientist's name and a list of discoveries.
# The Field descriptions appear verbatim in the generated format instructions.
# NOTE(review): documented with comments rather than a class docstring, because
# pydantic is believed to surface a class docstring in the generated schema, which
# would change the prompt text — confirm before adding one.
class Scientist(BaseModel):
    
    name: str = Field(description="Name of a Scientist")
    # Untyped `list`: the generated schema shows an empty "items" entry for this field,
    # so the element type is not communicated to the model.
    discoveries: list = Field(description="Python list of discoveries")

In [49]:
parser = PydanticOutputParser(pydantic_object=Scientist)

In [50]:
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "Name of a Scientist", "title": "Name", "type": "string"}, "discoveries": {"description": "Python list of discoveries", "items": {}, "title": "Discoveries", "type": "array"}}, "required": ["name", "discoveries"]}
```


In [51]:
human_template = "{request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [52]:
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])

In [53]:
model_request = chat_prompt.format_prompt(
    request="Tell me about a famous scientist",
    format_instructions=parser.get_format_instructions()
).to_messages()

In [54]:
result = model(model_request, temperature=0)

In [55]:
result.content

'{"name": "Albert Einstein", "discoveries": ["Theory of Relativity", "Photoelectric Effect"]}'

In [56]:
parser.parse(result.content)

Scientist(name='Albert Einstein', discoveries=['Theory of Relativity', 'Photoelectric Effect'])