# JSON parser
- Allows users to specify an arbitrary JSON schema and query LLMs for output that conform to that schema

In [10]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [4]:
llm = ChatOpenAI(model="gpt-4o-mini",temperature=0)

In [5]:
# Define a structure
class Joke(BaseModel):
    setup: str = Field(..., description="The setup of the joke")
    punchline: str = Field(..., description="The punchline of the joke")

In [6]:
joke_query = "Tell me a joke"

#setup up a parser + inject instructions into the prompt template
parser = JsonOutputParser(pydantic_object=Joke)
prompt = PromptTemplate(
    template="Answer the question based on the context below.\n\n{format_instructions}\n\n{question}",
    input_variables=["question"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser
chain.invoke({"question": joke_query})


{'setup': "Why don't scientists trust atoms?",
 'punchline': 'Because they make up everything!'}

In [23]:
def call_json_output_parser():
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system","You are a helpful assistant to extract information from the following phrase\nFormatting Instructions:{formatting_instructions}",
            ),
            ("user", "{phrase}"),
        ]
    )
    class Person(BaseModel):
        name: str = Field(description="The name of the person")
        age: int = Field(description="The age of the person")
       
    parser = JsonOutputParser(pydantic_object=Person)
    chain = prompt | llm | parser
    resp = chain.invoke({"phrase": "John is 30 years old", "formatting_instructions": parser.get_format_instructions()})
    return resp

In [24]:
response = call_json_output_parser()
print(response)

{'name': 'John', 'age': 30}


In [26]:
from typing import List
prompt = prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system","You are a helpful assistant to extract information from the following phrase\nFormatting Instructions:{formatting_instructions}",
            ),
            ("user", "{phrase}"),
        ]
    )
class FoodReceipes(BaseModel):
    food:str = Field(description="Name of the food")
    ingredients:List[str] = Field(description="List of ingredients")

receipe_parser = JsonOutputParser(pydantic_object=FoodReceipes)
chain = prompt | llm | receipe_parser
response = chain.invoke({"phrase": "Prepare the Margherita pizza with tomatoes, onions, cheese, bell-pepper",
                        "formatting_instructions":receipe_parser.get_format_instructions()})
print(response)

{'food': 'Margherita pizza', 'ingredients': ['tomatoes', 'onions', 'cheese', 'bell-pepper']}


In [1]:
{
    "gift":False,
    "delivery_days":5,
    "price_value":"pretty expensive"
}

{'gift': False, 'delivery_days': 5, 'price_value': 'pretty expensive'}

In [2]:
customer_review =""" 
    This leaf blower is pretty amazing. It has a nice flow of air and the right amount of power. 
    The only complaint is that the blower takes a long time to start and I find myself waiting 5 minutes. It arrived in
    two days, just in time for my wife's anniversary present. I think this would make a good present for her. Its slightly expensive than the 
    other leaf blowers and its worth for the extra features it have
"""
review_template =""" 
For the following text, extract the following information.
gift : was the item purchased as a gift for someone else? Answer True if yes, False if no.
delivery_days : how many days did it take for the product to arrive? Only count Business days.
price_value : Extract any sentences about the value of the product.

Format the output as JSON with the following keys.
gift
delivery_days
price_value

text: {text}
"""


In [3]:
from langchain.prompts import ChatPromptTemplate
prompt_template = ChatPromptTemplate.from_template(review_template)
print(prompt_template)

input_variables=['text'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['text'], template=' \nFor the following text, extract the following information.\ngift : was the item purchased as a gift for someone else? Answer True if yes, False if no.\ndelivery_days : how many days did it take for the product to arrive? Only count Business days.\nprice_value : Extract any sentences about the value of the product.\n\nFormat the output as JSON with the following keys.\ngift\ndelivery_days\nprice_value\n\ntext: {text}\n'))]


In [5]:
from langchain_openai import ChatOpenAI
messages =prompt_template.format_messages(text =customer_review)
chat = ChatOpenAI(temperature=0.0, model="gpt-3.5-turbo-1106")
response = chat(messages)
print(response.content)

{
  "gift": true,
  "delivery_days": 2,
  "price_value": "Its slightly expensive than the other leaf blowers and its worth for the extra features it have"
}


In [6]:
type(response.content)

str

# Parse LLM output string into a Python dictionary


In [7]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

In [18]:
gift_schema = ResponseSchema(name="gift",
                             description="was the item purchased as a gift for someone else? Answer True if yes, False if not known",
                             )
delivery_date_schema = ResponseSchema(name="delivery_days",
                                      description="How many days did it take for the product to get deliverd. If this is not found output -1",
                                      )
price_value_schema = ResponseSchema(name="price_value",
                                    description="Extract any information about the price or value and output them as comma separated python list",
                                    )
response_schemas = [gift_schema,
                  delivery_date_schema,
                  price_value_schema]

In [19]:
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [20]:
format_instructions = output_parser.get_format_instructions()
format_instructions


'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"gift": string  // was the item purchased as a gift for someone else? Answer True if yes, False if not known\n\t"delivery_days": string  // How many days did it take for the product to get deliverd. If this is not found output -1\n\t"price_value": string  // Extract any information about the price or value and output them as comma separated python list\n}\n```'

In [21]:
review_template2 = """ 
For the following text extract the following information.
gift: Was the item purchased as a gift for someone else? Answer with "yes" or "no".

delivery_days: How many days did it take for the product to arrive? If this information is not found, write "unknown".

price_value: Extract the price of the product from the text. If the price is not found, write "unknown".

Return your response in the following format:

text: {text}
{format_instructions}
"""

prompt = ChatPromptTemplate.from_template(review_template2)

In [22]:
messages = prompt.format_messages(text=customer_review, format_instructions = format_instructions)
print(messages[0].content)

 
For the following text extract the following information.
gift: Was the item purchased as a gift for someone else? Answer with "yes" or "no".

delivery_days: How many days did it take for the product to arrive? If this information is not found, write "unknown".

price_value: Extract the price of the product from the text. If the price is not found, write "unknown".

Return your response in the following format:

text:  
    This leaf blower is pretty amazing. It has a nice flow of air and the right amount of power. 
    The only complaint is that the blower takes a long time to start and I find myself waiting 5 minutes. It arrived in
    two days, just in time for my wife's anniversary present. I think this would make a good present for her. Its slightly expensive than the 
    other leaf blowers and its worth for the extra features it have

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json

In [23]:
response = chat(messages)
print(response.content)

```json
{
	"gift": "yes",
	"delivery_days": "2",
	"price_value": "unknown"
}
```


In [28]:
output_dict = output_parser.parse(response.content)
output_dict

{'gift': 'yes', 'delivery_days': '2', 'price_value': 'unknown'}

In [26]:
type(output_dict)

dict

In [30]:
output_dict.get('delivery_days')

'2'

In [None]:
# Example 4
from typing import List
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, FewShotPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv
load_dotenv()

# Comma Separated List output Parser

In [12]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser

In [17]:
def call_list_output_parser():
    prompt = ChatPromptTemplate.from_messages([
        ("system","Generate a list of 10 synonyms forr the following word. Ruturn the result in a comma separated list."),
        ("human", "{word}")
    ]     
    )

    parser = CommaSeparatedListOutputParser()
    chain = prompt | llm | parser
    res = chain.invoke({"word": "Happy"})
    return res

response = call_list_output_parser()
print(response)
print(type(response))

['Joyful', 'Cheerful', 'Elated', 'Content', 'Delighted', 'Pleased', 'Blissful', 'Gleeful', 'Ecstatic', 'Jubilant']
<class 'list'>
