In [70]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser, PandasDataFrameOutputParser, ResponseSchema, StructuredOutputParser
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser, JsonKeyOutputFunctionsParser, PydanticOutputFunctionsParser, PydanticAttrOutputFunctionsParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
load_dotenv()

True

## Output Parsers

### CSV Parser (List)

In [11]:
# Chat model created with library defaults; the later chains in this notebook reuse it.
llm = ChatOpenAI()

# Parser for comma-separated replies, plus the instructions that tell the
# model to answer in that format.
csv_parser = CommaSeparatedListOutputParser()
csv_instructions = csv_parser.get_format_instructions()

# The format instructions are filled in up-front, so callers supply only `food`.
prompt = PromptTemplate(
    input_variables=["food"],
    partial_variables={"format_instructions": csv_instructions},
    template="Tell me the ingredients for making {food}.\n{format_instructions}",
)

# Raw input -> `food` slot -> prompt -> model -> CSV parser.
chain = (
    {"food": RunnablePassthrough()}
    | prompt
    | llm
    | csv_parser
)

In [12]:
# Run the CSV chain; the argument is passed through as the `food` prompt variable.
chain.invoke("apple pie")

['apples',
 'sugar',
 'flour',
 'cinnamon',
 'nutmeg',
 'butter',
 'lemon juice',
 'salt',
 'pie crust']

### String Parser

In [13]:
# Free-text recipe prompt; `food` is its only template variable.
prompt = PromptTemplate.from_template("Tell me the recipe for making {food}")

# Parser for the plain-string chain.
str_parser = StrOutputParser()

# Raw input -> `food` slot -> prompt -> model -> string parser.
chain = (
    {"food": RunnablePassthrough()}
    | prompt
    | llm
    | str_parser
)

In [15]:
# Stream the reply chunk by chunk instead of waiting for the full response.
for chunk in chain.stream("roasted beef stew"):
    # end="" keeps consecutive chunks on the same line; flush=True writes each
    # chunk out immediately rather than leaving it in the output buffer.
    print(chunk, end="", flush=True)

Ingredients:
- 2 lbs beef chuck, cut into 1-inch cubes
- 2 tbsp olive oil
- 1 onion, chopped
- 3 cloves garlic, minced
- 2 carrots, peeled and chopped
- 2 celery stalks, chopped
- 1 tsp dried thyme
- 1 tsp dried rosemary
- 1 tsp paprika
- 2 tbsp tomato paste
- 4 cups beef broth
- 1 cup red wine
- 2 cups potatoes, peeled and diced
- Salt and pepper, to taste
- Chopped fresh parsley, for garnish

Instructions:
1. Preheat the oven to 350°F.
2. In a large Dutch oven, heat the olive oil over medium-high heat. Add the beef cubes and brown on all sides, about 5 minutes.
3. Add the onion, garlic, carrots, and celery to the pot. Cook for another 5 minutes, stirring occasionally.
4. Stir in the thyme, rosemary, paprika, and tomato paste. Cook for 2 minutes.
5. Pour in the beef broth and red wine. Bring to a boil, then cover and transfer to the oven.
6. Cook in the oven for 2 hours, or until the beef is tender.
7. Remove the pot from the oven and stir in the potatoes. Return to the oven and cook 

### JSON Parser (Dictionary)

There are three ways to get dictionary output: Pydantic (type checking), JsonOutputParser, and StructuredOutputParser.

Comparing the first two, the Pydantic approach lets you specify the schema of the output, whereas a plain JsonOutputParser returns JSON without any predefined schema.

For less powerful models, StructuredOutputParser is more useful.

#### Pydantic

In [45]:
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from typing import List

In [17]:
# Schema of a recipe: a name, a list of ingredient strings, and the
# instructions as a single string. Each Field description is attached to the
# corresponding field of the model.
# NOTE(review): documented with comments rather than a class docstring because
# a docstring may be included in the schema generated from this model -
# confirm before adding one.
class Recipe(BaseModel):
    recipe_name : str = Field(description="The name of the recipe")
    ingredients: List[str] = Field(description="The ingredients for the recipe")
    instructions: str = Field(description="The instructions for the recipe")

In [23]:
# JSON parser configured with the Recipe model, and the format instructions
# it produces for the prompt.
pydantic_parser = JsonOutputParser(pydantic_object=Recipe)
pydantic_instructions = pydantic_parser.get_format_instructions()

# The format instructions are filled in up-front, so callers supply only `food`.
prompt = PromptTemplate(
    input_variables=["food"],
    partial_variables={"format_instructions": pydantic_instructions},
    template="Tell me the recipe for making {food}.\n{format_instructions}",
)

# Raw input -> `food` slot -> prompt -> model -> JSON parser.
chain = (
    {"food": RunnablePassthrough()}
    | prompt
    | llm
    | pydantic_parser
)

In [22]:
# Run the Recipe-schema chain; the result is shown below as a dict.
chain.invoke("chicken rice")

{'recipe_name': 'Chicken Rice',
 'ingredients': ['2 cups of white rice',
  '4 chicken thighs',
  '1 onion, chopped',
  '2 cloves of garlic, minced',
  '2 cups of chicken broth',
  '1 teaspoon of salt',
  '1/2 teaspoon of pepper',
  '1 tablespoon of olive oil'],
 'instructions': '1. In a large skillet, heat olive oil over medium heat. Add chicken thighs and cook until browned on both sides. Remove from skillet and set aside.\n2. In the same skillet, add chopped onion and garlic. Cook until softened.\n3. Add rice to the skillet and cook for 2-3 minutes, stirring frequently.\n4. Pour in chicken broth, salt, and pepper. Bring to a boil.\n5. Place the chicken thighs back into the skillet. Cover and simmer for 20 minutes or until rice is cooked and chicken is tender.\n6. Serve hot and enjoy!'}

#### JsonOutputParser

In [24]:
# JSON parser created without a Pydantic model, and the format instructions
# it produces for the prompt.
json_parser = JsonOutputParser()
json_instructions = json_parser.get_format_instructions()

# The format instructions are filled in up-front, so callers supply only `food`.
prompt = PromptTemplate(
    input_variables=["food"],
    partial_variables={"format_instructions": json_instructions},
    template="Tell me the recipe for making {food}.\n{format_instructions}",
)

# Raw input -> `food` slot -> prompt -> model -> JSON parser.
chain = (
    {"food": RunnablePassthrough()}
    | prompt
    | llm
    | json_parser
)

In [25]:
# Run the schema-less JSON chain; the keys of the result are chosen by the model.
chain.invoke("chicken rice")

{'recipe': {'title': 'Chicken Rice',
  'ingredients': ['1 whole chicken',
   '2 cups of white rice',
   '4 cups of chicken broth',
   '1 onion, chopped',
   '3 cloves of garlic, minced',
   '1 tablespoon of ginger, grated',
   '2 tablespoons of soy sauce',
   '1 teaspoon of sesame oil',
   'Salt and pepper to taste',
   'Green onions, chopped (optional)'],
  'instructions': ['In a large pot, bring the chicken broth to a boil.',
   'Add the whole chicken to the pot and simmer for about 45 minutes or until the chicken is cooked through.',
   'Remove the chicken from the pot and set aside to cool. Once cool, shred the chicken meat and discard the skin and bones.',
   'In a separate pot, heat some oil and sauté the onion, garlic, and ginger until fragrant.',
   'Add the white rice to the pot and stir to coat the rice with the aromatics.',
   'Pour in the chicken broth and bring to a boil. Reduce heat to low, cover, and simmer for about 20 minutes or until the rice is cooked.',
   'Stir in 

#### StructuredOutputParser

In [42]:
# One ResponseSchema per expected output key, built from (name, description) pairs.
response_schemas = [
    ResponseSchema(name=schema_name, description=schema_description)
    for schema_name, schema_description in (
        ("recipe_name", "The name of the recipe"),
        ("ingredients", "The ingredients for the recipe"),
        ("instructions", "The instructions for the recipe"),
    )
]

# Parser derived from the schemas above.
structured_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [43]:
# Format instructions generated by the structured parser.
structured_instructions = structured_parser.get_format_instructions()

# The format instructions are filled in up-front, so callers supply only `food`.
prompt = PromptTemplate(
    input_variables=["food"],
    partial_variables={"format_instructions": structured_instructions},
    template="Tell me the recipe for making {food}.\n{format_instructions}",
)

# Raw input -> `food` slot -> prompt -> model -> structured parser.
chain = (
    {"food": RunnablePassthrough()}
    | prompt
    | llm
    | structured_parser
)

In [44]:
# Run the structured chain; the result is shown below as a dict keyed by the schema names.
chain.invoke("almond cookies")

{'recipe_name': 'Almond Cookies',
 'ingredients': '1 cup almond flour, 1/4 cup sugar, 1/4 teaspoon salt, 1/4 teaspoon baking soda, 1/4 cup butter, 1/2 teaspoon almond extract, 1 egg',
 'instructions': '1. Preheat oven to 350°F (175°C). 2. In a bowl, mix almond flour, sugar, salt, and baking soda. 3. Add butter, almond extract, and egg to the dry ingredients. Mix until well combined. 4. Roll the dough into small balls and flatten them on a baking sheet lined with parchment paper. 5. Bake for 10-12 minutes or until the edges are golden brown. 6. Let the cookies cool before serving.'}

### Dataframe Parser

Note that this kind of query is limited in the types of questions it can answer. For extensive calculations, it is better to use an agent with a database to perform the query.

In [27]:
import numpy as np
import pandas as pd

# Seed NumPy's global generator so the sample data is the same on every run.
np.random.seed(42)

# Building blocks for the synthetic order data.
n_rows = 100  # Number of rows in the DataFrame
date_range = pd.date_range(start="2023-01-01", end="2023-12-31", periods=n_rows)
products = ['Laptop', 'Smartphone', 'Tablet', 'Headphones', 'Charger', 'Camera']
categories = ['Electronics', 'Accessories']
payment_types = ['Credit Card', 'PayPal', 'Bank Transfer']
countries = ['USA', 'Canada', 'UK', 'Germany', 'France', 'Australia']

# The dict entries are evaluated top to bottom, so their order fixes the order
# in which random numbers are drawn; TotalPrice is derived afterwards and
# appended as the last column.
df = pd.DataFrame(
    {
        'CustomerID': np.random.randint(1000, 2000, size=n_rows),
        'OrderID': np.random.randint(5000, 7000, size=n_rows),
        'OrderDate': np.random.choice(date_range, size=n_rows),
        'ProductName': np.random.choice(products, size=n_rows),
        'Category': np.random.choice(categories, size=n_rows, p=[0.8, 0.2]),
        'Quantity': np.random.randint(1, 5, size=n_rows),
        'UnitPrice': np.random.uniform(10, 1000, size=n_rows).round(2),
        'PaymentType': np.random.choice(payment_types, size=n_rows),
        'Country': np.random.choice(countries, size=n_rows),
    }
).assign(TotalPrice=lambda orders: orders['Quantity'] * orders['UnitPrice'])

# Show the first few rows as the cell output.
df.head()

Unnamed: 0,CustomerID,OrderID,OrderDate,ProductName,Category,Quantity,UnitPrice,PaymentType,Country,TotalPrice
0,1102,6579,2023-12-16 07:01:49.090909088,Headphones,Accessories,4,990.6,Credit Card,Australia,3962.4
1,1435,5161,2023-01-12 00:43:38.181818181,Tablet,Accessories,1,418.49,PayPal,UK,418.49
2,1860,5201,2023-12-08 22:32:43.636363636,Smartphone,Accessories,3,378.3,PayPal,USA,1134.9
3,1270,6981,2023-03-22 21:20:00.000000000,Tablet,Electronics,4,778.65,PayPal,USA,3114.6
4,1106,5995,2023-02-21 11:23:38.181818181,Tablet,Electronics,1,347.4,PayPal,France,347.4


In [31]:
# Parser bound to the sample DataFrame, and the format instructions it
# produces for the prompt.
df_parser = PandasDataFrameOutputParser(dataframe=df)
df_instructions = df_parser.get_format_instructions()

# The format instructions are filled in up-front, so callers supply only `query`.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": df_instructions},
    template="""Answer the user's question about the dataset.

    # Format instructions
    {format_instructions}
    
    # User's question
    {query}""",
)

# Raw input -> `query` slot -> prompt -> model -> DataFrame parser.
chain = (
    {"query": RunnablePassthrough()}
    | prompt
    | llm
    | df_parser
)

In [40]:
# Ask a natural-language question about `df`; the parsed result is shown below.
chain.invoke("Retrieve the average from quantity column from all row 1 to 50.")

{'mean': 2.66}

### OpenAI Functions

In [64]:
from langchain_core.utils.function_calling import convert_pydantic_to_openai_function

# Schema for the job details requested from the model: a name, a scope
# description, and a list of skill strings.
# NOTE(review): documented with comments rather than a class docstring because
# a docstring may end up in the generated function description - confirm
# before adding one.
class Job(BaseModel):
    job_name : str = Field(description="The name of the job")
    job_scope: str = Field(description="The job scope for this job")
    skills: List[str] = Field(description="The recommended skills for this job")

# Single-element list holding the function definition converted from Job;
# it is later passed to `llm.bind(functions=...)`.
openai_function_1 = [convert_pydantic_to_openai_function(Job)]

In [65]:
# Deliberately no `format_instructions` here: with function calling, the
# output structure comes from the function definition bound to the model
# below. In particular, the DataFrame parser's `df_instructions` do not belong
# in a job-details prompt, and leaving them out keeps this cell independent of
# the DataFrame section.
prompt = PromptTemplate(
    template="Provide job details for {job}.",
    input_variables=["job"],
)

# JsonOutputFunctionsParser: Returns the arguments of the function call as JSON
json_function_parser = JsonOutputFunctionsParser()

# Raw input -> `job` slot -> prompt -> model with the Job function bound -> parser.
chain = (
    {"job": RunnablePassthrough()}
    | prompt
    | llm.bind(functions=openai_function_1)
    | json_function_parser
)

In [57]:
# Run the function-calling chain; the function-call arguments are shown below as a dict.
chain.invoke("ai engineer")

{'job_name': 'AI Engineer',
 'job_scope': 'Developing AI models for various applications',
 'skills': ['Machine Learning',
  'Deep Learning',
  'Python',
  'Data Analysis',
  'TensorFlow',
  'PyTorch']}

In [62]:
# JsonKeyOutputFunctionsParser: Returns the value of a specific key in the function call as JSON.
# Here the key is "skills", so only that entry of the function-call arguments is returned.
json_key_function_parser = JsonKeyOutputFunctionsParser(key_name="skills")

In [59]:
# Function definition converted from the Job model, wrapped in a list for `llm.bind`.
openai_function_2 = [convert_pydantic_to_openai_function(Job)]

# Raw input -> `job` slot -> prompt -> model with the Job function bound -> key parser.
chain = (
    {"job": RunnablePassthrough()}
    | prompt
    | llm.bind(functions=openai_function_2)
    | json_key_function_parser
)

In [60]:
# Run the key-extracting chain; only the `skills` list is shown below.
chain.invoke("ai engineer")

['Python programming',
 'Machine Learning',
 'Deep Learning',
 'Natural Language Processing',
 'Computer Vision']

In [71]:
# For validation checks, we can use PydanticOutputFunctionsParser instead
# PydanticOutputFunctionsParser: Returns the arguments of the function call as a Pydantic Model
#
# NOTE: this redefines `Job` from the earlier cell, adding a validator that
# requires `skills` to hold exactly 5 items. Documented with comments rather
# than a class docstring because a docstring may end up in the generated
# function description.
class Job(BaseModel):
    job_name : str = Field(description="The name of the job")
    job_scope: str = Field(description="The job scope for this job")
    skills: List[str] = Field(description="The top 5 skills for this job")

    @validator('skills')
    def skills_length(cls, value):
        """Return `value` unchanged if it has exactly 5 items.

        Raises:
            ValueError: if the skills list has any other length.
        """
        if len(value) != 5:
            raise ValueError('skills should have exactly 5 items')
        return value

# NOTE: rebinds `pydantic_parser`, which an earlier cell used for a JsonOutputParser.
pydantic_parser = PydanticOutputFunctionsParser(pydantic_schema=Job)

In [72]:
# Function definition converted from the Job model, wrapped in a list for `llm.bind`.
openai_function_3 = [convert_pydantic_to_openai_function(Job)]

# Raw input -> `job` slot -> prompt -> model with the Job function bound -> Pydantic parser.
chain = (
    {"job": RunnablePassthrough()}
    | prompt
    | llm.bind(functions=openai_function_3)
    | pydantic_parser
)

In [69]:
# Run the Pydantic-parsing chain; the result is shown below as a `Job` instance.
chain.invoke("data scientist")

Job(job_name='data scientist', job_scope='analyze and interpret complex data', skills=['python', 'machine learning', 'statistics', 'data visualization', 'communication skills'])

In [73]:
# PydanticAttrOutputFunctionsParser: parses the function call with the Job
# schema and is configured here to return its `skills` attribute.
pydantic_key_parser = PydanticAttrOutputFunctionsParser(
    pydantic_schema=Job,
    attr_name="skills",
)

# Function definition converted from the Job model, wrapped in a list for `llm.bind`.
openai_function_4 = [convert_pydantic_to_openai_function(Job)]

# Raw input -> `job` slot -> prompt -> model with the Job function bound -> attribute parser.
chain = (
    {"job": RunnablePassthrough()}
    | prompt
    | llm.bind(functions=openai_function_4)
    | pydantic_key_parser
)

In [74]:
# Run the attribute-extracting chain; only the `skills` list is shown below.
chain.invoke("data scientist")

['Python',
 'R',
 'Machine Learning',
 'Data Visualization',
 'Statistical Analysis']