<a href="https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Managing_Outputs_with_Output_Parsers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q langchain==0.0.208 openai==0.27.8 python-dotenv

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

# PydanticOutputParser

## Imports

In [4]:
from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.schema import OutputParserException

In [5]:
from pydantic import BaseModel, Field, validator
from typing import List

In [6]:
# Chat model shared by every chain and every LLM-backed parser in this notebook.
model_name = 'gpt-4o-mini'
# NOTE(review): 0.0 is presumably chosen to keep the recorded outputs as repeatable as possible — confirm.
temperature = 0.0
model = ChatOpenAI(model_name=model_name, temperature=temperature)

## Documentation Example

In [7]:
# Define your desired data structure.
class Joke(BaseModel):
    # The field descriptions are part of the schema that the parser injects
    # into the prompt as format instructions.
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator('setup')
    def question_ends_with_question_mark(cls, field):
        """Reject a setup that does not end with a question mark.

        Args:
            field: the value supplied for ``setup``.

        Returns:
            The unchanged value when it ends with ``?``.

        Raises:
            ValueError: if the value does not end with ``?`` (this includes
                an empty string).
        """
        # endswith() is safe on an empty string, whereas field[-1] would raise
        # an IndexError instead of a validation error.
        if not field.endswith('?'):
            raise ValueError("Badly formed question!")
        return field

In [8]:
# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

In [9]:
# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)

In [10]:
# Only {query} is supplied when the chain runs; {format_instructions} is filled
# in once, up front, with the schema description generated by the parser.
prompt_template = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [11]:
prompt_template

PromptTemplate(input_variables=['query'], output_parser=None, partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"setup": {"title": "Setup", "description": "question to set up a joke", "type": "string"}, "punchline": {"title": "Punchline", "description": "answer to resolve the joke", "type": "string"}}, "required": ["setup", "punchline"]}\n```'}, template='Answer the user query.\n{format_instructions}\n{query}\n', template_format='f-string', validate_template=True)

In [12]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [13]:
# Run the LLMChain to get the AI-generated answer
output = chain.run(joke_query)

In [14]:
parser.parse(output)

Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything!')

In [15]:
# Here's another example, but with a compound typed field.
class Actor(BaseModel):
    # Plain string field.
    name: str = Field(description="name of an actor")
    # The compound field: a list of strings, which shows up in the generated
    # schema as an array whose items are strings.
    film_names: List[str] = Field(description="list of names of films they starred in")

In [16]:
actor_query = "Generate the filmography for a random actor."

In [17]:
parser = PydanticOutputParser(pydantic_object=Actor)

In [18]:
prompt_template = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [19]:
prompt_template

PromptTemplate(input_variables=['query'], output_parser=None, partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"name": {"title": "Name", "description": "name of an actor", "type": "string"}, "film_names": {"title": "Film Names", "description": "list of names of films they starred in", "type": "array", "items": {"type": "string"}}}, "required": ["name", "film_names"]}\n```'}, template='Answer the user query.\n{format_instructions}\n{query}\n', template_format='f-string', validate_template=True)

In [20]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [22]:
# Run the LLMChain to get the AI-generated answer
output = chain.run(actor_query)

In [23]:
parser.parse(output)

Actor(name='Leonardo DiCaprio', film_names=['Titanic', 'Inception', 'The Revenant', 'The Wolf of Wall Street', 'Shutter Island', 'Catch Me If You Can', 'Once Upon a Time in Hollywood', 'The Aviator', 'Gangs of New York', 'Django Unchained'])

## My Example

In [30]:
# Define your desired data structure.
class Suggestions(BaseModel):
    # Candidate replacement words for the target word.
    words: List[str] = Field(description="list of substitue words based on context")

    # Throw an error when the API returns a numbered list
    @validator('words')
    def not_start_with_number(cls, field):
        """Reject the list if any word starts with a numeric character.

        Args:
            field: the full list supplied for ``words`` (the validator runs on
                the list as a whole, not on each item).

        Returns:
            The unchanged list when no word starts with a numeric character.

        Raises:
            ValueError: if at least one word starts with a numeric character,
                e.g. ``"1. conduct"``.
        """
        for item in field:
            # Look only at the first character; slicing (rather than indexing)
            # keeps an empty string from raising IndexError.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

In [31]:
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [32]:
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context.
{format_instructions}
target_word={target_word}
context={context}
"""

In [33]:
target_word="behaviour"
context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson."

In [34]:
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [35]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [36]:
# Run the LLMChain to get the AI-generated answer
output = chain.run({"target_word": target_word, "context":context})

In [37]:
parser.parse(output)

Suggestions(words=['conduct', 'action', 'demeanor', 'attitude', 'performance', 'manner', 'response', 'conduct', 'deportment'])

## My Example, Multiple Outputs

In [38]:
# Define your desired data structure.
class Suggestions(BaseModel):
    # Candidate replacement words for the target word.
    words: List[str] = Field(description="list of substitue words based on context")
    # One explanation per suggested word is requested by the prompt.
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

    # Throw an error when the API returns a numbered list
    @validator('words')
    def not_start_with_number(cls, field):
        """Reject the list if any word starts with a numeric character.

        Args:
            field: the full list supplied for ``words``.

        Returns:
            The unchanged list when no word starts with a numeric character.

        Raises:
            ValueError: if at least one word starts with a numeric character.
        """
        for item in field:
            # Slicing instead of indexing keeps an empty string from raising
            # IndexError.
            if item[:1].isnumeric():
                raise ValueError("The word can not start with numbers!")
        return field

    @validator('reasons')
    def end_with_dot(cls, field):
        """Make sure every non-empty reason ends with a full stop.

        Args:
            field: the full list supplied for ``reasons``.

        Returns:
            A new list in which each non-empty reason ends with ``.``; empty
            strings are passed through unchanged.
        """
        # Build a new list rather than editing the incoming one in place.
        return [
            item if not item or item.endswith(".") else item + "."
            for item in field
        ]

In [39]:
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [40]:
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""



In [41]:
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [42]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [43]:
# Run the LLMChain to get the AI-generated answer
output = chain.run({"target_word": target_word, "context":context})

In [45]:
parsed_output = parser.parse(output)

In [48]:
# Iterating over the parsed model yields one (field_name, value) pair per field;
# print the position and the pair on separate lines.
for position, name_and_value in enumerate(parsed_output):
    print(position, name_and_value, sep="\n")

0
('words', ['conduct', 'actions', 'demeanor', 'attitude', 'mannerisms'])
1
('reasons', ["The word 'conduct' refers to the way in which a person behaves, especially in a specific context, making it suitable for describing students' actions in the classroom.", "The term 'actions' captures the specific things that students do, which aligns with the context of their disruptive behaviour.", "The word 'demeanor' relates to the outward behavior or bearing of the students, which is relevant in assessing how they are perceived in the classroom.", "The term 'attitude' reflects the students' mental state or disposition, which can influence their behaviour in a learning environment.", "The word 'mannerisms' refers to the habitual gestures or behaviors of the students, which can contribute to the overall disruptive nature mentioned in the context."])


# CommaSeparatedListOutputParser

## Imports

In [None]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser

## My example

In [None]:
parser = CommaSeparatedListOutputParser()

In [None]:
template = """
Offer a list of suggestions to substitue the word '{target_word}' based the presented the following text: {context}.
{format_instructions}
"""

In [None]:
prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [None]:
chain = LLMChain(llm=model, prompt=prompt_template)

In [None]:
# Run the LLMChain to get the AI-generated answer
output = chain.run({"target_word": target_word, "context":context})

In [None]:
parser.parse(output)

['conduct',
 'actions',
 'demeanor',
 'conduct',
 'performance',
 'attitude',
 'manners',
 'actions',
 'deportment',
 'mannerisms']

# OutputFixingParser

In [None]:
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import OutputFixingParser

In [None]:
from pydantic import BaseModel, Field
from typing import List

In [None]:
# Define your desired data structure.
class Suggestions(BaseModel):
    # Candidate replacement words for the target word.
    words: List[str] = Field(description="list of substitue words based on context")
    # Explanations for the suggestions. The field is required, so a payload
    # without a "reasons" key fails validation.
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

# Parser that checks a completion against the Suggestions schema.
parser = PydanticOutputParser(pydantic_object=Suggestions)

## Example can fix

In [None]:
missformatted_output = '{"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}'

In [None]:
# This call is expected to fail: the payload uses the key "reasoning" where the
# schema requires "reasons". Catch and print the error so that the
# demonstration does not halt Restart & Run All.
try:
    parser.parse(missformatted_output)
except OutputParserException as err:
    print(f"{type(err).__name__}: {err}")

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"], "reasoning": ["refers to the way someone acts in a particular situation.", "refers to the way someone behaves in a particular situation."]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [None]:
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)

In [None]:
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['refers to the way someone acts in a particular situation.', 'refers to the way someone behaves in a particular situation.'])

## Example can NOT fix

In [None]:
missformatted_output = '{"words": ["conduct", "manner"]}'

In [None]:
# This call is expected to fail: the payload has no "reasons" key at all.
# Catch and print the error so that the demonstration does not halt
# Restart & Run All.
try:
    parser.parse(missformatted_output)
except OutputParserException as err:
    print(f"{type(err).__name__}: {err}")

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [None]:
outputfixing_parser = OutputFixingParser.from_llm(parser=parser, llm=model)

In [None]:
outputfixing_parser.parse(missformatted_output)

Suggestions(words=['conduct', 'manner'], reasons=['These words describe the behavior or way of carrying oneself in a given situation.'])

# RetryWithErrorOutputParser

In [None]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import RetryWithErrorOutputParser

In [None]:
from pydantic import BaseModel, Field, validator
from typing import List

In [None]:
# Define your desired data structure.
class Suggestions(BaseModel):
    # Candidate replacement words for the target word.
    words: List[str] = Field(description="list of substitue words based on context")
    # Explanations for the suggestions. The field is required, so a payload
    # without a "reasons" key fails validation.
    reasons: List[str] = Field(description="the reasoning of why this word fits the context")

# Parser that checks a completion against the Suggestions schema.
parser = PydanticOutputParser(pydantic_object=Suggestions)

In [None]:
# Prompt text. {format_instructions} is pre-filled through partial_variables
# below; {target_word} and {context} are supplied when the prompt is formatted.
template = """
Offer a list of suggestions to substitue the specified target_word based the presented context and the reasoning for each word.
{format_instructions}
target_word={target_word}
context={context}
"""

prompt_template = PromptTemplate(
    template=template,
    input_variables=["target_word", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Keep the fully formatted prompt: it is handed to parse_with_prompt() further
# down, together with the malformed completion.
model_input = prompt_template.format_prompt(target_word="behaviour", context="The behaviour of the students in the classroom was disruptive and made it difficult for the teacher to conduct the lesson.")

In [None]:
missformatted_output = '{"words": ["conduct", "manner"]}'

In [None]:
# This call is expected to fail: the payload has no "reasons" key at all.
# Catch and print the error so that the demonstration does not halt
# Restart & Run All.
try:
    parser.parse(missformatted_output)
except OutputParserException as err:
    print(f"{type(err).__name__}: {err}")

OutputParserException: Failed to parse Suggestions from completion {"words": ["conduct", "manner"]}. Got: 1 validation error for Suggestions
reasons
  field required (type=value_error.missing)

In [None]:
retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=model)

In [None]:
retry_parser.parse_with_prompt(missformatted_output, model_input)

Suggestions(words=['conduct', 'manner'], reasons=['These words both refer to the way in which something is carried out, which is relevant to the context of the disruptive behavior in the classroom.'])