In [None]:
from typing import List
from datetime import datetime
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import (
    StrOutputParser,
    JsonOutputParser,
    PydanticOutputParser
)
from langchain_core.exceptions import OutputParserException
from langchain.output_parsers import OutputFixingParser
from dotenv import load_dotenv
from rich import pretty
import os

In [11]:
load_dotenv()
pretty.install()

In [12]:
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.0,
    api_key=os.getenv("OPEN_AI_API_KEY"),
)

## Output Parsers

**String Parser**

In [13]:
llm.invoke("hello")


[1;35mAIMessage[0m[1m([0m
    [33mcontent[0m=[32m'Hello! How can I assist you today?'[0m,
    [33madditional_kwargs[0m=[1m{[0m[32m'refusal'[0m: [3;35mNone[0m[1m}[0m,
    [33mresponse_metadata[0m=[1m{[0m
        [32m'token_usage'[0m: [1m{[0m
            [32m'completion_tokens'[0m: [1;36m9[0m,
            [32m'prompt_tokens'[0m: [1;36m8[0m,
            [32m'total_tokens'[0m: [1;36m17[0m,
            [32m'completion_tokens_details'[0m: [1m{[0m
                [32m'accepted_prediction_tokens'[0m: [1;36m0[0m,
                [32m'audio_tokens'[0m: [1;36m0[0m,
                [32m'reasoning_tokens'[0m: [1;36m0[0m,
                [32m'rejected_prediction_tokens'[0m: [1;36m0[0m
            [1m}[0m,
            [32m'prompt_tokens_details'[0m: [1m{[0m[32m'audio_tokens'[0m: [1;36m0[0m, [32m'cached_tokens'[0m: [1;36m0[0m[1m}[0m
        [1m}[0m,
        [32m'model_name'[0m: [32m'gpt-4o-mini-2024-07-18'[0m,
        [3

In [14]:
llm.invoke("hello").content

[32m'Hello! How can I assist you today?'[0m

In [15]:
parser = StrOutputParser()

In [16]:
parser.invoke(
    llm.invoke("hello")
)

[32m'Hello! How can I assist you today?'[0m

### Other Parsers

**Datetime**

In [17]:
llm.invoke(
    "Output a random datetime in %Y-%m-%dT%H:%M:%S.%fZ. "
    "Don't say anything else"
)


[1;35mAIMessage[0m[1m([0m
    [33mcontent[0m=[32m'2023-10-05T14:23:45.123456Z'[0m,
    [33madditional_kwargs[0m=[1m{[0m[32m'refusal'[0m: [3;35mNone[0m[1m}[0m,
    [33mresponse_metadata[0m=[1m{[0m
        [32m'token_usage'[0m: [1m{[0m
            [32m'completion_tokens'[0m: [1;36m16[0m,
            [32m'prompt_tokens'[0m: [1;36m33[0m,
            [32m'total_tokens'[0m: [1;36m49[0m,
            [32m'completion_tokens_details'[0m: [1m{[0m
                [32m'accepted_prediction_tokens'[0m: [1;36m0[0m,
                [32m'audio_tokens'[0m: [1;36m0[0m,
                [32m'reasoning_tokens'[0m: [1;36m0[0m,
                [32m'rejected_prediction_tokens'[0m: [1;36m0[0m
            [1m}[0m,
            [32m'prompt_tokens_details'[0m: [1m{[0m[32m'audio_tokens'[0m: [1;36m0[0m, [32m'cached_tokens'[0m: [1;36m0[0m[1m}[0m
        [1m}[0m,
        [32m'model_name'[0m: [32m'gpt-4o-mini-2024-07-18'[0m,
        [32m'sy

In [18]:
parser = DatetimeOutputParser()

NameError: name 'DatetimeOutputParser' is not defined

In [10]:
parser.invoke(
    llm.invoke(
        "Output a random datetime in %Y-%m-%dT%H:%M:%S.%fZ. "
        "Don't say anything else"
    )
)

datetime.datetime(2023, 10, 5, 14, 23, 45, 123456)

**Boolean**

In [11]:
llm.invoke(
    "Are you an AI? YES or NO only"
)

AIMessage(content='YES', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 16, 'total_tokens': 18, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_06737a9306', 'finish_reason': 'stop', 'logprobs': None}, id='run-56e74bdc-c848-46e8-a9a4-caa9f8b64b72-0', usage_metadata={'input_tokens': 16, 'output_tokens': 2, 'total_tokens': 18, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [12]:
parser = BooleanOutputParser()

In [13]:
parser.invoke(
    input=llm.invoke(
        "Are you an AI? YES or NO only"
    )
)

True

In [14]:
parser.invoke(
    input=llm.invoke(
        "Are you Human? YES or NO only"
    )
)

False

## Structured

**Dict Schema**

In [19]:
from typing_extensions import Annotated, TypedDict

class UserInfo(TypedDict):
    """User's info."""
    name: Annotated[str, "", "User's name. Defaults to ''"]
    country: Annotated[str, "", "Where the user lives. Defaults to ''"]


In [20]:
llm_with_structure = llm.with_structured_output(UserInfo)

In [21]:
llm_with_structure.invoke(
    "My name is Henrique, and I am from Brazil"
)

[1m{[0m[32m'name'[0m: [32m'Henrique'[0m, [32m'country'[0m: [32m'Brazil'[0m[1m}[0m

In [22]:
llm_with_structure.invoke(
    "The sky is blue"
)

[1m{[0m[1m}[0m

In [23]:
llm_with_structure.invoke(
    "Hello, my name is the same as the capital of the U.S.  "
    "But I'm from a country where we usually associate with kangaroos"
)

[1m{[0m[32m'name'[0m: [32m'Washington'[0m, [32m'country'[0m: [32m'Australia'[0m[1m}[0m

**Pydantic**

In [24]:
from pydantic import BaseModel, Field

class PydanticUserInfo(BaseModel):
    """User's info."""
    name: Annotated[str, Field(description="User's name. Defaults to ''", default=None)]
    country: Annotated[str, Field(description="Where the user lives. Defaults to ''", default=None, )]

In [25]:
llm_with_structure = llm.with_structured_output(PydanticUserInfo)

In [26]:
structured_output = llm_with_structure.invoke("The sky is blue")

In [27]:
structured_output

[1;35mPydanticUserInfo[0m[1m([0m[33mname[0m=[3;35mNone[0m, [33mcountry[0m=[3;35mNone[0m[1m)[0m

In [28]:
print(structured_output.name)

None


In [29]:
print(structured_output.country)

None


In [30]:
structured_output = llm_with_structure.invoke(
    "Hello, my name is the same as the capital of the U.S.  "
    "But I'm from a country where we usually associate with kangaroos"
)

In [31]:
structured_output

[1;35mPydanticUserInfo[0m[1m([0m[33mname[0m=[32m'Washington'[0m, [33mcountry[0m=[32m'Australia'[0m[1m)[0m

## Dealing with Errors

In [32]:
class Performer(BaseModel):
    """Filmography info about an actor/actress"""
    name: Annotated[str, Field(description="name of an actor/actress")]
    film_names: Annotated[List[str], Field(description="list of names of films they starred in")]

In [33]:
llm_with_structure = llm.with_structured_output(Performer)

In [34]:
response = llm_with_structure.invoke(
    "Generate the filmography for Scarlett Johansson. Top 5 only"
)
response


[1;35mPerformer[0m[1m([0m
    [33mname[0m=[32m'Scarlett Johansson'[0m,
    [33mfilm_names[0m=[1m[[0m[32m'Lost in Translation'[0m, [32m'The Avengers'[0m, [32m'Her'[0m, [32m'Marriage Story'[0m, [32m'Black Widow'[0m[1m][0m
[1m)[0m

**Fixing Parser**

In [35]:
response.json()

/tmp/ipykernel_2412/690762135.py:1: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  response.json()


[32m'[0m[32m{[0m[32m"name":"Scarlett Johansson","film_names":[0m[32m[[0m[32m"Lost in Translation","The Avengers","Her","Marriage Story","Black Widow"[0m[32m][0m[32m}[0m[32m'[0m

In [36]:
parser = PydanticOutputParser(pydantic_object=Performer)

In [37]:
parser.parse(response.json())

/tmp/ipykernel_2412/2130552313.py:1: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  parser.parse(response.json())



[1;35mPerformer[0m[1m([0m
    [33mname[0m=[32m'Scarlett Johansson'[0m,
    [33mfilm_names[0m=[1m[[0m[32m'Lost in Translation'[0m, [32m'The Avengers'[0m, [32m'Her'[0m, [32m'Marriage Story'[0m, [32m'Black Widow'[0m[1m][0m
[1m)[0m

In [38]:
misformatted_result = "{'name': 'Scarlett Johansson', 'film_names': ['The Avengers']}"

In [39]:
try:
    parser.parse(misformatted_result)
except OutputParserException as e:
    print(e)

Invalid json output: {'name': 'Scarlett Johansson', 'film_names': ['The Avengers']}
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 


In [40]:
new_parser = OutputFixingParser.from_llm(parser=parser, llm=llm)

In [41]:
new_parser.parse(misformatted_result)


[1;35mPerformer[0m[1m([0m
    [33mname[0m=[32m'Scarlett Johansson'[0m,
    [33mfilm_names[0m=[1m[[0m[32m'The Avengers'[0m, [32m'Lost in Translation'[0m, [32m'Marriage Story'[0m, [32m'Black Widow'[0m[1m][0m
[1m)[0m