Output Parsing

1. StrOutputParser
2. JsonOutputParser
3. CSV Output Parser
4. Datetime Output Parser
5. Structured Output Parser (Pydanitc or Json)

In [1]:
# langsmith(online), langfuse(offline), opik(offline) : LLM observability tool
# pip install python-dotenv
# 랭스미스를 env파일을 통해 셋업
from dotenv import load_dotenv

load_dotenv('./../.env')

True

In [2]:
# Ollama를 통해 모델 로드
from langchain_ollama import ChatOllama
from langchain_core.prompts import (
    SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate
)

base_url = "http://localhost:11434"
model = 'llama3.2:1b'

llm = ChatOllama(base_url=base_url, model=model)

response = llm.invoke('hello there!')
print(response.content)

Hello! How can I help you today?


In [3]:
# Pydantic
from typing import Optional
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser

In [4]:
class Joke(BaseModel):
    """Joke to tell user"""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating : Optional[int] = Field(description="The rating of the joke is from 1 to 10")

In [5]:
parser = PydanticOutputParser(pydantic_object=Joke)

instruction = parser.get_format_instructions()
print(instruction)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"description": "Joke to tell user", "properties": {"setup": {"description": "The setup of the joke", "title": "Setup", "type": "string"}, "punchline": {"description": "The punchline of the joke", "title": "Punchline", "type": "string"}, "rating": {"anyOf": [{"type": "integer"}, {"type": "null"}], "description": "The rating of the joke is from 1 to 10", "title": "Rating"}}, "required": ["setup", "punchline", "rating"]}
```


In [11]:
prompt = PromptTemplate(
    template='''
        Answer the user query with a joke. Here is your formatting instruction.
        {format_instruction}

        Query: {query}
        Answer:
    ''',
    input_variables=['query'],
    partial_variables={'format_instruction': parser.get_format_instructions()}
)


In [15]:
chain = prompt | llm
output = chain.invoke({'query':'Tell me a joke about the cat'})
print(output.content)

{
  "$schema": "https://json-schema.org/draft-07/schema#",
  "description": "Joke to tell user",
  "properties": {
    "setup": {"type": "string", "title": "Cat joke setup"},
    "punchline": {"type": "string", "title": "Cat joke punchline"},
    "rating": {"anyOf": [
      { "type": "integer", "description": "Rating from 1 to 10" },
      { "type": "null", "description": "No rating specified" }
    ], "title": "Cat joke rating"}
  },
  "required": ["setup", "punchline", "rating"]
}


In [None]:
# llama:3b 이상부터 지원
chain = prompt | llm | parser
output = chain.invoke({'query':'Tell me a joke about the cat'})
print(output.content)

OutputParserException: Failed to parse Joke from completion [{"$schema": "https://openapi-schemas.com/2023-01-01/pet-joke-openapi.json", "description": "Joke to tell user", "properties": {"setup": {"type": "string", "title": "The setup of the joke"}, "punchline": {"type": "string", "title": "The punchline of the joke"}, "rating": {"anyOf": [{"type": "integer"}, {"type": "null"}], "description": "The rating of the joke is from 1 to 10", "title": "Rating"}}, "required": ["setup", "punchline", "rating"]}]. Got: 1 validation error for Joke
  Input should be a valid dictionary or instance of Joke [type=model_type, input_value=[{'$schema': 'https://ope...'punchline', 'rating']}], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

In [18]:
# .with_structured_output()
structured_llm = llm.with_structured_output(Joke)

output = structured_llm.invoke('Tell me a joke about the cat')
print(output)

setup='Why did the cat join a band?' punchline='Because it wanted to be a purr-cussionist!' rating=8


In [19]:
# JSON Output Parser
from langchain_core.output_parsers import JsonOutputParser

parser = JsonOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template='''
        Answer the user query with a joke. Here is your formatting instruction.
        {format_instruction}

        Query: {query}
        Answer:
    ''',
    input_variables=['query'],
    partial_variables={'format_instruction': parser.get_format_instructions()}
)

chain = prompt | llm
output = chain.invoke({'query':'Tell me a joke about the cat'})
print(output.content)

{
  "properties": {
    "setup": {"description": "The setup of the joke", "title": "Cat", "type": "string"},
    "punchline": {"description": "The punchline of the joke", "title": "Punchline", "type": "string"}
  },
  "required": ["setup", "punchline"]
}


In [21]:
chain = prompt | llm | parser
output = chain.invoke({'query':'Tell me a joke about the cat'})
print(output)

{'$schema': 'http://json-schema.org/draft-07/schema#', 'description': 'Joke to tell user', 'properties': {'setup': {'description': 'The setup of the joke', 'title': 'Setup', 'type': 'string'}, 'punchline': {'description': 'The punchline of the joke', 'title': 'Punchline', 'type': 'string'}, 'rating': {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'description': 'The rating of the joke is from 1 to 10', 'title': 'Rating'}}, 'required': ['setup', 'punchline', 'rating']}


In [24]:
# CSV Parser
from langchain_core.output_parsers import CommaSeparatedListOutputParser

parser = CommaSeparatedListOutputParser()

format_instruction = parser.get_format_instructions()

prompt = PromptTemplate(
    template='''
        Answer the user query with a joke. Here is your formatting instruction.
        {format_instruction}

        Query: {query}
        Answer:
    ''',
    input_variables=['query'],
    partial_variables={'format_instruction': format_instruction}
)

chain = prompt | llm | parser
output = chain.invoke({'query': 'Generate my website seo keywords. I have content about the NLP and LLM.'})
print(output)

['neural network', 'language model', 'natural language processing', 'long short-term memory', 'keyword research', 'search engine optimization', 'lla', 'nlp research', 'machine learning', 'text analysis', 'artificial intelligence', 'computer science', 'web development', 'website optimization', 'seo best practices', 'keyword suggestions', 'content creation.']


In [27]:
# Datetime Output parser (응답에서 시간값만 얻고 싶다면)

from langchain.output_parsers import DatetimeOutputParser

parser = DatetimeOutputParser()

format_instruction = parser.get_format_instructions()

prompt = PromptTemplate(
    template='''
        Answer the user query with a datetime. Here is your formatting instruction.
        {format_instruction}

        Query: {query}
        Answer:
    ''',
    input_variables=['query'],
    partial_variables={'format_instruction': format_instruction}
)

chain = prompt | llm | parser
output = chain.invoke({'query': 'When the america got discovered?'})
print(output)

0401-07-17 16:15:20
