## Output Parser

In [1]:
!pip install langchain_openai



In [1]:
from langchain_openai import ChatOpenAI
# The way this class is imported keeps changing as the library is updated;
# earlier variants:
# from langchain.chat_models import ChatOpenAI
# from langchain_community.chat_models import ChatOpenAI

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

In [2]:
# Chat model used by the chains below. temperature=0 is presumably chosen so
# the example outputs vary as little as possible between runs.
model = ChatOpenAI(temperature=0)

### StrOutputParser

In [3]:
from langchain.schema import StrOutputParser

In [4]:
# Keyword-extraction prompt: the human message asks the model to pull keywords
# out of {text} and return them separated by commas.
# (The original note on this cell labelled it a "summary template".)
human_message_prompt = "'{text}' 여기서 키워드를 뽑아서 콤마로 구분해줘"
human_message_prompt_template = HumanMessagePromptTemplate.from_template(human_message_prompt)
# Wrap the single human message into a chat prompt that takes a "text" input.
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt_template])

In [5]:
# Compose prompt -> chat model -> StrOutputParser into a single chain.
chain_with_output_parser = chat_prompt_template | model | StrOutputParser()

In [6]:
# Run the chain once; the "text" entry fills the {text} placeholder of the prompt.
out = chain_with_output_parser.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [7]:
# Show the result produced by the StrOutputParser chain.
out

'LangChain, framework, developing applications, language models'

In [8]:
# Check the result type; the captured output below shows a plain str.
type(out)

str

### CSV Parser

In [9]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate

In [10]:
# Library parser for comma-separated replies (imported from langchain.output_parsers).
output_parser = CommaSeparatedListOutputParser()

# Instruction text to embed in the prompt so the model answers in the format
# this parser expects.
format_instructions = output_parser.get_format_instructions()

In [11]:
# Inspect the instruction text generated by the parser.
format_instructions

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

In [12]:
# Prompt with one runtime input, {subject}. {format_instructions} is bound up
# front through partial_variables, so later calls only pass "subject".
prompt = PromptTemplate(
    template="{subject}에 대한 키워드를 추출해줘.\n{format_instructions}",
    input_variables=["subject"],
    partial_variables={"format_instructions": format_instructions},
)

In [13]:
# Invoke only the prompt (the model is not part of this call) to see the
# fully rendered text.
p = prompt.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [14]:
# Show the rendered prompt value.
p

StringPromptValue(text='LangChain is a framework for developing applications powered by language models.에 대한 키워드를 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`')

In [15]:
# View the same prompt value as a list of chat messages.
p.to_messages()

[HumanMessage(content='LangChain is a framework for developing applications powered by language models.에 대한 키워드를 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`', additional_kwargs={}, response_metadata={})]

In [16]:
# Compose prompt -> chat model -> comma-separated list parser.
# NOTE: the name `chain` is rebound by a later cell in this notebook.
chain = prompt | model | output_parser

In [17]:
# Run the list-parser chain; this rebinds `out` from the earlier string result.
out = chain.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [18]:
# Show the parsed keywords.
out

['framework', 'developing applications', 'language models']

In [19]:
# Check the result type; the captured output below shows a list.
type(out)

list

In [20]:
# Individual keywords are available by index.
out[0]

'framework'

### 데이터 형식 정의하기

In [21]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
# NOTE(review): the warning captured under this cell recommends replacing the
# langchain_core.pydantic_v1 import with `from pydantic import BaseModel`
# (or `from pydantic.v1 import BaseModel` for code not yet on pydantic 2).
from langchain_core.pydantic_v1 import BaseModel, Field


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [22]:
# Rebind `model` to a new ChatOpenAI created with the same temperature=0
# argument as the earlier cell.
model = ChatOpenAI(temperature=0)

In [23]:
# Define your desired data structure.
# Translation is the schema passed to JsonOutputParser in the next cell. It has
# one string field, and the field's description (Korean for "translated text")
# shows up in the generated format instructions.

class Translation(BaseModel):
    translated_text: str = Field(description="번역된 텍스트")

In [24]:
# And a query intended to prompt a language model to populate the data structure.
query = "안녕 세상"

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Translation)
from_instruction = parser.get_format_instructions()
# Display the generated instructions; they embed the JSON schema of Translation.
from_instruction

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "번역된 텍스트", "type": "string"}}, "required": ["translated_text"]}\n```'

In [25]:
# Translation prompt: the JSON format instructions come first, then the
# instruction line, then {query}. This rebinds `prompt` from the earlier
# keyword-extraction template; only "query" has to be supplied at call time.
prompt = PromptTemplate(
    template="{format_instructions}\n아래 내용을 번역해라\n{query}",
    input_variables=["query"],
    partial_variables={"format_instructions": from_instruction},
)

In [26]:
# Render the prompt by itself and view it as chat messages.
prompt.invoke({"query": query}).to_messages()

[HumanMessage(content='The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "번역된 텍스트", "type": "string"}}, "required": ["translated_text"]}\n```\n아래 내용을 번역해라\n안녕 세상', additional_kwargs={}, response_metadata={})]

In [27]:
# Compose prompt -> chat model -> JSON parser (rebinds `chain`).
chain = prompt | model | parser

# Run the chain on the query; `out` is rebound to the parsed result.
out = chain.invoke({"query": query})

In [28]:
# Check the result type; the captured output below shows a dict.
type(out)

dict

In [29]:
# Read the field declared on Translation from the parsed dict.
out['translated_text']

'Hello World'

### Custom Output Parser

In [30]:
from langchain.schema import BaseOutputParser

class CommaSeparatedListOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list.

    This class has the same name as the parser imported from
    ``langchain.output_parsers`` earlier in the notebook, so it shadows that
    import once this cell has run.
    """

    def parse(self, text: str) -> list[str]:
        """Split raw model text into a list of trimmed, non-empty items.

        Args:
            text: Raw text returned by the model, e.g. ``"foo, bar,baz"``.

        Returns:
            The comma-separated items in their original order, each stripped
            of surrounding whitespace. Empty items (from blank input, doubled
            commas or a trailing comma) are dropped.
        """
        # Split on the bare comma and trim each piece afterwards. Splitting on
        # ", " would leave "foo,bar" as a single item and keep stray spaces or
        # a trailing comma attached to the items.
        pieces = (piece.strip() for piece in text.split(","))
        return [piece for piece in pieces if piece]

In [31]:
# Reuse the chat prompt from the StrOutputParser section, but end the chain
# with the custom parser defined in the previous cell.
chain_with_comma_parser = chat_prompt_template | model | CommaSeparatedListOutputParser()

In [32]:
# Run the custom-parser chain; `out` is rebound to its result.
out = chain_with_comma_parser.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [33]:
# Show the list produced by the custom parser.
out

['LangChain', 'framework', 'developing applications', 'language models']

In [34]:
# Print each parsed keyword on its own line.
for keyword in out:
    print(keyword)

LangChain
framework
developing applications
language models
