# Output Parser

In [2]:
!pip install langchain_openai

Collecting langchain_openai
  Downloading langchain_openai-0.3.14-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-core<1.0.0,>=0.3.53 (from langchain_openai)
  Downloading langchain_core-0.3.54-py3-none-any.whl.metadata (5.9 kB)
Collecting tiktoken<1,>=0.7 (from langchain_openai)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading langchain_openai-0.3.14-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.54-py3-none-any.whl (433 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m433.9/433.9 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling coll

In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

In [6]:
from google.colab import userdata
key = userdata.get('Openai')

In [8]:
model = ChatOpenAI(api_key=key, temperature=0)

## StrOutputParser

In [9]:
from langchain_core.output_parsers import StrOutputParser

In [10]:
# 요약 template

human_messages_prompt = "'{text}' 여기서 키워드를 뽑아서 콤마로 구분해줘"
human_messages_prompt_template = HumanMessagePromptTemplate.from_template(human_messages_prompt)
chat_prompt_template = ChatPromptTemplate.from_messages([human_messages_prompt_template])

In [11]:
chain_with_output_parser = chat_prompt_template | model | StrOutputParser()

In [12]:
out = chain_with_output_parser.invoke({"text":"Langchain is framework for developing applications powered by language models"})

In [13]:
out

'Langchain, framework, developing applications, language models'

In [14]:
type(out)

str

## CSV Parser

In [16]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate

In [17]:
output_parser = CommaSeparatedListOutputParser()

format_instructions = output_parser.get_format_instructions()

In [18]:
format_instructions

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

In [19]:
prompt = PromptTemplate(
    template = "{subject}에 대한 키워드 추출해줘.\n{format_instructions}",
    input_variables = {'subject'},
    partial_variables = {'format_instructions':format_instructions},
)

In [20]:
p = prompt.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [21]:
p

StringPromptValue(text='LangChain is a framework for developing applications powered by language models.에 대한 키워드 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`')

In [22]:
p.to_messages()

[HumanMessage(content='LangChain is a framework for developing applications powered by language models.에 대한 키워드 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`', additional_kwargs={}, response_metadata={})]

In [23]:
p.to_messages()

[HumanMessage(content='LangChain is a framework for developing applications powered by language models.에 대한 키워드 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`', additional_kwargs={}, response_metadata={})]

In [24]:
chain = prompt | model | output_parser

In [25]:
out = chain.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [26]:
out

['framework', 'developing applications', 'language models']

In [27]:
type(out)

list

In [28]:
out[1]

'developing applications'

## 데이터 형식 정의하기

In [29]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

In [31]:
model = ChatOpenAI(api_key = key, temperature=0)

In [32]:
# 원하는 구조의 데이터 정의

class Translation(BaseModel):
  translated_text: str = Field(description='번역된 텍스트')

In [33]:
# 언어 모델이 데이터 구조를 채우도록 하는 쿼리
query = "안녕 세상"

# 파서를 설정하고 프롬프트 템플릿에 지침을 채움
parser = JsonOutputParser(pydantic_object = Translation)
from_instruction = parser.get_format_instructions()
from_instruction

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "번역된 텍스트", "type": "string"}}, "required": ["translated_text"]}\n```'

In [34]:
prompt = PromptTemplate(
    template = "{format_instructions}\n아래 내용을 번역해라\n{query}",
    input_variables=['query'],
    partial_variables={'format_instructions':from_instruction}
)

In [35]:
prompt.invoke({'query': query}).to_messages()

[HumanMessage(content='The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "번역된 텍스트", "type": "string"}}, "required": ["translated_text"]}\n```\n아래 내용을 번역해라\n안녕 세상', additional_kwargs={}, response_metadata={})]

In [37]:
chain = prompt | model | parser

out = chain.invoke({'query': query})

In [38]:
out

{'translated_text': 'Hello World'}

In [39]:
type(out)

dict

In [40]:
out['translated_text']

'Hello World'

## custom output parser

In [43]:
from langchain_core.output_parsers import BaseOutputParser

class CommaSeparatedListOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list."""

    def parse(self, text):
        """Parse the output of an LLM call."""
        return text.strip().split(",")

In [44]:
chain_with_comma_parser = chat_prompt_template | model | CommaSeparatedListOutputParser()

In [45]:
out = chain_with_comma_parser.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [46]:
out

['LangChain', ' framework', ' developing applications', ' language models']

In [47]:
for keyword in out:
    print(keyword)

LangChain
 framework
 developing applications
 language models
