# Output Parsing

+ LangChain's built-in output parser (it works by appending formatting instructions to the prompt)

In [1]:
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv()
# (presumably where the OpenAI API key lives -- confirm against your setup).
#
# The result is stored in a descriptive name instead of `_`: inside IPython/Jupyter
# `_` is the automatic "last output" variable, and assigning to it by hand stops
# IPython from maintaining it for the rest of the session.
dotenv_loaded = load_dotenv(find_dotenv())

# Bare last expression: displays the boolean returned by load_dotenv().
dotenv_loaded

True

In [3]:
# an example prompt
#
# The prompt is assembled from its logical segments (instruction, question, the five
# choices, closing instruction). The segments are joined by five spaces, so the whole
# prompt is a single line of text with no newline characters.
prompt_str = (" " * 5).join(
    [
        "Answer the following question. There are 5 possible answer and only 1 is correct.",
        "Question: James wanted to find an old underground map from the 50s.  Where might he look for one?",
        "- A: library",
        "- B: subway station",
        "- C: county engineer's office",
        "- D: super market",
        "- E: home",
        "Choose the correct answer. Let's think step by step.",
    ]
)

In [13]:
# normal call to the model (baseline: raw OpenAI client, no output parsing)

import openai

# The question is sent as a "user" message. The system role is meant for standing
# instructions about how the assistant should behave, while the actual request belongs
# in a user message. This also matches the LangChain cells below, where
# ChatPromptTemplate.from_template() renders the prompt as a human (user) message,
# so both paths hand the question to the model in the same role.
#
# The call is the cell's last expression, so the raw API response object is displayed.
openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": prompt_str},
    ],
)

<OpenAIObject chat.completion id=chatcmpl-7ddrVPFZkZEwUIm6Dpp6CkxZ2GEqA at 0x12394b4c0> JSON: {
  "id": "chatcmpl-7ddrVPFZkZEwUIm6Dpp6CkxZ2GEqA",
  "object": "chat.completion",
  "created": 1689682377,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "First, it is unlikely that James would find an old underground map from the 50s at a library or supermarket, as these places typically do not store historical documents like maps. \n\nNext, the county engineer's office may have records or maps related to infrastructure and planning, but it is less likely to have an underground map from the 50s specifically.\n\nTherefore, the best option would be to look for an old underground map from the 50s at a subway station. Subway stations often have historical displays or archives that may include maps from past decades. \n\nSo, the correct answer would be B: subway station."
      },
      "finish_reason": "s

In [2]:
# instantiate the model via langchain integration
#
# `chat_llm` is the chat-model handle that a later cell calls with the rendered
# prompt messages.

from langchain.chat_models import ChatOpenAI

# temperature=0.0: presumably chosen to make the answers as repeatable as possible
# between runs -- confirm if reproducibility matters for your use.
chat_llm = ChatOpenAI(temperature=0.0)

In [4]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# Field name -> description for every key the model is asked to return.
# Insertion order matters: it is the order of the entries in `schemas`.
# The description texts are embedded verbatim in the format instructions that are
# generated from these schemas, so they are effectively part of the prompt.
_SCHEMA_DESCRIPTIONS = {
    "prediction_letter": "This is the answer to the question. It is one letter (either A, B, C, D, or E), which corresponds to the answer chosen by the model.",
    "prediction_text": "This is the answer to the question. It is the text of the answer chosen by the model.",
    "explanation": "This is the explanation for the prediction. It is a piece of text explains why the model chose the answer it did.",
}

# One ResponseSchema per field, in the order declared above.
schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in _SCHEMA_DESCRIPTIONS.items()
]

# Individual handles to the same schema objects, for cells that want a single field.
prediction_letter_schema, prediction_text_schema, explanation_schema = schemas

In [5]:
# Build a single parser from the response schemas. Later cells use this same object
# twice: to generate the formatting instructions that go into the prompt, and to
# parse the model's reply into a dict.
output_parser = StructuredOutputParser.from_response_schemas(schemas)

In [6]:
import pprint as pp

# Text block describing the expected JSON layout; it is generated by the parser from
# the response schemas and is later substituted into the prompt template.
format_instructions = output_parser.get_format_instructions()

# Pretty-print the instructions so their exact content can be inspected.
pp.pprint(format_instructions)

('The output should be a markdown code snippet formatted in the following '
 'schema, including the leading and trailing "```json" and "```":\n'
 '\n'
 '```json\n'
 '{\n'
 '\t"prediction_letter": string  // This is the answer to the question. It is '
 'one letter (either A, B, C, D, or E), which corresponds to the answer chosen '
 'by the model.\n'
 '\t"prediction_text": string  // This is the answer to the question. It is '
 'the text of the answer chosen by the model.\n'
 '\t"explanation": string  // This is the explanation for the prediction. It '
 'is a piece of text explains why the model chose the answer it did.\n'
 '}\n'
 '```')


In [10]:
# Question plus its five answer choices, as one line of text: the segments are joined
# by five spaces, so there are no newline characters between the choices.
test_question_and_choices = (" " * 5).join(
    [
        "James wanted to find an old underground map from the 50s.  Where might he look for one?",
        "- A: library",
        "- B: subway station",
        "- C: county engineer's office",
        "- D: super market",
        "- E: home",
    ]
)

In [11]:
from langchain.prompts import ChatPromptTemplate

# Prompt template with two placeholders: the question (with its choices) and the
# parser-generated formatting instructions. Lines after the first carry a four-space
# indent, which is part of the template text.
template = (
    "Answer the following question. There are 5 possible answer and only 1 is correct.\n"
    "    Question: {question_and_choices}\n"
    "    Choose the correct answer. Let's think step by step.\n"
    "    {format_instructions}"
)

template_lc = ChatPromptTemplate.from_template(template)

# Values substituted into the template placeholders, keyed by placeholder name.
template_values = {
    "question_and_choices": test_question_and_choices,
    "format_instructions": format_instructions,
}

# Render the template into the list of chat messages that is sent to the model.
prompt_lc = template_lc.format_messages(**template_values)

In [12]:
# Show the fully rendered text of the first message (index 0) to check how the
# question and the formatting instructions were substituted into the template.
rendered_prompt_text = prompt_lc[0].content
print(rendered_prompt_text)

Answer the following question. There are 5 possible answer and only 1 is correct.
    Question: James wanted to find an old underground map from the 50s.  Where might he look for one?     - A: library     - B: subway station     - C: county engineer's office     - D: super market     - E: home
    Choose the correct answer. Let's think step by step.
    The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"prediction_letter": string  // This is the answer to the question. It is one letter (either A, B, C, D, or E), which corresponds to the answer chosen by the model.
	"prediction_text": string  // This is the answer to the question. It is the text of the answer chosen by the model.
	"explanation": string  // This is the explanation for the prediction. It is a piece of text explains why the model chose the answer it did.
}
```


In [13]:
# Send the rendered message list to the chat model and keep the reply message.
response = chat_llm(prompt_lc)

# Pretty-print the returned message object; its repr includes the raw `content`
# string, which is what gets parsed in the next step.
pp.pprint(response)

AIMessage(content='```json\n{\n\t"prediction_letter": "A",\n\t"prediction_text": "library",\n\t"explanation": "James is looking for an old underground map from the 50s. The most likely place to find such a map would be in a library, where historical documents and maps are often stored."\n}\n```', additional_kwargs={}, example=False)


In [15]:
# parse the output -> structured output

response_as_dict = output_parser.parse(response.content)

pp.pprint(response_as_dict)

{'explanation': 'James is looking for an old underground map from the 50s. The '
                'most likely place to find such a map would be in a library, '
                'where historical documents and maps are often stored.',
 'prediction_letter': 'A',
 'prediction_text': 'library'}
