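# LangChain Output Parsing

This notebook shows how to wrap a LangChain `StructuredOutputParser` in a `LangchainOutputParser` and attach it to the question-answer and refine prompts, so that a query against the index returns a response with the fields defined by the response schemas.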

#### Load documents, build the GPTVectorStoreIndex

In [17]:
import logging
import sys

# Send INFO-and-above log records to stdout so they appear in the cell output.
#
# A single basicConfig call is enough: it installs one StreamHandler on the
# root logger. Adding a second stdout StreamHandler by hand makes every record
# print twice, and each re-run of the cell stacks up yet another handler.
# `force=True` (Python 3.8+) removes any handlers already attached to the root
# logger first, so this cell is idempotent and also takes effect in
# environments where the root logger was pre-configured.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [2]:
# Read every file under the essay data directory into a list of documents.
documents = (
    SimpleDirectoryReader('../paul_graham_essay/data')
    .load_data()
)

In [3]:
# Build the vector store index from the loaded documents
# (the recorded run below reports embedding token usage for this step).
index = GPTVectorStoreIndex.from_documents(
    documents,
    chunk_size=512,
)

INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens
> [build_index_from_documents] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 18579 tokens
> [build_index_from_documents] Total embedding token usage: 18579 tokens


#### Define Query + LangChain Output Parser

In [2]:
from llama_index.output_parsers import LangchainOutputParser
from llama_index.llm_predictor import StructuredLLMPredictor
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

In [3]:
# Predictor constructed with default arguments; it is handed to the query
# engine further down via `llm_predictor=`.
# NOTE(review): presumably this predictor is what applies the prompt's
# output parser to the raw LLM text -- confirm against the llama_index docs.
llm_predictor = StructuredLLMPredictor()

**Define custom QA and Refine Prompts**

In [4]:
from llama_index.prompts.prompts import QuestionAnswerPrompt, RefinePrompt
from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL

In [5]:
# One ResponseSchema per field expected in the structured answer; the
# descriptions end up in the format instructions appended to the prompts.
response_schemas = [
    ResponseSchema(
        name="Education",
        description="Describes the author's educational experience/background.",
    ),
    ResponseSchema(
        name="Work",
        description="Describes the author's work experience/background.",
    ),
]

In [6]:
# Build the LangChain parser from the schemas, then wrap it in the
# llama_index adapter so it can be attached to prompts.
lc_output_parser = StructuredOutputParser.from_response_schemas(
    response_schemas
)
output_parser = LangchainOutputParser(
    lc_output_parser
)

In [15]:
# The same output parser is shared by both prompts here; a different parser
# per prompt would work just as well.
# `output_parser.format(...)` appends the formatting instructions to each
# default template before the prompt object is built from it.

# Question-answer prompt
fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
qa_prompt = QuestionAnswerPrompt(
    fmt_qa_tmpl,
    output_parser=output_parser,
)

# Refine prompt
fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)
refine_prompt = RefinePrompt(
    fmt_refine_tmpl,
    output_parser=output_parser,
)

In [10]:
# Inspect the formatted QA template: the default question-answer text
# followed by the output parser's formatting instructions (a JSON schema
# listing the "Education" and "Work" fields).
print(fmt_qa_tmpl)

Context information is below. 
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the question: {query_str}


The output should be a markdown code snippet formatted in the following schema:

```json
{{
	"Education": string  // Describes the author's educational experience/background.
	"Work": string  // Describes the author's work experience/background.
}}
```


#### Query Index

In [19]:
# Create a query engine that uses the structured predictor together with the
# custom QA / refine prompts (both carry the output parser).
query_engine = index.as_query_engine(
    llm_predictor=llm_predictor,
    text_qa_template=qa_prompt,
    refine_template=refine_prompt,
)

# Run a single query against the index.
response = query_engine.query("What are a few things the author did growing up?")

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 609 tokens


> [query] Total LLM token usage: 609 tokens


INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 11 tokens


> [query] Total embedding token usage: 11 tokens


In [20]:
# Show the query result. In the recorded run this printed a dict-like value
# with 'Education' and 'Work' entries, matching the response schemas.
print(response)

{'Education': 'Before college, the author wrote short stories and experimented with programming on an IBM 1401.', 'Work': 'The author worked on writing and programming outside of school.'}
