# Guardrails Output Parsing

#### Load documents, build the GPTVectorStoreIndex

In [1]:
import logging
import sys

# Route INFO-level (and above) log records to stdout so they show up in the
# notebook cell output. basicConfig() already attaches a single stdout
# StreamHandler to the root logger; attaching a second handler on top of it
# made every record print twice (once formatted, once bare), so only the
# basicConfig() handler is kept.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [2]:
# Load the documents from the essay data directory.
essay_reader = SimpleDirectoryReader('../paul_graham_essay/data')
documents = essay_reader.load_data()

In [3]:
# Build the vector store index over the loaded documents. Per the recorded
# output below, building consumes embedding tokens only (no LLM tokens).
index = GPTVectorStoreIndex.from_documents(
    documents,
    chunk_size=512,
)

INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens
> [build_index_from_documents] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 18579 tokens
> [build_index_from_documents] Total embedding token usage: 18579 tokens


#### Define Query + Guardrails Spec

In [4]:
from llama_index.output_parsers import GuardrailsOutputParser
from llama_index.llm_predictor import StructuredLLMPredictor

In [5]:
# Predictor used for the structured-output query below; its `.llm` attribute is
# also handed to the Guardrails output parser.
# NOTE(review): presumably this predictor applies the prompt's output parser to
# the raw LLM text -- confirm against the llama_index docs.
llm_predictor = StructuredLLMPredictor()

**Define custom QA and Refine Prompts**

In [7]:
from llama_index.prompts.prompts import QuestionAnswerPrompt, RefinePrompt
from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL

In [16]:
# Guardrails RAIL spec describing the structured output we want back.
# The <output> section declares a list named "points" whose objects carry three
# string fields (explanation, explanation2, explanation3), each requesting the
# "one-line" format with the "noop" on-fail action.
# NOTE: the query string does not have to be defined in the rail spec; it can be
# supplied at query time instead.
rail_spec = """
<rail version="0.1">

<output>
    <list name="points" description="Bullet points regarding events in the author's life.">
        <object>
            <string name="explanation" format="one-line" on-fail-one-line="noop" />
            <string name="explanation2" format="one-line" on-fail-one-line="noop" />
            <string name="explanation3" format="one-line" on-fail-one-line="noop" />
        </object>
    </list>
</output>

<prompt>

Query string here.

@xml_prefix_prompt

{output_schema}

@json_suffix_prompt_v2_wo_none
</prompt>
</rail>
"""

In [17]:
# Build the Guardrails output parser from the rail spec, passing it the LLM
# held by the structured predictor.
structured_llm = llm_predictor.llm
output_parser = GuardrailsOutputParser.from_rail_string(rail_spec, llm=structured_llm)

In [18]:
# NOTE: a single output parser is shared by both prompts here; using a different
# parser per prompt is also possible.
# NOTE: output_parser.format(...) adds the formatting instructions to each
# default template.

# Question-answer prompt: formatted template, then the prompt object.
fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
qa_prompt = QuestionAnswerPrompt(fmt_qa_tmpl, output_parser=output_parser)

# Refine prompt: same treatment.
fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)
refine_prompt = RefinePrompt(fmt_refine_tmpl, output_parser=output_parser)

In [19]:
# Take a look at the new QA template: the default question-answer text comes
# first, followed by the XML output schema and the JSON-only instructions.
print(fmt_qa_tmpl)

Context information is below. 
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the question: {query_str}



Given below is XML that describes the information to extract from this document and the tags to extract it into.


<output>
    <list name="points" description="Bullet points regarding events in the author's life.">
        <object>
            <string name="explanation" format="one-line"/>
            <string name="explanation2" format="one-line"/>
            <string name="explanation3" format="one-line"/>
        </object>
    </list>
</output>




ONLY return a valid JSON object (no other text is necessary). The JSON MUST conform to the XML format, including any types and format requests e.g. requests for lists, objects and specific types. Be correct and concise.

JSON Output:




#### Query Index

In [11]:
# Question to ask; the answer is expected to come back in the structured shape
# declared by the rail spec.
question = "What are the three items the author did growing up?"

# Query engine wired with the custom QA/refine prompts and the structured predictor.
query_engine = index.as_query_engine(
    text_qa_template=qa_prompt,
    refine_template=refine_prompt,
    llm_predictor=llm_predictor,
)
response = query_engine.query(question)

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 754 tokens
> [query] Total LLM token usage: 754 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 11 tokens
> [query] Total embedding token usage: 11 tokens


In [13]:
# Show the parsed response; in the recorded output it is a dict with a
# "points" list holding the three explanation fields.
print(response)

{'points': [{'explanation': 'Writing short stories', 'explanation2': 'Programming on an IBM 1401', 'explanation3': 'Using microcomputers'}]}
