In [1]:
from dotenv import load_dotenv
load_dotenv()
import os

In [9]:
# NOTE: This is ONLY necessary in jupyter notebook.
# Details: Jupyter runs an event-loop behind the scenes.
#          This results in nested event-loops when we start an event-loop to make async queries.
#          This is normally not allowed, we use nest_asyncio to allow it for convenience.

In [3]:
import nest_asyncio
nest_asyncio.apply()

In [4]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Global Models


In [6]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
Settings.llm = OpenAI(model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

In [None]:
# Using the LlamaDebugHandler to print the trace of the sub questions
# captured by the SUB_QUESTION callback event type
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler

llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

Settings.callback_manager = callback_manager

# Pull data

In [10]:
os.makedirs('data/paul_graham/', exist_ok=True)
!curl -o "data/paul_graham/paul_graham_essay.txt" "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 75042  100 75042    0     0   186k      0 --:--:-- --:--:-- --:--:--  188k


# Load Data
We first show how to convert a Document into a set of Nodes, and insert into a DocumentStore.

In [12]:
from llama_index.core import SimpleDirectoryReader

# load documents
documents = SimpleDirectoryReader("data/paul_graham").load_data()

DEBUG:llama_index.core.readers.file.base:> [SimpleDirectoryReader] Total files added: 1
> [SimpleDirectoryReader] Total files added: 1
DEBUG:fsspec.local:open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/data/paul_graham/paul_graham_essay.txt
open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/data/paul_graham/paul_graham_essay.txt


In [13]:
documents

[Document(id_='cb002e00-8750-44a4-8f47-e863163689df', embedding=None, metadata={'file_path': 'c:\\Code\\Github\\LlamaIndex\\06.Advanced_Topics\\1.Building Performant RAG Applications for Production\\3.Dynamically Retrieve Chunks Depending on your Task\\Router Query Engine\\data\\paul_graham\\paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2024-11-01', 'last_modified_date': '2024-11-01'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='\n\nWhat I Worked On\n\nFebruary 2021\n\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: 

# Chunk the data

In [14]:
from llama_index.core import Settings

# initialize settings (set chunk size)
Settings.chunk_size = 1024
nodes = Settings.node_parser.get_nodes_from_documents(documents)

DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: What I Worked On

February 2021

Before college...
> Adding chunk: What I Worked On

February 2021

Before college...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: All that seemed left for philosophy were edge c...
> Adding chunk: All that seemed left for philosophy were edge c...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: It was not, in fact, simply a matter of teachin...
> Adding chunk: It was not, in fact, simply a matter of teachin...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: That fall I started taking art classes at Harva...
> Adding chunk: That fall I started taking art classes at Harva...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: I remember that I answered the essay question b...
> Adding chunk: I remember that I answered the essay question b...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: This is not the only way to paint. I'm

In [15]:
nodes

[TextNode(id_='d7a4887d-995b-4f31-9bda-7c0f6146cb75', embedding=None, metadata={'file_path': 'c:\\Code\\Github\\LlamaIndex\\06.Advanced_Topics\\1.Building Performant RAG Applications for Production\\3.Dynamically Retrieve Chunks Depending on your Task\\Router Query Engine\\data\\paul_graham\\paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2024-11-01', 'last_modified_date': '2024-11-01'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='cb002e00-8750-44a4-8f47-e863163689df', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': 'c:\\Code\\Github\\LlamaIndex\\06.Advanced_Topics\\1.Building Performant RAG Applications for Prod

# Build storage context

In [17]:
from llama_index.core import StorageContext

# initialize storage context (by default it's in-memory)
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

In [19]:
storage_context.persist(persist_dir="./paul_graham_index")

DEBUG:fsspec.local:open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/paul_graham_index/docstore.json
open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/paul_graham_index/docstore.json
DEBUG:fsspec.local:open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/paul_graham_index/index_store.json
open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/paul_graham_index/index_store.json
DEBUG:fsspec.local:open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant

# Define Summary Index and Vector Index over Same Data


In [18]:
from llama_index.core import SummaryIndex
from llama_index.core import VectorStoreIndex

summary_index = SummaryIndex(nodes, storage_context=storage_context)
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)

**********
Trace: index_construction
**********
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Code\\Github\\LlamaIndex\\venv\\Library\\ssl\\cacert.pem'
load_verify_locations cafile='C:\\Code\\Github\\LlamaIndex\\venv\\Library\\ssl\\cacert.pem'


DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x0000014DA9AEE170>, 'json_data': {'input': ['file_path: c:\\Code\\Github\\LlamaIndex\\06.Advanced_Topics\\1.Building Performant RAG Applications for Production\\3.Dynamically Retrieve Chunks Depending on your Task\\Router Query Engine\\data\\paul_graham\\paul_graham_essay.txt  What I Worked On  February 2021  Before college the two main things I worked on, outside of school, were writing and programming. I didn\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.  The first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The school dis

# Load the Index (from storage)

 When you need to use the index again, instead of re-indexing, you can load it from the persisted storage using load_index_from_storage.

In [None]:
from llama_index.core import StorageContext, load_index_from_storage

# Rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir="paul_graham_index")

summary_index = SummaryIndex(nodes, storage_context=storage_context)
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)


DEBUG:llama_index.core.storage.kvstore.simple_kvstore:Loading llama_index.core.storage.kvstore.simple_kvstore from paul_graham_index\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from paul_graham_index\docstore.json.
DEBUG:fsspec.local:open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/paul_graham_index/docstore.json
open file: c:/Code/Github/LlamaIndex/06.Advanced_Topics/1.Building Performant RAG Applications for Production/3.Dynamically Retrieve Chunks Depending on your Task/Router Query Engine/paul_graham_index/docstore.json
DEBUG:llama_index.core.storage.kvstore.simple_kvstore:Loading llama_index.core.storage.kvstore.simple_kvstore from paul_graham_index\index_store.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from paul_graham_index\index_store.json.
DEBUG:fsspec.local:open file: c:/Code/Github/LlamaIndex/06.Ad

# Define Query Engines and Set Metadata

In [23]:
list_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()

In [24]:
from llama_index.core.tools import QueryEngineTool


list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description=(
        "Useful for summarization questions related to Paul Graham eassy on"
        " What I Worked On."
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from Paul Graham essay on What"
        " I Worked On."
    ),
)

# Define Router Query Engine
There are several selectors available, each with some distinct attributes.

The LLM selectors use the LLM to output a JSON that is parsed, and the corresponding indexes are queried.

The Pydantic selectors (currently only supported by gpt-4-0613 and gpt-3.5-turbo-0613 (the default)) use the OpenAI Function Call API to produce pydantic selection objects, rather than parsing raw JSON.

For each type of selector, there is also the option to select 1 index to route to, or multiple.


# PydanticSingleSelector
Use the OpenAI Function API to generate/parse pydantic objects under the hood for the router selector.

In [25]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)


query_engine = RouterQueryEngine(
    selector=PydanticSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
    ],
)

In [None]:
response = query_engine.query("What is the summary of the document?")


DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Code\\Github\\LlamaIndex\\venv\\Library\\ssl\\cacert.pem'
load_verify_locations cafile='C:\\Code\\Github\\LlamaIndex\\venv\\Library\\ssl\\cacert.pem'
DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': "Some choices are given below. It is provided in a numbered list (1 to 2), where each item in the list corresponds to a summary.\n---------------------\n(1) Useful for summarization questions related to Paul Graham eassy on What I Worked On.\n\n(2) Useful for retrieving specific context from Paul Graham essay on What I Worked On.\n---------------------\nUsing only the choices above and not prior knowledge, generate the selection object and reason that is most relevant to the question: 'What is t

In [27]:
print(str(response))

The document recounts the journey of an individual who transitioned from writing and programming in his youth to exploring various fields, including artificial intelligence, art, and entrepreneurship. Initially drawn to programming through early experiences with computers, he later shifted his focus to AI during college, only to realize the limitations of the field at that time. This led him to pursue art, where he took classes and developed his painting skills.

After a stint in the software industry, he co-founded Viaweb, an early e-commerce platform, which was later acquired by Yahoo. Following this, he became involved in venture capital, co-founding Y Combinator, a startup accelerator that revolutionized seed funding by supporting multiple startups simultaneously. The document highlights the evolution of his interests, the challenges faced in various endeavors, and the eventual realization of the importance of pursuing less prestigious but fulfilling work. It concludes with his ret

In [28]:
response = query_engine.query("What did Paul Graham do after RICS?")


DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': "Some choices are given below. It is provided in a numbered list (1 to 2), where each item in the list corresponds to a summary.\n---------------------\n(1) Useful for summarization questions related to Paul Graham eassy on What I Worked On.\n\n(2) Useful for retrieving specific context from Paul Graham essay on What I Worked On.\n---------------------\nUsing only the choices above and not prior knowledge, generate the selection object and reason that is most relevant to the question: 'What did Paul Graham do after RICS?'\n"}], 'model': 'gpt-4o-mini', 'stream': False, 'temperature': 0.1, 'tool_choice': {'type': 'function', 'function': {'name': 'SingleSelection'}}, 'tools': [{'type': 'function', 'function': {'name': 'SingleSelection', 'description': 'A single selection of a choice.', 'parameters': {'description': 'A single select

In [29]:
print(str(response))

After attending RISD, Paul Graham returned to the US from the Accademia, where he had been studying. He initially wanted to go back to RISD but found himself broke and decided to get a job for a year to save money. He secured a position at a company called Interleaf, which developed software for creating documents.


# LLMSingleSelector
Use OpenAI (or any other LLM) to parse generated JSON under the hood to select a sub-index for routing.

In [30]:
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
    ],
)

In [31]:
response = query_engine.query("What is the summary of the document?")

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': 'Some choices are given below. It is provided in a numbered list (1 to 2), where each item in the list corresponds to a summary.\n---------------------\n(1) Useful for summarization questions related to Paul Graham eassy on What I Worked On.\n\n(2) Useful for retrieving specific context from Paul Graham essay on What I Worked On.\n---------------------\nUsing only the choices above and not prior knowledge, return the choice that is most relevant to the question: \'What is the summary of the document?\'\n\n\nThe output should be ONLY JSON formatted as a JSON instance.\n\nHere is an example:\n[\n    {{\n        choice: 1,\n        reason: "<insert reason for choice>"\n    }},\n    ...\n]\n'}], 'model': 'gpt-4o-mini', 'stream': False, 'temperature': 0.1}}
Request options: {'method': 'post', 'url': '/chat/completions', 'files': None

In [32]:
print(str(response))

The document is a reflective essay detailing the author's journey through various phases of life, focusing on their experiences in writing, programming, and art, ultimately leading to the founding of Y Combinator. It begins with the author's early interests in writing and programming, transitioning from short stories to programming on early computers. The narrative explores their academic pursuits in philosophy and artificial intelligence, revealing disillusionment with traditional education and a shift towards self-directed learning, particularly in Lisp programming.

The author recounts their time in art school, the challenges faced, and the realization that art could be a viable career. After a stint in the tech industry, they co-founded Viaweb, an early e-commerce platform, which was later acquired by Yahoo. The essay highlights the author's insights into startup culture, the importance of growth rates, and the dynamics of venture capital.

Eventually, the author and their partners

In [33]:
response = query_engine.query("What did Paul Graham do after RICS?")


DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': 'Some choices are given below. It is provided in a numbered list (1 to 2), where each item in the list corresponds to a summary.\n---------------------\n(1) Useful for summarization questions related to Paul Graham eassy on What I Worked On.\n\n(2) Useful for retrieving specific context from Paul Graham essay on What I Worked On.\n---------------------\nUsing only the choices above and not prior knowledge, return the choice that is most relevant to the question: \'What did Paul Graham do after RICS?\'\n\n\nThe output should be ONLY JSON formatted as a JSON instance.\n\nHere is an example:\n[\n    {{\n        choice: 1,\n        reason: "<insert reason for choice>"\n    }},\n    ...\n]\n'}], 'model': 'gpt-4o-mini', 'stream': False, 'temperature': 0.1}}
Request options: {'method': 'post', 'url': '/chat/completions', 'files': None,

In [34]:
print(str(response))

After attending the Accademia, Paul Graham returned to the US because he was broke and RISD was very expensive. He decided to get a job for a year to save money before returning to RISD the following fall. He secured a position at a company called Interleaf, which developed software for creating documents.


In [35]:
# [optional] look at selected results
print(str(response.metadata["selector_result"]))

selections=[SingleSelection(index=1, reason="The question asks for specific context regarding Paul Graham's actions after RICS, which aligns with retrieving specific information from the essay.")]


# PydanticMultiSelector
In case you are expecting queries to be routed to multiple indexes, you should use a multi selector. The multi selector sends to query to multiple sub-indexes, and then aggregates all responses using a summary index to form a complete answer.

In [36]:
from llama_index.core import SimpleKeywordTableIndex

keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)

keyword_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context using keywords from Paul"
        " Graham essay on What I Worked On."
    ),
)

**********
Trace: index_construction
**********


In [37]:
query_engine = RouterQueryEngine(
    selector=PydanticMultiSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
        keyword_tool,
    ],
)

In [38]:
# This query could use either a keyword or vector query engine, so it will combine responses from both
response = query_engine.query(
    "What were noteable events and people from the authors time at Interleaf"
    " and YC?"
)
print(str(response))

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': "Some choices are given below. It is provided in a numbered list (1 to 3), where each item in the list corresponds to a summary.\n---------------------\n(1) Useful for summarization questions related to Paul Graham eassy on What I Worked On.\n\n(2) Useful for retrieving specific context from Paul Graham essay on What I Worked On.\n\n(3) Useful for retrieving specific context using keywords from Paul Graham essay on What I Worked On.\n---------------------\nUsing only the choices above and not prior knowledge, return the top choice(s) (no more than 3, but only select what is needed) by generating the selection object and reasons that are most relevant to the question: 'What were noteable events and people from the authors time at Interleaf and YC?'\n"}], 'model': 'gpt-4o-mini', 'stream': False, 'temperature': 0.1, 'tool_choice': 

In [39]:
# [optional] look at selected results
print(str(response.metadata["selector_result"]))

selections=[SingleSelection(index=1, reason="This choice is useful for retrieving specific context from the essay, which can provide information about notable events and people during the author's time at Interleaf and YC."), SingleSelection(index=2, reason='This choice allows for retrieving specific context using keywords, which can help in identifying notable events and people mentioned in the essay.')]
