In [6]:
import logging
import sys

import nest_asyncio
from llama_index import (
    VectorStoreIndex,
    ServiceContext,
    download_loader,
)
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.embeddings import OpenAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbeddingModelType
from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform
from llama_index.llms import OpenAI
from llama_index.query_engine import SubQuestionQueryEngine, RouterQueryEngine, MultiStepQueryEngine
from llama_index.selectors.pydantic_selectors import PydanticSingleSelector
from llama_index.tools import QueryEngineTool, ToolMetadata

# Logging setup.
#
# The root logger is configured explicitly (level + handler) instead of via
# logging.basicConfig(): basicConfig() does nothing once the root logger
# already has a handler, so stacking two calls with different levels made the
# effective level depend on which call ran first and on leftover kernel state.
# Setting the level and handler directly gives the same result on a fresh
# kernel and on every re-run of this cell.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
# A single bare stdout handler (no formatter), so records are emitted as the
# plain message text and show up in the cell output.
root_logger.handlers = [logging.StreamHandler(stream=sys.stdout)]

# Silence per-request chatter from the HTTP / API client libraries while
# keeping INFO-level messages from everything else.
logging.getLogger("openai").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.WARNING)

# Patch asyncio so nested event loops are allowed.
# NOTE(review): presumably needed because the notebook kernel already runs a loop — confirm.
nest_asyncio.apply()


In [7]:
# Load the Wikipedia articles that make up the corpus.
WikipediaReader = download_loader("WikipediaReader")
loader = WikipediaReader()

pages = [
    "Nicolas_Cage",
    "The_Best_of_Times_(1981_film)",
    "Leonardo DiCaprio",
]
# auto_suggest / redirect are passed through to the loader unchanged.
# NOTE(review): presumably they disable title guessing and redirect following — confirm.
documents = loader.load_data(pages=pages, auto_suggest=False, redirect=False)

# Shared LLM, embedding model and service context (chunking settings).
# NOTE(review): confirm that "text-davinci-003" is still served by the OpenAI API.
gpt3 = OpenAI(model="text-davinci-003", temperature=0)
embed_model = OpenAIEmbedding(model=OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002)

# 256-sized chunks with no overlap between neighbouring chunks.
service_context_gpt3 = ServiceContext.from_defaults(
    llm=gpt3,
    embed_model=embed_model,
    chunk_size=256,
    chunk_overlap=0,
)


# Tool 1: plain vector retrieval over the loaded documents.
simple_index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context_gpt3,
)
simple_query_engine = simple_index.as_query_engine()

# The description is the text the router's selector sees when choosing a tool.
simple_tool = QueryEngineTool.from_defaults(
    query_engine=simple_query_engine,
    description=(
        "Useful when the query is relatively straightforward and can be "
        "answered with direct information retrieval, without the need for "
        "complex transformations."
    ),
)

# Tool 2: multi-step query engine.
# The transform rewrites the question into follow-up queries using the gpt3
# LLM; with verbose=True each "Current query" / "New query" pair is printed.
step_decompose_transform_gpt3 = StepDecomposeQueryTransform(
    llm=gpt3,
    verbose=True,
)

# Describe the knowledge source, not the engine.
# NOTE(review): index_summary appears to be handed to the step-decompose
# prompt as the context describing what the index can answer — confirm
# against the MultiStepQueryEngine documentation.
index_summary = (
    "Wikipedia articles about Nicolas Cage, The Best of Times (1981 film) "
    "and Leonardo DiCaprio"
)

# Base engine that answers each individual step. It has its own name so that
# `multi_step_query_engine` is only ever bound to the MultiStepQueryEngine
# wrapper (previously the same name was reused for both objects).
multi_step_base_engine = simple_index.as_query_engine(
    service_context=service_context_gpt3,
)

multi_step_query_engine = MultiStepQueryEngine(
    query_engine=multi_step_base_engine,
    query_transform=step_decompose_transform_gpt3,
    index_summary=index_summary,
)

# The description is the text the router's selector sees when choosing a tool.
multi_step_tool = QueryEngineTool.from_defaults(
    query_engine=multi_step_query_engine,
    description="Useful when complex or multifaceted information needs are present, and a single query isn't sufficient to fully understand or retrieve the necessary information. This approach is especially beneficial in environments where the context evolves with each interaction or where the information is layered and requires iterative exploration.",
)

# Tool 3 (part 1): tracing and a dedicated index for the sub-question engine.
# The debug handler prints a timing trace when a traced operation ends.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

# No llm / embed_model is passed here, so the library defaults are used
# (unlike service_context_gpt3).
service_context_sub_question = ServiceContext.from_defaults(
    chunk_size=256,
    chunk_overlap=0,
    callback_manager=callback_manager,
)

# A second index is built over the same documents under this service context;
# each query retrieves the 5 most similar chunks.
vector_query_engine_sub_question = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context_sub_question,
    use_async=False,
).as_query_engine(similarity_top_k=5)

# Tool 3 (part 2): the sub-question engine and the single tool it can call.
# The tool name is the bracketed prefix shown on each sub-question line of the
# verbose output.
query_engine_tools = [
    QueryEngineTool(
        metadata=ToolMetadata(
            description="Questions about actors",
            name="Sub-question query engine",
        ),
        query_engine=vector_query_engine_sub_question,
    )
]

# Sub-questions are answered synchronously (use_async=False).
query_engine_sub_question = SubQuestionQueryEngine.from_defaults(
    use_async=False,
    service_context=service_context_sub_question,
    query_engine_tools=query_engine_tools,
)

# Wrap the sub-question engine as a router tool. The description is the text
# the router's selector reads when choosing a tool, so it should be free of
# typos ("two ore more" -> "two or more").
sub_question_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine_sub_question,
    description="Useful when complex questions can be effectively broken down into simpler sub-questions, each of which can be answered independently. For example if you have to compare two or more things.",
)


# Router: a single-choice selector picks one of the three tools per query.
# List position is the index reported in the "Selecting query engine N" line:
# 0 = simple, 1 = multi-step, 2 = sub-question.
query_engine = RouterQueryEngine(
    query_engine_tools=[simple_tool, multi_step_tool, sub_question_tool],
    selector=PydanticSingleSelector.from_defaults(),
)

**********
Trace: index_construction
    |_node_parsing ->  0.088957 seconds
      |_chunking ->  0.029358 seconds
      |_chunking ->  0.000334 seconds
      |_chunking ->  0.038476 seconds
    |_embedding ->  1.885996 seconds
    |_embedding ->  3.587934 seconds
    |_embedding ->  1.537834 seconds
    |_embedding ->  0.648983 seconds
**********


In [11]:
# Query 1: a direct factual lookup (routed to engine 0 in the recorded run).
response_1 = query_engine.query("What is Nicolas Cage's profession?")
print(response_1)

Selecting query engine 0: The query is relatively straightforward and can be answered with direct information retrieval..
 Nicolas Cage is an American actor and film producer.


In [23]:
# Selector's stated reason for the engine chosen for query 1.
(
    response_1
    .metadata["selector_result"]
    .selections[0]
    .reason
)

'The query is relatively straightforward and can be answered with direct information retrieval.'

In [12]:
# Query 2: a comparison across two people (routed to engine 2 in the recorded run).
response_2 = query_engine.query(
    "Compare the education received by Nicolas Cage and Leonardo DiCaprio."
)
print(response_2)

Selecting query engine 2: The question requires comparing the education received by two individuals, which can be effectively broken down into simpler sub-questions..
Generated 2 sub questions.
[1;3;38;2;237;90;200m[Sub-question query engine] Q: What is the education of Nicolas Cage?
[0m[1;3;38;2;237;90;200m[Sub-question query engine] A: Nicolas Cage attended UCLA School of Theater, Film and Television.
[0m[1;3;38;2;90;149;237m[Sub-question query engine] Q: What is the education of Leonardo DiCaprio?
[0m[1;3;38;2;90;149;237m[Sub-question query engine] A: Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies for four years and later the Seeds Elementary School. He later enrolled at the John Marshall High School, but dropped out later and eventually earned a general equivalency diploma.
[0mNicolas Cage received his education at UCLA School of Theater, Film and Television, while Leonardo DiCaprio attended the Los Angeles Center for Enriched Studies, Seeds Elementa

In [21]:
# Selector's stated reason for the engine chosen for query 2.
(
    response_2
    .metadata["selector_result"]
    .selections[0]
    .reason
)

'The question requires comparing the education received by two individuals, which can be effectively broken down into simpler sub-questions.'

In [13]:
# Query 3: a question needing chained lookups (routed to engine 1 in the recorded run).
response_3 = query_engine.query(
    "Who directed the pilot that marked the acting debut of Nicolas Cage?"
)
print(response_3)

Selecting query engine 1: The question requires complex or multifaceted information retrieval, as it involves identifying the director of a specific pilot episode and the acting debut of Nicolas Cage..
[1;3;33m> Current query: Who directed the pilot that marked the acting debut of Nicolas Cage?
[0m[1;3;38;5;200m> New query:  What was the name of the pilot that marked the acting debut of Nicolas Cage?
[0m[1;3;33m> Current query: Who directed the pilot that marked the acting debut of Nicolas Cage?
[0m[1;3;38;5;200m> New query:  Who directed The Best of Times?
[0m[1;3;33m> Current query: Who directed the pilot that marked the acting debut of Nicolas Cage?
[0m[1;3;38;5;200m> New query:  Who was Don Mischer?
[0mDon Mischer


In [24]:
# Selector's stated reason for the engine chosen for query 3.
(
    response_3
    .metadata["selector_result"]
    .selections[0]
    .reason
)

'The question requires complex or multifaceted information retrieval, as it involves identifying the director of a specific pilot episode and the acting debut of Nicolas Cage.'