In [27]:
import dotenv
import nest_asyncio

dotenv.load_dotenv()
nest_asyncio.apply()

In [28]:
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_files=["metagpt.pdf"])
documents = reader.load_data()

print("Number of docs:", len(documents))

Number of docs: 29


In [29]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

print("Number of nodes:", len(nodes))

Number of nodes: 34


In [30]:
from llama_index.core import Settings
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding

Settings.llm = Gemini(model="models/gemini-2.0-flash-exp")
Settings.embed_model = GeminiEmbedding(model="models/text-embedding-004")

In [31]:
from llama_index.core import SummaryIndex, VectorStoreIndex

summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

In [32]:
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()

In [33]:
from llama_index.core.tools import QueryEngineTool

summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description=(
        "Useful for summarization questions related to MetaGPT"
    ),
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from the MetaGPT paper."
    ),
)

In [34]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        summary_tool,
        vector_tool,
    ],
    verbose=True
)

In [35]:
response = query_engine.query("What is the summary of the document?")
print(str(response))

[1;3;38;5;200mSelecting query engine 0: The question 'What is the summary of the document?' directly asks for a summarization, which is what choice (1) is described as being useful for..
[0mThis document introduces MetaGPT, a meta-programming framework that uses Standardized Operating Procedures (SOPs) to enhance the problem-solving capabilities of multi-agent systems based on Large Language Models (LLMs). MetaGPT simulates a software company with specialized roles, workflows, and communication protocols. It also incorporates an executable feedback mechanism to improve code generation quality. The framework achieves state-of-the-art performance on multiple benchmarks and demonstrates the benefits of integrating human-like SOPs into artificial multi-agent systems.



In [36]:
print(len(response.source_nodes))

34


In [37]:
response = query_engine.query(
    "How do agents share information with other agents?"
)
print(str(response))

[1;3;38;5;200mSelecting query engine 0: The question 'How do agents share information with other agents?' is a summarization question about the functionality of MetaGPT, making choice 1 more relevant..
[0mAgents share information through a shared message pool where they can publish structured messages. They can also subscribe to relevant messages based on their profiles.



In [38]:
from utils import get_router_query_engine

query_engine = get_router_query_engine("metagpt.pdf")

In [39]:
response = query_engine.query("Tell me about the ablation study results?")
print(str(response))

[1;3;38;5;200mSelecting query engine 1: Ablation study results are specific pieces of information within the MetaGPT paper, making choice 2, which focuses on retrieving specific context, the most relevant..
[0mThere is no information about ablation study results in the provided text.

