In [44]:
import dotenv
import nest_asyncio

dotenv.load_dotenv()
nest_asyncio.apply()

In [45]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [46]:
from llama_index.core import Settings
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding

Settings.llm = Gemini(model="models/gemini-2.0-flash-exp")
Settings.embed_model = GeminiEmbedding(model="models/text-embedding-004")

### 1. Define simple tool

In [47]:
from llama_index.core.tools import FunctionTool


def add(x: int, y: int) -> int:
    """Adds two integers together."""
    return x + y


def mystery(x: int, y: int) -> int:
    """Mystery function that operates on top of two numbers."""
    return (x + y) * (x + y)


add_tool = FunctionTool.from_defaults(fn=add)
mystery_tool = FunctionTool.from_defaults(fn=mystery)

In [48]:
response = Settings.llm.predict_and_call(
    tools=[add_tool, mystery_tool], user_msg="Add 9 and 12.", verbose=True
)
print(response)

[1;3;38;5;200mThought: The current language of the user is: English. I need to use the add tool to add 9 and 12.
Action: add
Action Input: {'x': 9, 'y': 12}
[0m[1;3;34mObservation: 21
[0m21


In [49]:
response = Settings.llm.predict_and_call(
    tools=[add_tool, mystery_tool],
    user_msg="What is the mystery between 45 and 54",
    verbose=True,
)
print(response)

[1;3;38;5;200mThought: The current language of the user is: English. I need to use the mystery tool to find the result of the mystery function with the given inputs.
Action: mystery
Action Input: {'x': 45, 'y': 54}
[0m[1;3;34mObservation: 9801
[0m9801


In [50]:
from llama_index.core import SimpleDirectoryReader
# load documents
documents = SimpleDirectoryReader(input_files=["metagpt.pdf"]).load_data()

DEBUG:fsspec.local:open file: /home/rayed/Development/agentic-rag-llamaindex/metagpt.pdf
open file: /home/rayed/Development/agentic-rag-llamaindex/metagpt.pdf
open file: /home/rayed/Development/agentic-rag-llamaindex/metagpt.pdf


In [51]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Preprint
METAGPT: M ETA PROGRAMMING FOR A
MULTI...
> Adding chunk: Preprint
METAGPT: M ETA PROGRAMMING FOR A
MULTI...
> Adding chunk: Preprint
METAGPT: M ETA PROGRAMMING FOR A
MULTI...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Preprint
Figure 1: The software development SOP...
> Adding chunk: Preprint
Figure 1: The software development SOP...
> Adding chunk: Preprint
Figure 1: The software development SOP...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Preprint
• We introduce MetaGPT, a meta-program...
> Adding chunk: Preprint
• We introduce MetaGPT, a meta-program...
> Adding chunk: Preprint
• We introduce MetaGPT, a meta-program...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Other works focus on
sociological phenomena. Fo...
> Adding chunk: Other works focus on
sociological phenomena. Fo...
> Adding chunk: Other works focus on
sociological phenomena. Fo...
DEBUG:llama_

In [52]:
print(nodes[0].get_metadata_str())

page_label: 1
file_name: metagpt.pdf
file_path: metagpt.pdf
file_type: application/pdf
file_size: 16911937
creation_date: 2024-12-29
last_modified_date: 2024-12-29


In [53]:
from llama_index.core import VectorStoreIndex

vector_index = VectorStoreIndex(nodes)
query_engine = vector_index.as_query_engine(similarity_top_k=2)

In [54]:
from llama_index.core.vector_stores import MetadataFilters

query_engine = vector_index.as_query_engine(
    similarity_top_k=2,
    filters=MetadataFilters.from_dicts([{"key": "page_label", "value": "2"}]),
)

response = query_engine.query(
    "What are some high-level results of MetaGPT?",
)

DEBUG:llama_index.core.indices.utils:> Top 1 nodes:
> [Node 6b697477-ae91-490f-9dd5-c01a3b0260d3] [Similarity score:             0.69964] Preprint
Figure 1: The software development SOPs between MetaGPT and real-world human teams.
In s...
> Top 1 nodes:
> [Node 6b697477-ae91-490f-9dd5-c01a3b0260d3] [Similarity score:             0.69964] Preprint
Figure 1: The software development SOPs between MetaGPT and real-world human teams.
In s...
> Top 1 nodes:
> [Node 6b697477-ae91-490f-9dd5-c01a3b0260d3] [Similarity score:             0.69964] Preprint
Figure 1: The software development SOPs between MetaGPT and real-world human teams.
In s...


In [55]:
print(str(response))

MetaGPT achieves a new state-of-the-art with 85.9% and 87.7% in Pass@1 in code generation benchmarks. It also achieves a 100% task completion rate.



In [56]:
for n in response.source_nodes:
    print(n.metadata)

{'page_label': '2', 'file_name': 'metagpt.pdf', 'file_path': 'metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-12-29', 'last_modified_date': '2024-12-29'}


### 2. Define auto-retrieval tool

In [57]:
from typing import List
from llama_index.core.vector_stores import FilterCondition


def vector_query(query: str, page_numbers: List[str]) -> str:
    """Perform a vector search over an index.

    Args:
        query: The query to search for.
        page_numbers: The page numbers to filter the search. If empty, search over all pages.

    """

    metadata_dicts = [{"key": "page_label", "value": page} for page in page_numbers]
    filters = MetadataFilters.from_dicts(metadata_dicts, condition=FilterCondition.OR)

    query_engine = vector_index.as_query_engine(similarity_top_k=2, filters=filters)
    response = query_engine.query(query)

    return response

In [58]:
vector_query_tool = FunctionTool.from_defaults(name="vector_tool", fn=vector_query)
vector_query_tool.metadata

ToolMetadata(description='vector_tool(query: str, page_numbers: List[str]) -> str\nPerform a vector search over an index.\n\n    Args:\n        query: The query to search for.\n        page_numbers: The page numbers to filter the search. If empty, search over all pages.\n\n    ', name='vector_tool', fn_schema=<class 'llama_index.core.tools.utils.vector_tool'>, return_direct=False)

In [59]:
response = Settings.llm.predict_and_call(
    tools=[vector_query_tool],
    user_msg="What are the high-level results of MetaGPT?",
    verbose=True,
)
print(response)

[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'query': 'high-level results of MetaGPT', 'page_numbers': []}
[0mDEBUG:llama_index.core.indices.utils:> Top 2 nodes:
> [Node c5d22293-fd51-40c3-a279-58c9082757b2] [Similarity score:             0.720623] 4.2 M AIN RESULT
AlphaCode(1.1B)Incoder (6.7B)CodeGeeX (13B)
17.1 
— 
15.2 17.6 18.9 26.9 
CodeGe...
> [Node 64d931f6-225b-4325-a75c-35e7c1d1bbd8] [Similarity score:             0.714395] Preprint
Hongxin Zhang, Weihua Du, Jiaming Shan, Qinhong Zhou, Yilun Du, Joshua B Tenenbaum, Tian...
> Top 2 nodes:
> [Node c5d22293-fd51-40c3-a279-58c9082757b2] [Similarity score:             0.720623] 4.2 M AIN RESULT
AlphaCode(1.1B)Incoder (6.7B)CodeGeeX (13B)
17.1 
— 
15.2 17.6 18.9 26.9 
CodeGe...
> [Node 64d931f6-225b-4325-a75c-35e7c1d1bbd8] [Similarity score:             0.714395] Preprint
Hongxin Zhang, Weihua Du, Jiaming Shan, Qinhong Z

In [60]:
for n in response.source_nodes:
    print(n.metadata)

{'page_label': '7', 'file_name': 'metagpt.pdf', 'file_path': 'metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-12-29', 'last_modified_date': '2024-12-29'}
{'page_label': '14', 'file_name': 'metagpt.pdf', 'file_path': 'metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-12-29', 'last_modified_date': '2024-12-29'}


### 3. Add other tools

In [61]:
type(vector_query_tool)

llama_index.core.tools.function_tool.FunctionTool

In [62]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(
    reponse_mode="tree_summarize", use_async=True
)
summary_tool = QueryEngineTool.from_defaults(
    name="summary_tool",
    query_engine=summary_query_engine,
    description="Useful for a summary of MetaGPT.",
)

In [63]:
type(summary_tool)

llama_index.core.tools.query_engine.QueryEngineTool

In [64]:
response = Settings.llm.predict_and_call(
    [vector_query_tool, summary_tool], 
    "What are the MetaGPT comparisons with ChatDev described on page 8?", 
    verbose=True
)

[1;3;38;5;200mThought: The current language of the user is: English. I need to use the vector_tool to find the comparisons between MetaGPT and ChatDev on page 8.
Action: vector_tool
Action Input: {'query': 'MetaGPT comparisons with ChatDev', 'page_numbers': ['8']}
[0mDEBUG:llama_index.core.indices.utils:> Top 1 nodes:
> [Node 807bb05b-72d5-4325-ac99-caeac9a36c60] [Similarity score:             0.727834] Preprint
Figure 5: Demo softwares developed by MetaGPT.
in these two public benchmarks. Moreover,...
> Top 1 nodes:
> [Node 807bb05b-72d5-4325-ac99-caeac9a36c60] [Similarity score:             0.727834] Preprint
Figure 5: Demo softwares developed by MetaGPT.
in these two public benchmarks. Moreover,...
> Top 1 nodes:
> [Node 807bb05b-72d5-4325-ac99-caeac9a36c60] [Similarity score:             0.727834] Preprint
Figure 5: Demo softwares developed by MetaGPT.
in these two public benchmarks. Moreover,...
[1;3;34mObservation: MetaGPT outperforms ChatDev on the SoftwareDev dataset in near

In [65]:
for n in response.source_nodes:
    print(n.metadata)

{'page_label': '8', 'file_name': 'metagpt.pdf', 'file_path': 'metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-12-29', 'last_modified_date': '2024-12-29'}


In [66]:
response = Settings.llm.predict_and_call(
    [vector_query_tool, summary_tool], 
    "What is a summary of the paper?", 
    verbose=True
)

[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: summary_tool
Action Input: {'input': 'What is a summary of the paper?'}
[0m[1;3;34mObservation: This paper introduces MetaGPT, a meta-programming framework for multi-agent collaboration using large language models. It incorporates human workflows into the system by encoding Standardized Operating Procedures (SOPs) into prompt sequences. This allows agents with domain expertise to verify intermediate results and reduce errors. MetaGPT uses an assembly line paradigm to assign diverse roles to various agents, breaking down complex tasks into subtasks. The framework achieves state-of-the-art performance on code generation benchmarks and demonstrates robustness and efficiency in software engineering tasks.

[0m