In [1]:
import os
from openai import AzureOpenAI
from dotenv import load_dotenv, find_dotenv
# Load variables from a .env file into the process environment, then read the
# Azure OpenAI connection settings. Any variable that is not set yields None.
load_dotenv()
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
END_POINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")
DEPLOYMENT_NAME = os.environ.get("DEPLOYMENT_NAME")



In [2]:
# Patch asyncio so that an event loop can be re-entered from inside the
# notebook's already-running loop.
# NOTE(review): presumably required by the `use_async=True` summary query
# engine created further down — confirm.
import nest_asyncio
nest_asyncio.apply()

In [3]:
from llama_index.core import SimpleDirectoryReader
# Read the MetaGPT paper from the working directory. `load_data()` returns a
# list of Document objects; the recorded output below shows each one carrying
# file metadata (including a `page_label`) alongside its extracted text.
documents = SimpleDirectoryReader(input_files=["metagpt.pdf"]).load_data()

In [19]:
# Debug dump of every loaded Document (ids, metadata and full page text).
# NOTE(review): this prints the entire list, which produces a very large
# output, and the cell carries execution count 19 although it sits between
# cells 3 and 4 — consider printing only `len(documents)` and a short preview,
# then re-running the notebook top to bottom.
print(documents)

[Document(id_='76ea4210-f2e5-4d74-8059-5ddce467c193', embedding=None, metadata={'page_label': '1', 'file_name': 'metagpt.pdf', 'file_path': 'metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16715764, 'creation_date': '2024-08-27', 'last_modified_date': '2024-08-27'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Preprint\nMETAGPT: M ETA PROGRAMMING FOR A\nMULTI -AGENT COLLABORATIVE FRAMEWORK\nSirui Hong1∗, Mingchen Zhuge2∗, Jonathan Chen1, Xiawu Zheng3, Yuheng Cheng4,\nCeyao Zhang4,Jinlin Wang1,Zili Wang ,Steven Ka Shing Yau5,Zijuan Lin4,\nLiyang Zhou6,Chenyu Ran1,Lingfeng Xiao1,7,Chenglin Wu1†,J¨urgen Schmidhuber2,8\n1DeepWisdom,2AI Initiative, King Abdullah University of Science and Technology,\n3Xiamen University,4The Chinese University of

## Split Documents into Nodes and Define LLM and Embedding Model

In [4]:
from llama_index.core.node_parser import SentenceSplitter

# Chunk the loaded documents into nodes using a sentence-based splitter
# configured with chunk_size=1024. The resulting `nodes` list is the shared
# input for both indexes built later.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [9]:
from llama_index.core import Settings
# NOTE: this import rebinds the name `AzureOpenAI`, replacing the class of the
# same name that the first cell imported from the `openai` package.
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

# Register the chat model used for routing and answer synthesis. The
# deployment name, key, endpoint and API version all come from the
# environment values read in the first cell.
Settings.llm = AzureOpenAI(
    model="gpt-4-32k",
    deployment_name=DEPLOYMENT_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=END_POINT,
    api_version=OPENAI_API_VERSION,
)

# Register the embedding model. The deployment name can be overridden through
# the EMBEDDING_DEPLOYMENT_NAME environment variable, mirroring how the LLM
# deployment is configured; when the variable is unset the previous
# hard-coded value "text-embedding-default" is used, so existing setups are
# unaffected.
Settings.embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name=os.getenv("EMBEDDING_DEPLOYMENT_NAME", "text-embedding-default"),
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=END_POINT,
    api_version=OPENAI_API_VERSION,
)

## Define Summary Index and Vector Index over the Same Data

In [10]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Build two indexes over the same node list: one intended for
# whole-document summarization and one for retrieval.
# NOTE(review): VectorStoreIndex presumably embeds the nodes with
# `Settings.embed_model` at construction time — confirm, since that makes
# this cell depend on the settings cell having run first.
summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

In [11]:
# Summary engine: uses the "tree_summarize" response mode with async
# execution enabled.
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
# Vector engine: created with the library's default query-engine settings.
vector_query_engine = vector_index.as_query_engine()

In [12]:
from llama_index.core.tools import QueryEngineTool

# Wrap each query engine as a tool. The description text is what the router's
# selector is given to decide which engine should handle a question.
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description="Useful for summarization questions related to MetaGPT",
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for retrieving specific context from the MetaGPT paper.",
)

## Define Router Query Engine

In [13]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Route each question to exactly one tool, chosen by an LLM-based selector.
# Tool order matters for reading the verbose log: index 0 is the summary
# tool and index 1 is the vector tool.
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[summary_tool, vector_tool],
    verbose=True,
)

In [14]:
# A summarization-style question; the recorded log shows the router picking
# query engine 0 (the summary tool).
response = query_engine.query("What is the summary of the document?")
print(response)

[1;3;38;5;200mSelecting query engine 0: The first choice is more relevant to the question because it refers to summarization, which is what the question is asking for..
[0mThe document is about MetaGPT, a meta-programming framework that uses large language models (LLMs) for multi-agent collaboration. It introduces the concept of Standardized Operating Procedures (SOPs) into the framework, allowing for more streamlined workflows and reducing errors. The system assigns diverse roles to various agents, breaking down complex tasks into subtasks. MetaGPT has shown to generate more coherent solutions than previous chat-based multi-agent systems. It also introduces an executable feedback mechanism to improve code generation quality during runtime. The framework has been tested on multiple benchmarks and has achieved state-of-the-art performance.


In [15]:
print(len(response.source_nodes))  # Used all nodes for summarization

32


In [16]:
# A question about a specific detail; the recorded log shows the router
# picking query engine 1 (the vector tool).
response = query_engine.query("How do agents share information with other agents?")
print(response)

[1;3;38;5;200mSelecting query engine 1: The question is asking for specific information about how agents share information with other agents, which may be detailed in the MetaGPT paper. Therefore, choice 2 is more relevant as it pertains to retrieving specific context from the paper..
[0mAgents share information with each other through a global message pool. They publish their structured messages in this pool and can also access messages from other entities directly. This eliminates the need for one-to-one communication and waiting for responses from other agents, thereby enhancing communication efficiency. To avoid information overload, an agent can subscribe to only task-related information based on their role profiles. This subscription mechanism allows agents to select and follow information relevant to their roles.
