# Router Engine

![Router Engine Diagram](assets/router-engine.png)

In [1]:
import getpass
import os
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

def _require_secret(env_name: str) -> str:
    """Return the secret held in *env_name*, prompting for it once if unset.

    The value is written back to ``os.environ`` so libraries that read the
    variable directly can find it.
    """
    value = os.environ.get(env_name)
    if not value:
        value = getpass.getpass(f"{env_name}: ")
        os.environ[env_name] = value
    return value


# Credentials are taken from the environment (or typed in interactively)
# instead of being committed with the notebook. Any key that was previously
# stored in this file should be treated as exposed and rotated.
_require_secret("TAVILY_API_KEY")
_require_secret("ATHINA_API_KEY")


INFERENCE_SERVER_URL = "http://localhost:8989"
# Smaller alternative: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
# Key expected by the inference server at INFERENCE_SERVER_URL.
API_KEY = _require_secret("INFERENCE_API_KEY")

# Chat model pointed at the OpenAI-compatible `/v1` endpoint of the inference
# server; tokens are streamed to stdout through the callback handler.
llm = ChatOpenAI(
    openai_api_key=API_KEY,
    openai_api_base=f"{INFERENCE_SERVER_URL}/v1",
    model_name=MODEL_NAME,
    top_p=0.92,
    temperature=0.01,
    max_tokens=512,
    presence_penalty=1.03,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)


In [2]:
import nest_asyncio

# Patch asyncio so an already-running event loop (as in a notebook) can be
# re-entered; a query engine later in this file is created with use_async=True.
nest_asyncio.apply()

### Load Data

In [3]:
from llama_index.core import SimpleDirectoryReader

In [4]:
# Read ./assets/metagpt.pdf and keep the loaded result in `documents`.
documents = SimpleDirectoryReader(input_files=['./assets/metagpt.pdf']).load_data()

### Split Documents into Nodes and Define LLM and Embedding Model

In [5]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes using a chunk size of 1024.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [6]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter

# Register the chat model configured earlier on the shared Settings object.
Settings.llm = llm
# Alternative embedding backend (OpenAI), kept for reference:
# Settings.embed_model = OpenAIEmbedding(model='text-embedding-ada-002')
# Use a HuggingFace embedding model constructed with its default arguments.
Settings.embed_model = HuggingFaceEmbedding()

### Define Summary Index and Vector Index

In [7]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Build two indexes over the same nodes: one backs the summarization engine,
# the other backs the retrieval engine defined further down.
summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

### Define Query Engines

In [8]:
# Retrieval engine: default query engine over the vector index.
vector_query_engine = vector_index.as_query_engine()

# Summarization engine: tree-summarize response mode, run asynchronously.
summary_query_engine = summary_index.as_query_engine(response_mode='tree_summarize', use_async=True)

In [9]:
from llama_index.core.tools import QueryEngineTool

# Wrap each query engine as a tool; the description is the text the router's
# selector sees when choosing between them.
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description='Useful for summarization questions related to MetaGPT',
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description='Useful for retrieving specific context from MetaGPT',
)

### Define Router Query Engine

In [10]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Router that asks an LLM-backed single-choice selector to pick exactly one of
# the two tools per query; verbose=True prints the selection and its reason.
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[summary_tool, vector_tool],
    verbose=True,
)

In [11]:
# Broad summarization request sent through the router.
response = query_engine.query('Summarize the document')
print(response)



```json
[
    {
        "choice": 1,
        "reason": "The question asks for a summary of a document, which aligns with the purpose of choice (1) as it is designed for summarization tasks related to MetaGPT."
    }
]
```
Selecting query engine 0: The question asks for a summary of a document, which aligns with the purpose of choice (1) as it is designed for summarization tasks related to MetaGPT..

**Summary of MetaGPT Document**

MetaGPT is a meta-programming framework designed to enhance multi-agent collaborations using Large Language Models (LLMs). It addresses the challenge of complex problem-solving by incorporating human-like Standardized Operating Procedures (SOPs), which streamline workflows and reduce errors caused by naive chaining of LLMs. The framework assigns diverse roles to agents, enabling efficient task decomposition and structured communication. Each agent operates with defined goals and constraints, adhering to SOPs that guide their actions and 

In [12]:
# Number of source nodes attached to the response from the previous query.
len(response.source_nodes)

34

In [13]:
# Specific, detail-oriented question sent through the router.
response = query_engine.query('How do agents share information with other agents ?')
print(response)



```json
[
    {
        "choice": 2,
        "reason": "The question asks for specific details on how agents share information, which requires retrieving specific context rather than a general summary."
    }
]
```
Selecting query engine 1: The question asks for specific details on how agents share information, which requires retrieving specific context rather than a general summary..

Agents share information through a publish-subscribe mechanism. They publish structured outputs, such as documents and diagrams, into a shared message pool. Each agent can then subscribe to receive only the information relevant to their role, allowing them to efficiently access necessary details without unnecessary distractions.

Agents share information through a publish-subscribe mechanism. They publish structured outputs, such as documents and diagrams, into a shared message pool. Each agent can then subscribe to receive only the information relevant to their role, allowing them t

In [14]:
# Number of source nodes attached to the response from the previous query.
len(response.source_nodes)

2

### Given a file path, create a router query engine

In [15]:
from llama_index.core import SimpleDirectoryReader, SummaryIndex, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import QueryEngineTool
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI


def get_router_query_engine(file_path: str, llm=None, embed_model=None, topic: str = 'MetaGPT'):
    """Build a router query engine over a single document.

    The file is loaded, split into nodes, and indexed twice (a summary index
    and a vector index). Each index is exposed as a query-engine tool, and a
    single-choice LLM selector routes every query to one of the two tools.

    Args:
        file_path: Path of the document to load.
        llm: Language model handed to both query engines and to the selector.
            When ``None``, the argument is passed through unchanged and the
            library's own default applies.
        embed_model: Embedding model handed to the vector index. When
            ``None``, the library's own default applies.
        topic: Subject name inserted into the two tool descriptions the
            selector reads. Defaults to ``'MetaGPT'``, which reproduces the
            original descriptions.

    Returns:
        A ``RouterQueryEngine`` over the summary tool and the vector tool.
    """
    # Load the file and split it into nodes.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    nodes = SentenceSplitter(chunk_size=1024).get_nodes_from_documents(documents)

    summary_index = SummaryIndex(nodes)
    # The caller's embedding model must be used when the index is built,
    # otherwise the `embed_model` argument has no effect.
    vector_index = VectorStoreIndex(nodes, embed_model=embed_model)

    summary_query_engine = summary_index.as_query_engine(
        response_mode='tree_summarize',
        use_async=True,
        llm=llm,
    )
    # Same LLM for retrieval answers as for summaries.
    vector_query_engine = vector_index.as_query_engine(llm=llm)

    summary_tool = QueryEngineTool.from_defaults(
        query_engine=summary_query_engine,
        description=f'Useful for summarization questions related to {topic}',
    )
    vector_tool = QueryEngineTool.from_defaults(
        query_engine=vector_query_engine,
        description=f'Useful for retrieving specific context from {topic}',
    )

    return RouterQueryEngine(
        # Route with the same LLM the engines use.
        selector=LLMSingleSelector.from_defaults(llm=llm),
        query_engine_tools=[summary_tool, vector_tool],
        verbose=True,
    )

In [16]:
# Rebuild the router engine through the helper; `llm` and `embed_model` are
# left at their None defaults.
query_engine = get_router_query_engine('./assets/metagpt.pdf')

In [17]:
# Ask about one specific section of the document through the router.
response = query_engine.query('Explain ablation study results')
print(response)



```json
[
    {
        "choice": 2,
        "reason": "The question requires explaining the results of an ablation study, which involves analyzing specific components and their effects. Choice (2) is relevant as it pertains to retrieving detailed context necessary for such an explanation."
    }
]
```
Selecting query engine 1: The question requires explaining the results of an ablation study, which involves analyzing specific components and their effects. Choice (2) is relevant as it pertains to retrieving detailed context necessary for such an explanation..

The ablation study results demonstrate how different factors influence MetaGPT's performance. When using more advanced language models like GPT-4, MetaGPT achieves higher success rates in benchmarks, such as 87.7% in HumanEval, outperforming previous methods. Additionally, providing detailed instructions yields better task execution, though high-level prompts still produce effective results with comparable qu