In [11]:
from dotenv import load_dotenv
import os
import nest_asyncio
from llama_index.core import SimpleDirectoryReader
from langchain_huggingface import HuggingFaceEmbeddings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings, SummaryIndex, VectorStoreIndex
from langchain_mistralai import ChatMistralAI
from llama_index.core.tools import QueryEngineTool
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


## Load API keys and apply nest_asyncio

In [12]:
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with a readable message instead of letting the first LLM call
# die later with an authentication error from the remote API.
api_key = os.environ.get('MISTRAL_API_KEY')
if not api_key:
    raise RuntimeError(
        "MISTRAL_API_KEY is not set. Define it in your environment or in a "
        ".env file before running this notebook."
    )

# Patch asyncio so nested event loops work inside the notebook kernel
# (the summary query engine below is created with use_async=True).
nest_asyncio.apply()

## Load documents

In [13]:
# Read the single source PDF into a list of document objects.
pdf_reader = SimpleDirectoryReader(input_files=["../documents/Lomloe.pdf"])
documents = pdf_reader.load_data()

## Define LLM and Embedding model

In [14]:
# Register the models used as notebook-wide defaults on the shared Settings object.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2",
)
Settings.llm = ChatMistralAI(model="mistral-small-latest")


## Define Summary Index and Vector Index over the Same Data 

In [15]:
# Split the loaded documents into nodes using a chunk size of 1024.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

# Build both indexes over the very same nodes so the router can choose
# between them per question.
vector_index = VectorStoreIndex(nodes=nodes, embed_model=Settings.embed_model)
summary_index = SummaryIndex(nodes=nodes)



## Define Query Engines and Set Metadata

In [17]:
# Default query engine over the vector index.
vector_query_engine = vector_index.as_query_engine()

# Summary engine: tree_summarize response mode, with async calls enabled.
summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", use_async=True)

In [21]:
# Wrap each query engine as a tool; the description text is what the
# selector is given to decide which engine should answer a question.
summary_tool = QueryEngineTool.from_defaults(
    description="Useful for summarization questions related to Lomloe",
    query_engine=summary_query_engine,
)

vector_tool = QueryEngineTool.from_defaults(
    description="Useful for retrieving specific context from the Lomloe paper.",
    query_engine=vector_query_engine,
)

## Define Router Query Engine

In [22]:
# Router that picks exactly one of the two tools per query; verbose=True
# prints which engine was selected and why.
query_engine = RouterQueryEngine(
    query_engine_tools=[summary_tool, vector_tool],
    selector=LLMSingleSelector.from_defaults(),
    verbose=True,
)

In [25]:
# Ask the router a question and show the answer text.
question = "Dime las competencias clave que salen en el documento"
response = query_engine.query(question)
print(response)

[1;3;38;5;200mSelecting query engine 1: The question asks for specific information ('competencias clave') from the Lomloe document, which aligns with retrieving specific context..
[0mEl documento menciona los ciclos formativos de grado básico, que facilitan la adquisición de competencias de educación secundaria obligatoria. Estas competencias se organizan en el ámbito de Comunicación y Ciencias Sociales, que incluye materias como Lengua Castellana, Lengua Extranjera de Iniciación Profesional, Ciencias Sociales y, en su caso, Lengua Cooficial.


In [27]:
# Number of source nodes attached to the last response.
source_count = len(response.source_nodes)
print(source_count)

2


## With a helper function

In [31]:
def get_router_query_engine(file_path: str, llm = None, embed_model = None, doc_name = None):
    """Build a router query engine over a single document.

    The document is loaded, split into nodes, and indexed twice (a summary
    index and a vector index). Each index is exposed as a query-engine tool
    and both tools are handed to a single-choice router.

    Args:
        file_path: Path of the document to load.
        llm: LLM passed to both query engines. When omitted, a
            ``ChatMistralAI`` instance using ``mistral-small-latest`` is
            created.
        embed_model: Embedding model used for the vector index. When
            omitted, a ``HuggingFaceEmbedding`` using
            ``sentence-transformers/all-mpnet-base-v2`` is created.
        doc_name: Human-readable document name inserted into the tool
            descriptions. When omitted, the file name without its extension
            is used (e.g. ``"Lomloe"`` for ``"../documents/Lomloe.pdf"``).

    Returns:
        A ``RouterQueryEngine`` that routes between the summary tool and the
        vector tool.
    """
    # Respect caller-supplied models; only fall back to the defaults when
    # nothing was passed (previously the arguments were always overwritten).
    if llm is None:
        llm = ChatMistralAI(model="mistral-small-latest")
    if embed_model is None:
        embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

    # The tool descriptions steer the selector, so they must name the
    # document actually loaded rather than a hard-coded paper title.
    if doc_name is None:
        doc_name = os.path.splitext(os.path.basename(file_path))[0]

    # load documents
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()

    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)

    summary_index = SummaryIndex(nodes)
    vector_index = VectorStoreIndex(nodes, embed_model=embed_model)

    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
        llm=llm
    )
    vector_query_engine = vector_index.as_query_engine(llm=llm)

    summary_tool = QueryEngineTool.from_defaults(
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {doc_name}"
        ),
    )

    vector_tool = QueryEngineTool.from_defaults(
        query_engine=vector_query_engine,
        description=(
            f"Useful for retrieving specific context from the {doc_name} paper."
        ),
    )

    # NOTE(review): the selector is built with library defaults (no llm is
    # passed here), so it presumably relies on the globally configured
    # Settings.llm rather than the `llm` argument -- confirm.
    query_engine = RouterQueryEngine(
        selector=LLMSingleSelector.from_defaults(),
        query_engine_tools=[
            summary_tool,
            vector_tool,
        ],
        verbose=True
    )
    return query_engine

In [33]:
# Ask the router what the LOMLOE law says and show the answer text.
question = "Que dice la nueva ley LOMLOE."
response = query_engine.query(question)
print(response)

[1;3;38;5;200mSelecting query engine 1: The question 'Que dice la nueva ley LOMLOE.' is asking for specific information or context related to the LOMLOE paper, which aligns with the summary for choice (2)..
[0m

HTTPStatusError: Error response 429 while fetching https://api.mistral.ai/v1/chat/completions: {"message":"Requests rate limit exceeded"}

In [30]:
# Rebuild the router through the helper and repeat the key-competences question.
# The helper must already be defined in the kernel before this cell runs.
query_engine = get_router_query_engine("../documents/Lomloe.pdf")
question = "Dime las competencias clave que salen en el documento"
response = query_engine.query(question)
print(response)

NameError: name 'OpenAI' is not defined