In [12]:
# Celda 1: Importaciones
import json
import os
import pickle
import warnings
from pathlib import Path

import nest_asyncio
from dotenv import load_dotenv
from langchain_mistralai import ChatMistralAI
from llama_index.core import PromptTemplate
from llama_index.core import SimpleDirectoryReader, Document, Settings
from llama_index.core import SummaryIndex, VectorStoreIndex
from llama_index.core.agent.react.output_parser import ReActOutputParser
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.schema import MetadataMode
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import QueryEngineTool
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.mistralai import MistralAI

# Load environment variables (load_dotenv reads them from a .env file) and
# pick up the Mistral API key.
load_dotenv()
api_key = os.environ.get('MISTRAL_API_KEY')
if not api_key:
    # Surface the problem here instead of at the first LLM call further down.
    warnings.warn(
        "MISTRAL_API_KEY is not set; requests to the Mistral API will fail.",
        stacklevel=2,
    )
# Jupyter already runs an event loop; nest_asyncio lets code that starts its
# own loop (the summary engine is created with use_async=True) run inside it.
nest_asyncio.apply()

## Configuración de modelos y directorios

In [13]:
# Cell 2: model and directory configuration
# Default LLM: the native LlamaIndex Mistral integration (the same class the
# notebook uses further down), so Settings.llm does not rely on a LangChain
# object being adapted implicitly. The key read in the first cell is passed
# explicitly.
Settings.llm = MistralAI(model="mistral-small-latest", api_key=api_key)
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

# Create the cache directory if it does not exist yet
embeddings_dir = Path("data/embeddings")
embeddings_dir.mkdir(parents=True, exist_ok=True)

## Funciones de gestión de embeddings

In [14]:
# Celda 3: Funciones de gestión de embeddings
def save_embeddings(nodes, doc_name):
    """Pickle ``nodes`` to ``<embeddings_dir>/<doc_name>_embeddings.pkl``.

    The data is first written to a temporary sibling file and then moved over
    the final path, so an interrupted write cannot leave a truncated cache
    file that a later ``pickle.load`` would choke on.

    Args:
        nodes: Picklable object to cache (the node list built by the caller).
        doc_name (str): Identifier used to build the cache file name.
    """
    embeddings_path = embeddings_dir / f"{doc_name}_embeddings.pkl"
    # The directory is created at configuration time; recreate it in case it
    # was removed since then.
    embeddings_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = embeddings_path.with_name(embeddings_path.name + ".tmp")
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(nodes, f)
        tmp_path.replace(embeddings_path)
    finally:
        # Only still present when the dump or the replace failed.
        tmp_path.unlink(missing_ok=True)

def load_embeddings(doc_name):
    """Load the cached object for ``doc_name`` from ``embeddings_dir``.

    Args:
        doc_name (str): Identifier used to build the cache file name.

    Returns:
        The unpickled object, or None when there is no cache file or the file
        is truncated/corrupt (the caller then rebuilds the cache).
    """
    embeddings_path = embeddings_dir / f"{doc_name}_embeddings.pkl"
    if not embeddings_path.exists():
        return None
    try:
        with open(embeddings_path, 'rb') as f:
            # SECURITY: pickle.load can execute arbitrary code. Only load
            # cache files written by this notebook, never files from an
            # untrusted source.
            return pickle.load(f)
    except (EOFError, pickle.UnpicklingError) as exc:
        # A damaged cache is treated as a cache miss instead of aborting.
        warnings.warn(
            f"Ignoring unreadable cache file {embeddings_path}: {exc}",
            stacklevel=2,
        )
        return None

## Función principal para crear el motor de consultas

In [15]:
# Celda 4: Función principal para crear el motor de consultas
def get_router_query_engine(file_path, force_reload=False):
    """
    Build a RouterQueryEngine over one document, caching its embedded nodes.

    On a cache miss the file is loaded, split with
    ``SentenceSplitter(chunk_size=1024)``, every chunk is embedded with
    ``Settings.embed_model`` and the embedded nodes are pickled through
    ``save_embeddings``. On a cache hit the pickled nodes are reused as-is.

    Note:
        The cache key is only the file stem (``Path(file_path).stem``): two
        files with the same stem share one cache entry, and a modified file is
        not detected. Pass ``force_reload=True`` after changing the document
        or the embedding model.

    Args:
        file_path (str): Path to the PDF file.
        force_reload (bool): If True, rebuild the cache even if one exists.

    Returns:
        RouterQueryEngine: Engine that routes each query to either a summary
        engine or a vector-retrieval engine.
    """
    # Cache key: document name without directory or extension
    doc_name = Path(file_path).stem

    # Try the cache first unless a rebuild was requested
    nodes = None if force_reload else load_embeddings(doc_name)

    if nodes is None:
        # Load the document and split it into nodes
        documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
        splitter = SentenceSplitter(chunk_size=1024)
        nodes = splitter.get_nodes_from_documents(documents)

        # Embed the nodes BEFORE caching them, so the cache really contains
        # the embeddings and later runs do not have to recompute them.
        texts = [node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes]
        vectors = Settings.embed_model.get_text_embedding_batch(texts)
        for node, vector in zip(nodes, vectors):
            node.embedding = vector

        save_embeddings(nodes, doc_name)

    # Build the indexes.
    # NOTE(review): VectorStoreIndex is expected to reuse an embedding already
    # set on a node and only embed nodes that lack one (e.g. caches written
    # before embeddings were stored) -- confirm for the installed version.
    summary_index = SummaryIndex(nodes)
    vector_index = VectorStoreIndex(nodes, embed_model=Settings.embed_model)

    # Query engines
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
        llm=Settings.llm
    )
    vector_query_engine = vector_index.as_query_engine(llm=Settings.llm)

    # Wrap the engines as tools; the descriptions are what the selector reads
    # to choose between them, so they are runtime text and stay unchanged.
    summary_tool = QueryEngineTool.from_defaults(
        query_engine=summary_query_engine,
        description=(
            "Útil para preguntas de resumen relacionadas con el documento"
        ),
    )

    vector_tool = QueryEngineTool.from_defaults(
        query_engine=vector_query_engine,
        description=(
            "Útil para recuperar contexto específico del documento"
        ),
    )

    # Router that lets an LLM selector pick exactly one tool per query
    query_engine = RouterQueryEngine(
        selector=LLMSingleSelector.from_defaults(),
        query_engine_tools=[
            summary_tool,
            vector_tool,
        ],
        verbose=True
    )
    return query_engine

## Ejemplo de uso

In [16]:
# Cell 5: usage example
# Build the query engine. When no cache file exists for this document, the call
# parses the PDF and pickles the resulting nodes for later runs.
query_engine = get_router_query_engine("../documents/metagpt.pdf")

# Example query
response = query_engine.query("¿Cuáles son los puntos principales del documento?")
print(str(response))

# To force the cache to be rebuilt (for example, if the document has changed):
# query_engine = get_router_query_engine("../documents/metagpt.pdf", force_reload=True)
# response = query_engine.query("¿Cuáles son los puntos principales del documento?")
# print(str(response))

Selecting query engine 0: The question asks for the main points of the document, which aligns with summarizing the document.
The main points of the document are:

- Introduction of a framework called MetaGPT, designed for multi-agent collaboration based on large language models (LLMs).
- MetaGPT incorporates Standardized Operating Procedures (SOPs) to streamline workflows, reduce errors, and maintain consistency in communication.
- The framework utilizes an assembly line paradigm, assigning diverse roles to various agents to break down complex tasks into subtasks.
- MetaGPT achieves state-of-the-art performance in collaborative software engineering benchmarks, generating more coherent solutions than previous chat-based multi-agent systems.
- The system uses specialized roles like Product Manager, Architect, Project Manager, Engineer, and QA Engineer to handle complex software development tasks.
- MetaGPT emphasizes structured communication and the use of a publish-s

In [23]:
# Follow-up query against the engine built in the usage-example cell.
# NOTE(review): in the recorded output the router picked engine 1 (the vector
# tool) and the answer describes a results table rather than the opening of
# the document. Similarity retrieval presumably has no notion of line order,
# so this question may not be answerable as asked -- confirm.
response = query_engine.query("Dame las 10 primeras lineas del documento")
print(str(response))

Selecting query engine 1: The question asks for specific lines from the document, which aligns with recovering specific context from the document.
The document begins with a table that presents additional results of a system called MetaGPT without feedback on software development tasks. The table includes various statistics for 10 randomly selected tasks, such as the number of code files, lines of code, lines per code file, number of documentation files, lines of documentation, lines per documentation file, prompt tokens, completion tokens, time costs, and money costs. Additionally, it lists the cost of revision, code executability, and specific issues encountered in each task.


## Configurar el LLM de Mistral

llm = MistralAI(model="mistral-small-latest")

# JSON reply contract shown to the model. It is kept as a plain string and
# injected through the `{response_format}` template variable, so its literal
# braces never have to be escaped for the prompt template.
RESPONSE_FORMAT = """Once you have selected a tool, you must respond with a JSON object in the following format:
{
  "tool_code": "<name_of_the_selected_tool>",
  "tool_args": {"<arg_name>": <value>, "<arg_name>": <value>, ...}
}

If the user's query can be answered directly without using any of the provided tools, respond with a JSON object indicating that no tool is needed:
{
  "tool_code": "none",
  "tool_args": {}
}"""

# Prompt that guides Mistral to choose a tool. `{tool_desc}`,
# `{response_format}` and `{query_str}` are filled in when the prompt is sent.
prompt_template = PromptTemplate(
    """Given a user query and a set of available tools, determine which tool is most appropriate to use to answer the query.

You have access to the following tools:
{tool_desc}

Carefully consider the user's request and the description of each tool to decide which one can best address the query.

{response_format}

User Query: {query_str}
"""
)


class SimpleToolOutputParser(ReActOutputParser):
    """Render tool descriptions for the prompt and parse the JSON reply.

    The reply is expected to contain one JSON object with a ``tool_code``
    string and a ``tool_args`` object. Surrounding prose or Markdown code
    fences around that object are tolerated.
    """

    def format_tools_as_string(self, tools):
        """Return one ``- name: description`` line per tool."""
        lines = []
        for tool in tools:
            # LlamaIndex tools keep name/description on `.metadata`; objects
            # that expose them directly are accepted as well.
            meta = getattr(tool, "metadata", tool)
            lines.append(f"- {meta.name}: {meta.description}")
        return "\n".join(lines)

    @staticmethod
    def _load_json_object(output):
        """Return the first JSON object (dict) found in ``output``, or None."""
        if not isinstance(output, str):
            return None
        decoder = json.JSONDecoder()
        start = output.find("{")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(output, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            # Not a valid object at this brace; try the next one.
            start = output.find("{", start + 1)
        return None

    def parse_tool_code(self, output: str) -> "str | None":
        """Return the ``tool_code`` value, or None if it cannot be extracted."""
        payload = self._load_json_object(output)
        if payload is None:
            return None
        return payload.get("tool_code")

    def parse_tool_args(self, output: str) -> dict:
        """Return the ``tool_args`` object, or ``{}`` if missing or not an object."""
        payload = self._load_json_object(output)
        args = payload.get("tool_args") if payload else None
        return args if isinstance(args, dict) else {}


# Instantiate the custom output parser
output_parser = SimpleToolOutputParser()

# NOTE(review): add_tool, mystery_tool, add and mystery are not defined in the
# visible part of this notebook -- they must exist before this code runs.
tools = [add_tool, mystery_tool]
user_query_content = "What is mystery(2, 9)?"

# Send the rendered tool-selection prompt (tool list + reply format + query)
# to Mistral.
response_str = llm.predict(
    prompt_template,
    tool_desc=output_parser.format_tools_as_string(tools),
    response_format=RESPONSE_FORMAT,
    query_str=user_query_content,
)
print(f"Mistral's raw response:\n{response_str}\n")

# Parse the output to get the tool and arguments
tool_code = output_parser.parse_tool_code(response_str)
tool_args = output_parser.parse_tool_args(response_str)

print(f"Selected tool: {tool_code}")
print(f"Tool arguments: {tool_args}")

# Execute the tool if one was selected.
# NOTE(review): this assumes the tools are registered under the names
# "mystery_function" and "add", with argument names x/y and a/b respectively
# -- confirm against where the tools are defined.
if tool_code == "mystery_function":
    x, y = tool_args.get("x"), tool_args.get("y")
    result = mystery(x, y)
    # Report the arguments that were actually parsed, not a fixed example.
    print(f"\nOutput of mystery_function({x}, {y}): {result}")
elif tool_code == "add":
    result = add(tool_args.get("a"), tool_args.get("b"))
    print(f"\nOutput of add(): {result}")
else:
    print("\nNo tool was selected, or the tool code was not recognized.")

# Build the router engine for the same PDF again and ask another question.
# get_router_query_engine consults the pickled cache first because
# force_reload is left at its default (False).
query_engine = get_router_query_engine("../documents/metagpt.pdf")
response = query_engine.query("Dime las competencias clave que salen en el documento")
print(str(response))