In [2]:
import os
import re
import torch
import requests
import getpass
from bs4 import BeautifulSoup
from tqdm import tqdm
import pandas as pd
from time import time
import gradio as gr
from transformers import BitsAndBytesConfig
from llama_index.core import Settings, Document, VectorStoreIndex, StorageContext
from llama_index.core.prompts import PromptTemplate
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.agent import ReActAgent, FunctionCallingAgentWorker, AgentRunner
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore

In [3]:
from dotenv import load_dotenv

load_dotenv("../.env")

True

In [4]:
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

In [5]:
from llama_index.core import PromptTemplate, Settings
from llama_index.llms.openai import OpenAI

In [6]:
Settings.llm = OpenAI(model="gpt-4o", 
                        max_tokens=1000, 
                        temperature=0.5, 
                        top_p=1.0, 
                        frequency_penalty=0.0, 
                        presence_penalty=0.0, 
                        stop=["\n"]
                    )


ValidationError: 2 validation errors for OpenAI
logprobs
  Field required [type=missing, input_value={'model': 'gpt-4o', 'temp...y': 0.0, 'stop': ['\n']}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.7/v/missing
default_headers
  Input should be a valid dictionary [type=dict_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.7/v/dict_type

In [None]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from llama_index.core import VectorStoreIndex


qdrant_client = QdrantClient(
    host=os.getenv('QDRANT_HOST'),
    port=os.getenv('QDRANT_PORT'),
    api_key=os.getenv('QDRANT_API_KEY'),
)

vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=os.getenv('QDRANT_COLLECTION_NAME'),
)
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [None]:
from llama_index.core.postprocessor.llm_rerank import LLMRerank
ranker = LLMRerank(
            choice_batch_size=5, top_n=3, llm=OpenAI(model="gpt-4o-mini")
        )

In [None]:
query_engine = index.as_query_engine(
    llm = Settings.llm,
    node_post_processor=[ranker]
)

In [None]:
# Defining the self RAG logic flow
now = time()
response = query_engine.query("What are the two main metrics used to evaluate the performance of the different rerankers in the RAG system?")
display_response(response)

print(f"Source: {response.source_nodes[0].metadata['source']}")
print(f"Title: {response.source_nodes[0].metadata['title']}")
print(f"URL: {response.source_nodes[0].metadata['url']}")
print(f"Date: {response.source_nodes[0].metadata['date']}")
print(f"Elapsed: {round(time() - now, 2)}s")


**`Final Response:`** The two main metrics used to evaluate the performance of the different rerankers in the RAG system are Hit Rate and Mean Reciprocal Rank (MRR).

Source: /blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83
Title: Boosting RAG: Picking the Best Embedding & Reranker models
URL: https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83
Date: Nov 3, 2023
Elapsed: 2.3s


In [None]:
display_response(
    response, show_source=True, source_length=500, show_source_metadata=True
)

**`Final Response:`** The two main metrics used to evaluate the performance of the different rerankers in the RAG system are Hit Rate and Mean Reciprocal Rank (MRR).

---

**`Source Node 1/2`**

**Node ID:** 4bde7f43-606f-47db-b581-bb338b60efe4<br>**Similarity:** 0.88150674<br>**Text:** Let's dive in! Retrieval Evaluation Let’s first start with understanding the metrics available in Retrieval Evaluation Retrieval Evaluation Section: Understanding Metrics in Retrieval Evaluation:: To gauge the efficacy of our retrieval system, we primarily relied on two widely accepted metrics: Hit Rate and Mean Reciprocal Rank (MRR). Let’s delve into these metrics to understand their significance and how they operate. Hit Rate: Hit rate calculates the fraction of queries where the correct an...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

---

**`Source Node 2/2`**

**Node ID:** 2d0a3c89-1155-45a4-b2e0-12e0fd338c7d<br>**Similarity:** 0.86984634<br>**Text:** How do we know which embedding model fits our data best? Or which reranker boosts our results the most? In this blog post, we’ll use the Retrieval Evaluation module from LlamaIndex to swiftly determine the best combination of embedding and reranker models. Let's dive in! Retrieval Evaluation Let’s first start with understanding the metrics available in Retrieval Evaluation Retrieval Evaluation Section: Understanding Metrics in Retrieval Evaluation:: To gauge the efficacy of our retrieval syst...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

In [None]:
chat_engine = index.as_chat_engine(
    chat_mode = "openai",
    llm = Settings.llm,
    node_post_processor=[ranker]
)

In [None]:
response = chat_engine.chat("Hi")
display_response(response)

**`Final Response:`** Hello again! How can I help you today?

In [None]:
response = chat_engine.chat(
    "What is llama-index?"
)

In [None]:
display_response(response)


**`Final Response:`** LlamaIndex is an advanced data framework tailored for large language model (LLM) applications. It offers a comprehensive suite of features for managing and querying data, such as:

- **Data Ingestion**: Importing data from various sources.
- **Parsing/Slicing**: Processing and structuring data.
- **Storage/Indexing**: Efficiently storing and indexing data for quick retrieval.
- **Retrieval**: Accessing the stored data as needed.
- **Response Synthesis**: Generating responses based on the retrieved data.
- **Multi-step Interactions**: Handling complex queries that require multiple steps.

LlamaIndex facilitates the integration of individual or enterprise data from diverse sources, including files, workplace applications, and databases, with LLM applications. It also supports numerous integrations with storage providers, downstream applications, and observability and experimentation frameworks. Additionally, it can function as a ChatGPT Retrieval Plugin or be used with Poe.

In [None]:
response = chat_engine.chat(
    "What are the two main metrics used to evaluate the performance of the different rerankers in the RAG system?"
)

In [None]:
display_response(
    response, show_source=True, source_length=500, show_source_metadata=True
)

**`Final Response:`** The two main metrics used to evaluate the performance of the different rerankers in the RAG (Retrieval-Augmented Generation) system are:

1. **Hit Rate**: This measures the frequency at which the correct answer appears in the top-k retrieved documents.
2. **Mean Reciprocal Rank (MRR)**: This metric evaluates the rank of the first correct answer by taking the reciprocal of its rank and averaging these values across multiple queries.

---

**`Source Node 1/2`**

**Node ID:** 4bde7f43-606f-47db-b581-bb338b60efe4<br>**Similarity:** 0.88150674<br>**Text:** Let's dive in! Retrieval Evaluation Let’s first start with understanding the metrics available in Retrieval Evaluation Retrieval Evaluation Section: Understanding Metrics in Retrieval Evaluation:: To gauge the efficacy of our retrieval system, we primarily relied on two widely accepted metrics: Hit Rate and Mean Reciprocal Rank (MRR). Let’s delve into these metrics to understand their significance and how they operate. Hit Rate: Hit rate calculates the fraction of queries where the correct an...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

---

**`Source Node 2/2`**

**Node ID:** 2d0a3c89-1155-45a4-b2e0-12e0fd338c7d<br>**Similarity:** 0.86984634<br>**Text:** How do we know which embedding model fits our data best? Or which reranker boosts our results the most? In this blog post, we’ll use the Retrieval Evaluation module from LlamaIndex to swiftly determine the best combination of embedding and reranker models. Let's dive in! Retrieval Evaluation Let’s first start with understanding the metrics available in Retrieval Evaluation Retrieval Evaluation Section: Understanding Metrics in Retrieval Evaluation:: To gauge the efficacy of our retrieval syst...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

In [None]:
response = chat_engine.chat(
    "What is the best way to evaluate the performance of the different rerankers in the RAG system?"
)

display_response(
    response, show_source=True, source_length=500, show_source_metadata=True
)

**`Final Response:`** The best way to evaluate the performance of different rerankers in the RAG (Retrieval-Augmented Generation) system is to use the Retrieval Evaluation module from LlamaIndex and focus on the following two key metrics:

1. **Hit Rate**: This metric measures the fraction of queries where the correct answer is found within the top-k retrieved documents. It indicates how often the system retrieves the correct document within the top guesses.

2. **Mean Reciprocal Rank (MRR)**: This metric assesses the system's accuracy by considering the rank of the highest-placed relevant document for each query. It takes the reciprocal of the rank of the first relevant document and averages these values across multiple queries.

By focusing on these metrics, you can gain a comprehensive understanding of the rerankers' effectiveness in retrieving relevant documents.

---

**`Source Node 1/2`**

**Node ID:** 2d0a3c89-1155-45a4-b2e0-12e0fd338c7d<br>**Similarity:** 0.86747634<br>**Text:** How do we know which embedding model fits our data best? Or which reranker boosts our results the most? In this blog post, we’ll use the Retrieval Evaluation module from LlamaIndex to swiftly determine the best combination of embedding and reranker models. Let's dive in! Retrieval Evaluation Let’s first start with understanding the metrics available in Retrieval Evaluation Retrieval Evaluation Section: Understanding Metrics in Retrieval Evaluation:: To gauge the efficacy of our retrieval syst...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

---

**`Source Node 2/2`**

**Node ID:** 4bde7f43-606f-47db-b581-bb338b60efe4<br>**Similarity:** 0.8631115<br>**Text:** Let's dive in! Retrieval Evaluation Let’s first start with understanding the metrics available in Retrieval Evaluation Retrieval Evaluation Section: Understanding Metrics in Retrieval Evaluation:: To gauge the efficacy of our retrieval system, we primarily relied on two widely accepted metrics: Hit Rate and Mean Reciprocal Rank (MRR). Let’s delve into these metrics to understand their significance and how they operate. Hit Rate: Hit rate calculates the fraction of queries where the correct an...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

In [None]:
response = chat_engine.chat(
    "Hit Rate? What is that?"
)

display_response(
    response, show_source=True, source_length=500, show_source_metadata=True
)

**`Final Response:`** **Hit Rate** is a metric used to evaluate the performance of rerankers in the RAG (Retrieval-Augmented Generation) system. It measures the proportion of queries for which the correct answer or relevant document appears within the top-k retrieved documents. Essentially, it evaluates how often the system successfully identifies the correct information within its top few guesses. 

For example, if the Hit Rate is calculated for the top-5 documents (Hit@5), it indicates the percentage of queries where the correct answer is found among the first five retrieved documents. A higher Hit Rate means the system is more effective at retrieving relevant information quickly.

---

**`Source Node 1/2`**

**Node ID:** e469c097-859b-4b52-94f4-f389436a9764<br>**Similarity:** 0.9029674<br>**Text:** Let’s delve into these metrics to understand their significance and how they operate. Hit Rate: Hit rate calculates the fraction of queries where the correct answer is found within the top-k retrieved documents. In simpler terms, it’s about how often our system gets it right within the top few guesses. Mean Reciprocal Rank (MRR): For each query, MRR evaluates the system’s accuracy by looking at the rank of the highest-placed relevant document. Specifically, it’s the average of the reciprocals...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

---

**`Source Node 2/2`**

**Node ID:** ebb4aab6-352a-45ed-8a07-7fd6a205cdb5<br>**Similarity:** 0.8895761<br>**Text:** Hit Rate: Hit Rate measures the proportion of queries for which the correct chunk/ context appears within the top-k results chunks/ contexts. Put simply, it evaluates how frequently our system correctly identifies the chunk within its top-k chunks. Mean Reciprocal Rank (MRR): MRR assesses a system’s accuracy by taking into account the position of the highest-ranking relevant chunk/ context for each query. It calculates the average of the inverse of these positions across all queries. For inst...<br>**Metadata:** {'source': '/blog/llamaindex-enhancing-retrieval-performance-with-alpha-tuning-in-hybrid-search-in-rag-135d0c9b8a00', 'title': 'LlamaIndex: Enhancing Retrieval Performance with Alpha Tuning in Hybrid Search in RAG', 'url': 'https://www.llamaindex.ai/blog/llamaindex-enhancing-retrieval-performance-with-alpha-tuning-in-hybrid-search-in-rag-135d0c9b8a00', 'date': 'Jan 31, 2024'}<br>

In [None]:
response = chat_engine.chat(
    "And the second metric you mentione in the previous response?"
)

display_response(
    response, show_source=True, source_length=500, show_source_metadata=True
)

**`Final Response:`** The second metric I mentioned is **Mean Reciprocal Rank (MRR)**. Here's a detailed explanation:

**Mean Reciprocal Rank (MRR)** is a metric used to evaluate the effectiveness of a retrieval system by considering the rank position of the first relevant document for each query. It is calculated as follows:

1. **Reciprocal Rank**: For each query, determine the rank position of the first relevant document. The reciprocal rank is then calculated as \( \frac{1}{\text{rank}} \). For example, if the first relevant document is at rank 1, the reciprocal rank is 1. If it is at rank 3, the reciprocal rank is \( \frac{1}{3} \).

2. **Mean Reciprocal Rank**: The MRR is the average of the reciprocal ranks across all queries. It is given by the formula:
   \[
   \text{MRR} = \frac{1}{N} \sum_{i=1}^{N} \frac{1}{\text{rank}_i}
   \]
   where \( N \) is the total number of queries, and \( \text{rank}_i \) is the rank position of the first relevant document for the \( i \)-th query.

MRR provides a single score that reflects the system's ability to rank relevant documents highly. A higher MRR indicates better performance, as it means relevant documents are appearing closer to the top of the ranked list.

In [None]:
# Create a QueryEngineTool for vector search and summary
vector_tool = QueryEngineTool(
    index.as_query_engine(),
    metadata=ToolMetadata(
        name="vector_search",
        description="Useful for searching for information."
    )
)

summary_tool = QueryEngineTool(
    index.as_query_engine(response_mode = "tree_summarize"),
    metadata=ToolMetadata(
        name="summary",
        description="Useful for summarizing information."
    )
)

In [None]:
query_engine1 = RouterQueryEngine.from_defaults(
    [vector_tool, summary_tool],
    verbose=True
)

In [None]:
response = query_engine1.query("What are key features of llama-agents?")
display_response(
    response, show_source=True, source_length=500, show_source_metadata=True
)

[1;3;38;5;200mSelecting query engine 0: Searching for key features of llama-agents requires finding information, which aligns with the purpose of searching for information..
[0m

**`Final Response:`** Key features of llama-agents include:

1. **Distributed Service Oriented Architecture**: Each agent can operate as an independently running microservice, managed by a customizable LLM-powered control plane that routes and distributes tasks.
2. **Communication via standardized API interfaces**: Agents interface using a central control plane orchestrator and communicate by passing messages through a message queue.
3. **Define agentic and explicit orchestration flows**: Developers can either directly define the sequence of interactions between agents or use an "agentic orchestrator" to determine relevant agents for tasks.

---

**`Source Node 1/2`**

**Node ID:** 92279433-6f7e-44c1-bb53-805a502c65a0<br>**Similarity:** 0.8826333<br>**Text:** Whether you're working on complex question-answering systems, collaborative AI assistants, or distributed AI workflows, llama-agents provides the tools and structure you need to bring your ideas to life. llama-agents Subsection: Key Features of llama-agents: - Distributed Service Oriented Architecture: every agent in LlamaIndex can be its own independently running microservice, orchestrated by a fully customizable LLM-powered control plane that routes and distributes tasks. - Communication vi...<br>**Metadata:** {'source': '/blog/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems', 'title': 'Introducing llama-agents: A Powerful Framework for Building Production Multi-Agent AI Systems', 'url': 'https://www.llamaindex.ai/blog/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems', 'date': 'Jun 26, 2024'}<br>

---

**`Source Node 2/2`**

**Node ID:** 881ee9fe-b681-432f-a751-d4fa52fcb94e<br>**Similarity:** 0.874347<br>**Text:** We're excited to announce the alpha release of llama-agents, a new open-source framework designed to simplify the process of building, iterating, and deploying multi-agent AI systems and turn your agents into production microservices. Whether you're working on complex question-answering systems, collaborative AI assistants, or distributed AI workflows, llama-agents provides the tools and structure you need to bring your ideas to life. llama-agents Subsection: Key Features of llama-agents: - D...<br>**Metadata:** {'source': '/blog/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems', 'title': 'Introducing llama-agents: A Powerful Framework for Building Production Multi-Agent AI Systems', 'url': 'https://www.llamaindex.ai/blog/introducing-llama-agents-a-powerful-framework-for-building-production-multi-agent-ai-systems', 'date': 'Jun 26, 2024'}<br>

In [None]:
response = query_engine1.query("Not hit rate")
display_response(
    response, show_source=True, source_length=500, show_source_metadata=True
)

[1;3;38;5;200mSelecting query engine 0: Searching for information is more relevant to understanding why something is not a hit rate..
[0m

**`Final Response:`** Mean reciprocal rank

---

**`Source Node 1/2`**

**Node ID:** ff55e25c-2a1d-4819-87c9-c445c6c24270<br>**Similarity:** 0.7942804<br>**Text:** SubSubsection: Evaluating the NetworkRetriever NetworkRetriever To evaluate the efficacy of the NetworkRetriever we make use of our test set in order to compute two traditional retrieval metrics, namely: hit rate and mean reciprocal rank. NetworkRetriever - hit rate: a hit occurs if any of the retrieved nodes share the same disease label as the test query (symptoms). The hit rate then is the total number of hits divided by the size of the test set. - mean reciprocal rank: similar to hit rate,...<br>**Metadata:** {'source': '/blog/retrieving-privacy-safe-documents-over-a-network', 'title': 'Retrieving Privacy-Safe Documents Over A Network', 'url': 'https://www.llamaindex.ai/blog/retrieving-privacy-safe-documents-over-a-network', 'date': 'Mar 20, 2024'}<br>

---

**`Source Node 2/2`**

**Node ID:** 9c84211a-37d8-49f7-9101-06a6f39223e2<br>**Similarity:** 0.794103<br>**Text:** NetworkRetriever - hit rate: a hit occurs if any of the retrieved nodes share the same disease label as the test query (symptoms). The hit rate then is the total number of hits divided by the size of the test set. - mean reciprocal rank: similar to hit rate, but now we take into account the position of the first retrieved node that shares the same disease label as the test query. If there is no such retrieved node, then the reciprocal rank of the test is equal to 0. The mean reciprocal rank i...<br>**Metadata:** {'source': '/blog/retrieving-privacy-safe-documents-over-a-network', 'title': 'Retrieving Privacy-Safe Documents Over A Network', 'url': 'https://www.llamaindex.ai/blog/retrieving-privacy-safe-documents-over-a-network', 'date': 'Mar 20, 2024'}<br>

In [None]:
from llama_index.core.agent import ReActAgent

agent = ReActAgent.from_tools(
    [vector_tool, summary_tool],
    llm=Settings.llm,
    node_post_processor=[ranker],
    verbose=True
)

In [None]:
response = agent.chat("What is the best way to evaluate the performance of the different rerankers in the RAG system?")

> Running step b9270e1d-65aa-4bb0-a275-9b0ab9946deb. Step input: What is the best way to evaluate the performance of the different rerankers in the RAG system?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_search
Action Input: {'input': 'best way to evaluate the performance of different rerankers in the RAG system'}
[0m[1;3;34mObservation: The best way to evaluate the performance of different rerankers in the RAG system is to use metrics such as Hit Rate and Mean Reciprocal Rank (MRR). These metrics provide insights into how effectively the retrieval system is performing, with Hit Rate indicating the fraction of queries where the correct answer is found within the top-k retrieved documents, and MRR measuring the rank of the first correct answer.
[0m> Running step e41aab81-57d2-4d4a-951d-6cfd4c1a7260. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use 

In [None]:
display_response(response, show_source=True, source_length=500, show_source_metadata=True)

**`Final Response:`** The best way to evaluate the performance of different rerankers in the RAG system is to use metrics such as Hit Rate and Mean Reciprocal Rank (MRR). Hit Rate indicates the fraction of queries where the correct answer is found within the top-k retrieved documents, while MRR measures the rank of the first correct answer. These metrics provide valuable insights into the effectiveness of the retrieval system.

---

**`Source Node 1/2`**

**Node ID:** bfd5fa0b-49f4-4da9-8adc-4b9151a24081<br>**Similarity:** 0.8654271<br>**Text:** However, actual outcomes may differ based on data characteristics, dataset size, and other variables like chunk_size, similarity_top_k, and so on. It’s worth mentioning that these results provide a solid insight into performance for this particular dataset and task. However, actual outcomes may differ based on data characteristics, dataset size, and other variables like chunk_size, similarity_top_k, and so on. The table below showcases the evaluation results based on the metrics of Hit Rate a...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

---

**`Source Node 2/2`**

**Node ID:** 2d0a3c89-1155-45a4-b2e0-12e0fd338c7d<br>**Similarity:** 0.8604243<br>**Text:** How do we know which embedding model fits our data best? Or which reranker boosts our results the most? In this blog post, we’ll use the Retrieval Evaluation module from LlamaIndex to swiftly determine the best combination of embedding and reranker models. Let's dive in! Retrieval Evaluation Let’s first start with understanding the metrics available in Retrieval Evaluation Retrieval Evaluation Section: Understanding Metrics in Retrieval Evaluation:: To gauge the efficacy of our retrieval syst...<br>**Metadata:** {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'title': 'Boosting RAG: Picking the Best Embedding & Reranker models', 'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83', 'date': 'Nov 3, 2023'}<br>

In [None]:
response = agent.chat("Can you summarize your previous answer?")

> Running step 5e7904fb-1327-48d8-980f-19b85d133825. Step input: Can you summarize your previous answer?
[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me summarize my previous answer.
Action: summary
Action Input: {'input': 'The best way to evaluate the performance of different rerankers in the RAG system is to use metrics such as Hit Rate and Mean Reciprocal Rank (MRR). Hit Rate indicates the fraction of queries where the correct answer is found within the top-k retrieved documents, while MRR measures the rank of the first correct answer. These metrics provide valuable insights into the effectiveness of the retrieval system.'}
[0m[1;3;34mObservation: The best way to evaluate the performance of different rerankers in the RAG system is to use metrics such as Hit Rate and Mean Reciprocal Rank (MRR). Hit Rate indicates the fraction of queries where the correct answer is found within the top-k retrieved documents, while MRR measures the

{'e469c097-859b-4b52-94f4-f389436a9764': {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83',
  'title': 'Boosting RAG: Picking the Best Embedding & Reranker models',
  'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83',
  'date': 'Nov 3, 2023'},
 '4bde7f43-606f-47db-b581-bb338b60efe4': {'source': '/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83',
  'title': 'Boosting RAG: Picking the Best Embedding & Reranker models',
  'url': 'https://www.llamaindex.ai/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83',
  'date': 'Nov 3, 2023'}}

In [None]:
response = agent.chat("hi")

> Running step 415d42a2-7389-4394-b2bc-31f1f24b7f4d. Step input: hi
[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Answer: Hello! How can I assist you today?
[0m

In [None]:
response

AgentChatResponse(response='Hello! How can I assist you today?', sources=[], source_nodes=[], is_dummy_stream=False, metadata=None)