In [None]:
import nest_asyncio

# Patch asyncio so that event loops can be nested. Presumably required because
# the notebook kernel already runs a loop while the async llama_index calls
# further down start work of their own -- TODO confirm.
nest_asyncio.apply()

import os
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import Settings

from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core import QueryBundle
from llama_index.core.indices.query.schema import QueryType

# API credentials. Paste a key between the quotes, or leave the literal empty
# to keep whatever is already exported in the environment. Assigning ""
# unconditionally would erase a key the user has already configured.
os.environ["OPENAI_API_KEY"] = "" or os.environ.get("OPENAI_API_KEY", "")
os.environ["COHERE_API_KEY"] = "" or os.environ.get("COHERE_API_KEY", "")

# LLM handle (temperature 0.0); passed explicitly to the dataset generator.
llm = OpenAI(model="gpt-4o-mini", temperature=0.0)
# Register the HuggingFace embedding model on the global llama_index Settings.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Read the documents from the local "data" directory and split them into
# nodes (chunk_size=512, chunk_overlap=20).
documents = SimpleDirectoryReader("data").load_data()
splitter = SentenceSplitter(chunk_size=512, chunk_overlap=20)
nodes = splitter.get_nodes_from_documents(documents)

In [None]:
# Cohere reranker, keyed from the COHERE_API_KEY environment variable.
# top_n=2 -- presumably the number of nodes kept after reranking; confirm
# against the CohereRerank docs. CustomRetriever references this object as a
# module-level global, so it must exist before any retrieval runs.
reranker = CohereRerank(api_key=os.environ["COHERE_API_KEY"], top_n=2)

from llama_index.core.evaluation import generate_question_context_pairs

# Use the LLM to generate an evaluation set from the chunked nodes, asking for
# one question per chunk.
qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=1
)

# View over the generated question strings (values of the `queries` mapping).
queries = qa_dataset.queries.values()

In [None]:
from llama_index.core.retrievers import BaseRetriever, VectorIndexRetriever
from llama_index.core.schema import NodeWithScore
from typing import List

class CustomRetriever(BaseRetriever):
    """Retriever that reranks the output of a wrapped vector retriever.

    Candidate nodes are fetched from the wrapped retriever and then passed
    through a node postprocessor (the reranker) before being returned.
    """

    def __init__(
        self, retriever: VectorIndexRetriever, node_postprocessor=None
    ) -> None:
        """Store the wrapped retriever and an optional reranker.

        Args:
            retriever: Retriever that produces the candidate nodes.
            node_postprocessor: Object exposing
                ``postprocess_nodes(nodes, query_bundle)``. When omitted, the
                module-level ``reranker`` is looked up at query time, so
                existing ``CustomRetriever(retriever)`` calls keep working.
        """
        self._retriever = retriever
        self._node_postprocessor = node_postprocessor
        super().__init__()

    def _apply_reranker(
        self, candidates: List[NodeWithScore], query_bundle: QueryBundle
    ) -> List[NodeWithScore]:
        """Run the configured (or global fallback) reranker over ``candidates``."""
        postprocessor = self._node_postprocessor
        if postprocessor is None:
            # Late lookup of the notebook global: rebinding `reranker` in a
            # later cell is picked up without rebuilding the retriever.
            postprocessor = reranker
        return postprocessor.postprocess_nodes(candidates, query_bundle)

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Fetch candidates synchronously and return them reranked."""
        candidates = self._retriever.retrieve(query_bundle)
        return self._apply_reranker(candidates, query_bundle)

    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Fetch candidates via the wrapped retriever's async path, then rerank."""
        # Await the wrapped retriever instead of calling the blocking
        # `retrieve` from inside a coroutine.
        candidates = await self._retriever.aretrieve(query_bundle)
        # NOTE: the rerank step itself is still a synchronous call.
        return self._apply_reranker(candidates, query_bundle)

    async def aretrieve(self, str_or_query_bundle: QueryType) -> List[NodeWithScore]:
        """Async entry point: accept a plain string or a ``QueryBundle``.

        NOTE(review): this replaces ``BaseRetriever.aretrieve``, so whatever
        the base implementation does around ``_aretrieve`` is skipped here --
        confirm that is intended.
        """
        if isinstance(str_or_query_bundle, str):
            str_or_query_bundle = QueryBundle(str_or_query_bundle)

        return await self._aretrieve(str_or_query_bundle)

In [None]:
# Build an in-memory vector index over the chunked nodes and a retriever on
# top of it (similarity_top_k=10 candidates requested per query).
vector_index = VectorStoreIndex(nodes)
vector_retriever = VectorIndexRetriever(
    index=vector_index,
    similarity_top_k=10
)

# Wrap the vector retriever so its candidates are reranked before scoring.
custom_retriever = CustomRetriever(vector_retriever)

from llama_index.core.evaluation import RetrieverEvaluator

# Names of the retrieval metrics to compute for every query.
metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]
evaluator = RetrieverEvaluator.from_metric_names(
    metrics, retriever=custom_retriever
)
# Top-level `await` relies on the notebook kernel's support for async cells;
# this line is not valid in a plain Python script.
results = await evaluator.aevaluate_dataset(qa_dataset)

In [None]:
import pandas as pd

def display_results(name, eval_results, metric_names=None):
    """Summarise per-query evaluation results as a one-row DataFrame.

    Args:
        name: Label written to the ``retrievers`` column.
        eval_results: Iterable of objects exposing a ``metric_vals_dict``
            mapping of metric name to score, one per evaluated query.
        metric_names: Metrics to average. Defaults to every metric present in
            ``eval_results``, so the function does not depend on a ``metrics``
            global defined in another cell.

    Returns:
        A DataFrame with a ``retrievers`` column followed by one column per
        metric holding the mean score across all results. With no results
        and no explicit ``metric_names``, only ``retrievers`` is returned.

    Raises:
        KeyError: If an explicitly requested metric is missing from the results.
    """
    per_query_df = pd.DataFrame(
        [eval_result.metric_vals_dict for eval_result in eval_results]
    )

    if metric_names is None:
        # Column order follows the key order of the metric dicts.
        metric_names = list(per_query_df.columns)

    summary = {"retrievers": [name]}
    for metric_name in metric_names:
        summary[metric_name] = [per_query_df[metric_name].mean()]

    return pd.DataFrame(summary)

In [None]:
# Last expression of the cell: the averaged-metrics table for the reranked
# retriever is rendered as the cell output.
display_results("rerank", results)