In [None]:
import nest_asyncio

nest_asyncio.apply()

import os
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
import pandas as pd
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import Settings
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine, SubQuestionQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Credentials: keep a key that is already exported in the environment and only
# prompt when none is available. Assigning a literal here would overwrite a
# valid key and put a secret (or an empty string) into the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")

# LLM handle used by cells that pass `llm=` explicitly.
# NOTE(review): Settings.llm is not assigned in this cell, so components that
# are not given `llm` explicitly will not use this instance -- confirm that is
# intended.
llm = OpenAI(model="gpt-4o-mini", temperature=0.0)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load everything under ./data and split it into 512-sized chunks with an
# overlap of 20 (units as defined by SentenceSplitter).
documents = SimpleDirectoryReader("data").load_data()
splitter = SentenceSplitter(chunk_size=512, chunk_overlap=20)
nodes = splitter.get_nodes_from_documents(documents)

# Evaluation questions: one per row of the "Query" column.
df = pd.read_csv("questions/Lyft2021_queries.csv")
queries = df["Query"].tolist()

In [None]:
# Index the pre-split nodes and expose a retriever returning the 2 most
# similar nodes per query.
top_vector_index = VectorStoreIndex(nodes)
vector_retriever = top_vector_index.as_retriever(similarity_top_k=2)

# Recursive retriever rooted at the single "vector" retriever registered below.
recursive_retriever = RecursiveRetriever(
    root_id="vector",
    retriever_dict={"vector": vector_retriever},
    verbose=True,
)

# Query engine: recursive retrieval followed by "compact" response synthesis.
response_synthesizer = get_response_synthesizer(response_mode="compact")
retriever_query_engine = RetrieverQueryEngine.from_args(
    retriever=recursive_retriever,
    response_synthesizer=response_synthesizer,
)

In [None]:
# Describe the retriever-backed query engine as a tool so the sub-question
# engine can route generated sub-questions to it.
recursive_tool_metadata = ToolMetadata(
    name="recursive_retriever",
    description="Recursive retriever for accessing documents",
)
recursive_tool = QueryEngineTool(
    query_engine=retriever_query_engine,
    metadata=recursive_tool_metadata,
)
query_engine_tools = [recursive_tool]

# Sub-question engine over the single tool above, with async execution enabled.
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    use_async=True,
)

In [None]:
# One record per question: the question itself plus the ids of the source
# nodes attached to the query engine's response. Note that
# "retrieved_context" holds node ids, not node text.
qa_dataset = [
    {
        "query": question,
        "retrieved_context": [
            scored.node.node_id
            for scored in query_engine.query(question).source_nodes
        ],
    }
    for question in queries
]

In [None]:
from llama_index.core.evaluation import ContextRelevancyEvaluator
from tqdm.asyncio import tqdm_asyncio

evaluator = ContextRelevancyEvaluator(llm=llm)
results = []

for qa_data in qa_dataset:
    result = evaluator.aevaluate(qa_data["query"], contexts=qa_data["retrieved_context"])
    results.append(result)

results = await tqdm_asyncio.gather(*results)
print(results)

In [None]:
from llama_index.core.evaluation import RetrieverEvaluator

# Metric names requested from the retriever evaluator.
metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]
evaluator = RetrieverEvaluator.from_metric_names(
    metrics,
    retriever=recursive_retriever,
)

# Score the recursive retriever on every question, using the ids recorded in
# qa_dataset as the expected ids.
# NOTE(review): those ids come from this pipeline's own query run rather than
# from independent labels, so the scores may be circular -- confirm intent.
eval_results = [
    evaluator.evaluate(
        query=entry["query"],
        expected_ids=entry["retrieved_context"],
    )
    for entry in qa_dataset
]

def display_results(name, eval_results, metric_names=None):
    """Summarise per-query retrieval metrics as a one-row DataFrame.

    Args:
        name: Label written to the "retrievers" column.
        eval_results: Iterable of objects exposing a ``metric_vals_dict``
            mapping of metric name to score.
        metric_names: Optional list of metric names to report, in column
            order. When omitted, the names found in the results are used, so
            the function does not depend on any module-level variable.

    Returns:
        A pandas DataFrame with a single row: the "retrievers" label followed
        by the mean of each metric across ``eval_results``. A requested metric
        that is absent from the results is reported as NaN; with no results
        and no ``metric_names`` only the "retrievers" column is returned.
    """
    metric_dicts = [eval_result.metric_vals_dict for eval_result in eval_results]
    full_df = pd.DataFrame(metric_dicts)

    if metric_names is None:
        metric_names = list(full_df.columns)

    columns = {"retrievers": [name]}
    for metric_name in metric_names:
        if metric_name in full_df.columns:
            columns[metric_name] = [full_df[metric_name].mean()]
        else:
            # Nothing was recorded for this metric (e.g. empty eval_results).
            columns[metric_name] = [float("nan")]

    return pd.DataFrame(columns)

In [None]:
# Render the averaged metrics for the recursive retriever as the cell output.
display_results(name="recursive retrieval", eval_results=eval_results)