# Retrieval Evaluation: Precision@5 and nDCG@5

This notebook computes retrieval metrics for sparse vs. dense retrievers using your existing ranking data.

Metrics included:
- **Precision@5 (P@5)**
- **nDCG@5**

Expected columns in your CSV:
- `query_id`
- `retriever` (e.g., "sparse", "dense")
- `rank` (1 = top)
- `relevance` (0/1)

> Update the path in the next cell to point to your actual CSV.


In [12]:
import pandas as pd
import numpy as np
from pathlib import Path

# Update this path to your real data file
DATA_PATH = Path("retriever_result/retrieval_results_filled.csv")

# Column names
COL_QUERY_ID = "query_id"
COL_RETRIEVER = "retriever"
COL_RANK = "rank"
COL_REL = "is_relevant"

In [13]:
# Load retrieval data
df = pd.read_csv(DATA_PATH)

print("Loaded dataset:")
display(df.head())

print("\nDistinct retrievers:", df[COL_RETRIEVER].unique())
print("Queries:", df[COL_QUERY_ID].nunique())
print("Rows:", len(df))

Loaded dataset:


Unnamed: 0,query_id,query_text,retriever,rank,chunk_id,text,score,is_relevant
0,q001,long method with many if statements in Python,sparse,1,Fluent.Python.2nd.Edition.(z-lib.org).pdf:p928_c1,EAFP Easier to ask for forgiveness than permis...,0.091903,1
1,q001,long method with many if statements in Python,sparse,2,cc_knowledge_book.pdf:p141_c2,problems upon our callers. All it takes is one...,0.073481,1
2,q001,long method with many if statements in Python,sparse,3,Fluent.Python.2nd.Edition.(z-lib.org).pdf:p950_c1,Chapter Summary This chapter started easily en...,0.063037,1
3,q001,long method with many if statements in Python,sparse,4,cc_knowledge_book.pdf:p293_c1,262 Chapter 15: JUnit Internals We replaced th...,0.062122,1
4,q001,long method with many if statements in Python,sparse,5,Fluent.Python.2nd.Edition.(z-lib.org).pdf:p924_c1,Chapter 18. Context Managers and else Blocks A...,0.060011,1



Distinct retrievers: ['sparse']
Queries: 30
Rows: 150


In [14]:
def dcg_at_k(relevances, k=5):
    relevances = np.asarray(relevances)[:k]
    discounts = np.log2(np.arange(2, len(relevances) + 2))
    return np.sum(relevances / discounts)

def ndcg_at_k(relevances, k=5):
    dcg = dcg_at_k(relevances, k)
    ideal = sorted(relevances, reverse=True)
    idcg = dcg_at_k(ideal, k)
    return 0 if idcg == 0 else dcg / idcg

def precision_at_k(relevances, k=5):
    relevances = np.asarray(relevances)[:k]
    return np.mean(relevances)

In [15]:
K = 5
metrics_per_query = []

grouped = df.groupby([COL_RETRIEVER, COL_QUERY_ID])

for (retriever, qid), group in grouped:
    group_sorted = group.sort_values(by=COL_RANK)
    relevances = group_sorted[COL_REL].values

    p5 = precision_at_k(relevances, K)
    ndcg5 = ndcg_at_k(relevances, K)

    metrics_per_query.append({
        "retriever": retriever,
        "query_id": qid,
        "P@5": p5,
        "nDCG@5": ndcg5,
    })

metrics_df = pd.DataFrame(metrics_per_query)
display(metrics_df.head())

Unnamed: 0,retriever,query_id,P@5,nDCG@5
0,sparse,q001,1.0,1.0
1,sparse,q002,0.2,1.0
2,sparse,q003,0.8,1.0
3,sparse,q004,1.0,1.0
4,sparse,q005,0.8,0.95583


In [16]:
summary = (
    metrics_df.groupby("retriever")
    .agg(
        mean_P5=("P@5", "mean"),
        std_P5=("P@5", "std"),
        mean_nDCG5=("nDCG@5", "mean"),
        std_nDCG5=("nDCG@5", "std"),
        queries=("query_id", "nunique")
    )
    .reset_index()
)

print("Retrieval performance summary:")
display(summary)

Retrieval performance summary:


Unnamed: 0,retriever,mean_P5,std_P5,mean_nDCG5,std_nDCG5,queries
0,sparse,0.813333,0.251524,0.944222,0.101434,30


In [17]:
def to_latex_table(summary_df, caption="Retrieval Metrics", label="tab:retrieval"):
    rows = []
    for _, row in summary_df.iterrows():
        rows.append(f"{row['retriever']} & {row['mean_P5']:.2f} & {row['mean_nDCG5']:.2f} \\")
    body = "\n".join(rows)

    return rf"""
\n\begin{{table}}[t]
\centering
\caption{{{caption}}}
\label{{{label}}}
\begin{{tabular}}{{lcc}}
\hline
\textbf{{Retriever}} & \textbf{{P@5}} & \textbf{{nDCG@5}} \\
\hline
{body}
\\hline
\end{{tabular}}
\end{{table}}
"""

latex_output = to_latex_table(summary)
print(latex_output)


\n\begin{table}[t]
\centering
\caption{Retrieval Metrics}
\label{tab:retrieval}
\begin{tabular}{lcc}
\hline
\textbf{Retriever} & \textbf{P@5} & \textbf{nDCG@5} \\
\hline
sparse & 0.81 & 0.94 \
\\hline
\end{tabular}
\end{table}



In [18]:
metrics_df.to_csv("evals/retrieval_metrics_per_query.csv", index=False)
summary.to_csv("evals/retrieval_metrics_summary.csv", index=False)

print("Saved output files:")
print(" - evals/retrieval_metrics_per_query.csv")
print(" - evals/retrieval_metrics_summary.csv")

OSError: Cannot save file into a non-existent directory: 'evals'