# Component 5: Comprehensive Benchmark Comparison

**Compares VQ, PQ, and LSH across all experiments**

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Default size for figures that do not pass their own figsize.
plt.rcParams.update({"figure.figsize": (8, 5)})

# Folder the result CSVs are read from and the comparison figures are saved to.
results_dir = Path("Data") / "results"

## Load All Results

In [None]:
def load_experiment(file_suffix, required_columns):
    """Load the VQ, PQ and LSH result CSVs of one experiment and stack them.

    Reads ``results_dir / "<prefix>_<file_suffix>.csv"`` for the prefixes
    ``vq``, ``pq`` and ``lsh`` (in that order) and concatenates the three
    frames with a fresh 0..n-1 index.

    Parameters
    ----------
    file_suffix : str
        File name part after the method prefix, without ``.csv``
        (e.g. ``"scaling_N"``).
    required_columns : iterable of str
        Columns that later cells of this notebook index on the combined frame.

    Returns
    -------
    tuple
        ``(vq_frame, pq_frame, lsh_frame, combined_frame)``.

    Raises
    ------
    KeyError
        If the combined frame lacks any of ``required_columns``. The later
        cells would raise a KeyError on the same column anyway; raising here
        names the experiment and all missing columns at load time.
    """
    per_method = [
        pd.read_csv(results_dir / f"{prefix}_{file_suffix}.csv")
        for prefix in ("vq", "pq", "lsh")
    ]
    combined = pd.concat(per_method, ignore_index=True)

    missing = [col for col in required_columns if col not in combined.columns]
    if missing:
        raise KeyError(
            f"{file_suffix} results are missing expected column(s): {missing}"
        )
    return (*per_method, combined)


# Experiment 1: Accuracy vs Efficiency
vq_exp1, pq_exp1, lsh_exp1, df_exp1 = load_experiment(
    "accuracy_efficiency",
    ["method", "candidate_ratio", "recall_at_k", "ndcg_at_k", "query_time"],
)

print("=== Experiment 1: Accuracy vs Efficiency ===")
display(df_exp1)

# Experiment 2: Scaling with N
vq_exp2, pq_exp2, lsh_exp2, df_exp2 = load_experiment(
    "scaling_N",
    ["method", "N", "build_time", "query_time"],
)

print("\n=== Experiment 2: Scaling with N ===")
display(df_exp2)

# Experiment 3: Scaling with Dimensionality
vq_exp3, pq_exp3, lsh_exp3, df_exp3 = load_experiment(
    "scaling_dim",
    ["method", "dim", "build_time", "query_time"],
)

print("\n=== Experiment 3: Scaling with Dimensionality ===")
display(df_exp3)

## Plot 1: Accuracy vs Efficiency

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# One entry per panel: (x column, y column, marker, x label, y label, title).
panels = [
    ("candidate_ratio", "recall_at_k", "o",
     "Candidate Ratio", "Recall@10", "Accuracy vs Efficiency"),
    ("candidate_ratio", "ndcg_at_k", "s",
     "Candidate Ratio", "nDCG@10", "Ranking Quality"),
    ("query_time", "recall_at_k", "^",
     "Query Time (s)", "Recall@10", "Speed vs Accuracy"),
]

for ax, (x_col, y_col, marker, x_label, y_label, title) in zip(axes, panels):
    for method in ["VQ", "PQ", "LSH"]:
        # Sort by the panel's x column so each line runs left to right, the
        # same way the scaling plots sort by "N" and "dim". Plotting in raw
        # CSV row order makes the line double back when rows are not already
        # ordered by x.
        subset = df_exp1[df_exp1["method"] == method].sort_values(x_col)
        ax.plot(subset[x_col], subset[y_col],
                marker=marker, label=method, linewidth=2, markersize=8)

    ax.set(xlabel=x_label, ylabel=y_label, title=title)
    ax.legend(fontsize=11)
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.savefig(results_dir / "comparison_accuracy_efficiency.png", dpi=200, bbox_inches="tight")
plt.show()

## Plot 2: Scaling with N

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# (axes, y column, marker, y label, title) for the two panels; both share N on x.
panel_specs = (
    (ax1, "build_time", "o", "Build Time (s)", "Index Construction Time"),
    (ax2, "query_time", "s", "Query Time (s)", "Query Performance"),
)

for axis, y_col, marker, y_label, title in panel_specs:
    for method in ("VQ", "PQ", "LSH"):
        is_method = df_exp2["method"] == method
        subset = df_exp2[is_method].sort_values("N")
        axis.plot(
            subset["N"],
            subset[y_col],
            marker=marker,
            label=method,
            linewidth=2,
            markersize=8,
        )

    axis.set(xlabel="N (documents)", ylabel=y_label, title=title)
    axis.legend(fontsize=11)
    axis.grid(alpha=0.3)

plt.tight_layout()
plt.savefig(results_dir / "comparison_scaling_N.png", dpi=200, bbox_inches="tight")
plt.show()

## Plot 3: Scaling with Dimensionality

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Each method's rows ordered by dimensionality, computed once for both panels.
rows_by_method = {
    name: df_exp3[df_exp3["method"] == name].sort_values("dim")
    for name in ["VQ", "PQ", "LSH"]
}

# Left panel: build time against dimensionality.
for method, subset in rows_by_method.items():
    ax1.plot(subset["dim"], subset["build_time"],
             marker="o", label=method, linewidth=2, markersize=8)
ax1.set_xlabel("Dimensionality")
ax1.set_ylabel("Build Time (s)")
ax1.set_title("Index Construction vs Dimensionality")
ax1.legend(fontsize=11)
ax1.grid(alpha=0.3)

# Right panel: query time against dimensionality.
for method, subset in rows_by_method.items():
    ax2.plot(subset["dim"], subset["query_time"],
             marker="s", label=method, linewidth=2, markersize=8)
ax2.set_xlabel("Dimensionality")
ax2.set_ylabel("Query Time (s)")
ax2.set_title("Query Performance vs Dimensionality")
ax2.legend(fontsize=11)
ax2.grid(alpha=0.3)

plt.tight_layout()
plt.savefig(results_dir / "comparison_scaling_dim.png", dpi=200, bbox_inches="tight")
plt.show()

## Plot 4: Summary Comparison

In [None]:
# Method order used for the bar positions and tick labels in this cell.
methods = ["VQ", "PQ", "LSH"]
x = np.arange(len(methods))
# NOTE(review): `width` is not passed to any bar() call in this cell.
width = 0.35

# For each method, the df_exp1 row with the highest recall_at_k.
top_recall_labels = df_exp1.groupby("method")["recall_at_k"].idxmax()
best_configs = df_exp1.loc[top_recall_labels]

# Scaling-with-N rows measured at N=10,000.
at_10k_docs = df_exp2["N"] == 10000
perf_10k = df_exp2[at_10k_docs]

# Scaling-with-dimensionality rows measured at d=200.
at_200_dims = df_exp3["dim"] == 200
perf_200d = df_exp3[at_200_dims]

def get_values(df, col, method_names=None):
    """Return one value of ``col`` per method, in method order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``"method"`` column and the column named by ``col``.
    col : str
        Column to read the value from.
    method_names : sequence of str, optional
        Methods to look up, in output order. Defaults to the notebook-level
        ``methods`` list, which is what existing two-argument calls rely on.

    Returns
    -------
    list
        For each method, the value of ``col`` in the first row of ``df`` whose
        ``"method"`` equals it, or ``np.nan`` when no row matches.
    """
    if method_names is None:
        method_names = methods

    values = []
    for name in method_names:
        # Filter once per method; the same selection serves both the
        # emptiness check and the value lookup.
        matches = df.loc[df["method"] == name, col]
        values.append(matches.iloc[0] if len(matches) > 0 else np.nan)
    return values

# Bar heights for the four panels, each list ordered like `methods`.
best_recalls, best_ndcgs = [
    get_values(best_configs, metric) for metric in ("recall_at_k", "ndcg_at_k")
]
build_10k = get_values(perf_10k, "build_time")
query_200d = get_values(perf_200d, "query_time")

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
bar_colors = ['#1f77b4', '#ff7f0e', '#2ca02c']

# (axes, bar heights, y label, title, pin the y-axis to [0, 1]?)
panels = [
    (axes[0, 0], best_recalls, "Recall@10", "Best Recall per Method", True),
    (axes[0, 1], best_ndcgs, "nDCG@10", "Best nDCG per Method", True),
    (axes[1, 0], build_10k, "Build Time (s)", "Build Time at N=10,000", False),
    (axes[1, 1], query_200d, "Query Time (s)", "Query Time at d=200", False),
]

for panel_ax, heights, y_label, title, unit_scale in panels:
    panel_ax.bar(x, heights, color=bar_colors)
    panel_ax.set_xticks(x)
    panel_ax.set_xticklabels(methods)
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(title)
    if unit_scale:
        # Only the recall and nDCG panels use a fixed 0..1 axis.
        panel_ax.set_ylim([0, 1.0])
    panel_ax.grid(axis="y", alpha=0.3)

plt.tight_layout()
plt.savefig(results_dir / "comparison_summary.png", dpi=200, bbox_inches="tight")
plt.show()

## Summary Tables

In [None]:
rule = "=" * 70
timing_headers = {
    "method": "Method",
    "build_time": "Build Time (s)",
    "query_time": "Query Time (s)",
}

# Highest-recall row per method, reduced to the reported metrics.
best_configs = df_exp1.loc[df_exp1.groupby("method")["recall_at_k"].idxmax()]
summary = best_configs[
    ["method", "recall_at_k", "ndcg_at_k", "candidate_ratio", "query_time"]
].rename(columns={
    "method": "Method",
    "recall_at_k": "Recall@10",
    "ndcg_at_k": "nDCG@10",
    "candidate_ratio": "Candidate Ratio",
    "query_time": "Query Time (s)",
})

print("\n" + rule, "BEST CONFIGURATIONS (HIGHEST RECALL)", rule, sep="\n")
display(summary.reset_index(drop=True))

# Build and query timings for the rows measured at N=10,000.
perf_10k = df_exp2.loc[
    df_exp2["N"] == 10000, ["method", "build_time", "query_time"]
].rename(columns=timing_headers)

print("\n" + rule, "PERFORMANCE AT N=10,000 DOCUMENTS", rule, sep="\n")
display(perf_10k.reset_index(drop=True))

# Build and query timings for the rows measured at d=200.
perf_200d = df_exp3.loc[
    df_exp3["dim"] == 200, ["method", "build_time", "query_time"]
].rename(columns=timing_headers)

print("\n" + rule, "PERFORMANCE AT d=200 DIMENSIONS", rule, sep="\n")
display(perf_200d.reset_index(drop=True))

## Key Insights

In [None]:
rule = "=" * 70
print("\n" + rule, "KEY FINDINGS", rule, sep="\n")

# One row per method: the configuration with the highest recall_at_k.
best = df_exp1.loc[df_exp1.groupby("method")["recall_at_k"].idxmax()]

# Highest recall first.
print("\n1. ACCURACY (Recall@10):")
ranked = best.sort_values("recall_at_k", ascending=False)
for name, recall in zip(ranked["method"], ranked["recall_at_k"]):
    print(f"   {name:4s}: {recall:.3f}")

# Highest nDCG first.
print("\n2. RANKING QUALITY (nDCG@10):")
ranked = best.sort_values("ndcg_at_k", ascending=False)
for name, ndcg in zip(ranked["method"], ranked["ndcg_at_k"]):
    print(f"   {name:4s}: {ndcg:.3f}")

# Smallest candidate ratio first.
print("\n3. EFFICIENCY (Candidate Ratio):")
ranked = best.sort_values("candidate_ratio")
for name, ratio in zip(ranked["method"], ranked["candidate_ratio"]):
    print(f"   {name:4s}: {ratio:.3f} ({ratio*100:.1f}% of docs)")

# Shortest query time first.
print("\n4. SPEED (Query Time):")
ranked = best.sort_values("query_time")
for name, seconds in zip(ranked["method"], ranked["query_time"]):
    print(f"   {name:4s}: {seconds:.4f}s")

print("\n" + rule)