### Grid Search Evaluation for Chunk Size and Overlap 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw grid-search results and print a column/dtype summary.
results_path = "../data/eval/grid_search_results.csv"
df = pd.read_csv(results_path)
df.info()

In [None]:
# Coerce every metric column to a numeric dtype. The grid parameters and the
# query column are left as loaded; values that cannot be parsed become NaN.
non_metric_columns = ('chunk_size', 'overlap', 'query')
metric_columns = [name for name in df.columns if name not in non_metric_columns]
for name in metric_columns:
    df[name] = pd.to_numeric(df[name], errors='coerce')

In [None]:
# Average every numeric column within each (chunk_size, overlap) configuration,
# then move the grouping keys back into ordinary columns.
group_keys = ['chunk_size', 'overlap']
per_config_means = df.groupby(group_keys).mean(numeric_only=True)
agg = per_config_means.reset_index()

In [None]:
# Heatmap: Precision@5 by Chunk Size and Overlap
# Reshape the aggregated table into a grid with one row per chunk size and one
# column per overlap, so each cell holds the mean precision@5 for that pair.
# The arguments are passed by keyword because DataFrame.pivot is keyword-only
# in pandas 2.0+; the positional form raises a TypeError there.
pivot = agg.pivot(index='chunk_size', columns='overlap', values='precision@5')
plt.figure(figsize=(8, 6))
# annot/fmt print each cell's value with two decimals on top of the colour.
sns.heatmap(pivot, annot=True, fmt='.2f', cmap='viridis')
plt.title('Precision@5 Heatmap')
plt.ylabel('Chunk Size')
plt.xlabel('Overlap')
plt.show()

In [None]:
# Heatmap: Recall@5 by Chunk Size and Overlap
# Reshape the aggregated table into a grid with one row per chunk size and one
# column per overlap, so each cell holds the mean recall@5 for that pair.
# The arguments are passed by keyword because DataFrame.pivot is keyword-only
# in pandas 2.0+; the positional form raises a TypeError there.
pivot = agg.pivot(index='chunk_size', columns='overlap', values='recall@5')
plt.figure(figsize=(8, 6))
# annot/fmt print each cell's value with two decimals on top of the colour.
sns.heatmap(pivot, annot=True, fmt='.2f', cmap='magma')
plt.title('Recall@5 Heatmap')
plt.ylabel('Chunk Size')
plt.xlabel('Overlap')
plt.show()

In [None]:
# Line Plots: Other Metrics
# One figure per metric; within a figure, one line per chunk size showing how
# the metric changes with overlap.
metrics = ['MRR', 'nDCG', 'contextual_recall_sufficiency']
chunk_sizes = agg['chunk_size'].unique()
for metric_name in metrics:
    plt.figure(figsize=(8, 5))
    for size in chunk_sizes:
        rows = agg.loc[agg['chunk_size'] == size]
        plt.plot(
            rows['overlap'],
            rows[metric_name],
            marker='o',
            label=f'Chunk {size}',
        )
    plt.title(f'{metric_name} vs Overlap')
    plt.xlabel('Overlap')
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()

In [None]:
# Recommendation: Best config by Precision@5
# Rank configurations by mean precision@5 (highest first) and report the top
# row together with its other headline metrics.
ranked = agg.sort_values(by='precision@5', ascending=False)
best = ranked.iloc[0]
report_lines = [
    f"Recommended chunk size: {int(best['chunk_size'])}",
    f"Recommended overlap: {int(best['overlap'])}",
    f"Precision@5: {best['precision@5']:.3f}",
    f"Recall@5: {best['recall@5']:.3f}",
    f"MRR: {best['MRR']:.3f}",
    f"nDCG: {best['nDCG']:.3f}",
]
print("\n".join(report_lines))