## Evaluation of visual retrievers (95% CI)

In [None]:
import pandas as pd
from functions import (
    run_ci_summary,
    DEFAULT_PRICE_DICT,
    get_metric_descriptions,
)

In [None]:
# Top-k setting analysed by this notebook; must be one of the TOPK_CONFIG keys.
TOP_K = 5

# Input folder and output workbook for each supported top-k setting.
# NOTE(review): the top-k 10 data is read from 'evals/' while the top-k 5 data
# is read from 'results/' -- confirm this difference is intentional.
TOPK_CONFIG = {
    5: dict(
        data_path='results/evals_topk5/',
        summary_path='results/summary_CI_topk5.xlsx',
    ),
    10: dict(
        data_path='evals/evals_topk10/',
        summary_path='results/summary_CI_topk10.xlsx',
    ),
}
CONFIG = TOPK_CONFIG[TOP_K]
DATA_PATH, SUMMARY_FILE = CONFIG['data_path'], CONFIG['summary_path']

# Column name of the precision metric, e.g. 'P@5' when TOP_K is 5.
PRECISION_LABEL = f'P@{TOP_K}'

# Ordering lists handed to run_ci_summary for the model and retriever columns.
MODEL_ORDER = ['gpt-5', 'gpt-5-mini', 'gpt-5-nano']
RETRIEVER_ORDER = [
    'vidore/colpali-v1.3-merged',
    'vidore/colqwen2.5-v0.2',
    'ahmed-masry/ColFlor',
]

# Grouping keys for the detailed summary, and the same keys without
# 'Difficulty' for the overview summary.
GROUP_BY_COLS = ['Model', 'Model_ret', 'Difficulty']
GROUP_BY_NO_DIFFICULTY = [name for name in GROUP_BY_COLS if name != 'Difficulty']

# Metric columns shown in the notebook and exported to Excel.
DISPLAY_COLUMNS = [
    'Cor_answer', 'Elapsed', 'Total_tokens',
    PRECISION_LABEL,
    'Throughput', 'Cost', 'Price-per-cost',
]

# Metric descriptions for the selected top-k, exported to the 'Descriptions' sheet.
metric_descriptions = get_metric_descriptions(TOP_K)

In [None]:
# Run the CI summary grouped by GROUP_BY_COLS. The call returns three objects,
# bound here as the per-iteration summary, the summary table, and merged_df.
per_iteration_summary, summary_table, merged_df = run_ci_summary(
    top_k=TOP_K,
    path=DATA_PATH,
    price_dict=DEFAULT_PRICE_DICT,
    precision_label=PRECISION_LABEL,
    group_by_cols=GROUP_BY_COLS,
    model_order=MODEL_ORDER,
    retriever_order=RETRIEVER_ORDER,
)

# Keep only the grouping keys followed by the metrics selected for display.
summary_table_display = summary_table[[*GROUP_BY_COLS, *DISPLAY_COLUMNS]]

In [None]:
# Show the first object returned by run_ci_summary (the per-iteration summary).
per_iteration_summary

In [None]:
# Show the summary table restricted to GROUP_BY_COLS + DISPLAY_COLUMNS.
summary_table_display

In [None]:
# Wide view of the summary: 'Difficulty' is moved from the row index into the
# column index, so each metric gets one column per difficulty value.
#
# The result is bound to a name and left as the cell's last top-level
# expression because Jupyter only renders a cell's final expression statement;
# an expression nested inside an `if` body is evaluated and silently discarded.
# When 'Difficulty' is not a grouping column the value is None and nothing is
# rendered.
difficulty_pivot = None
if 'Difficulty' in GROUP_BY_COLS:
    difficulty_pivot = (
        summary_table
        .set_index(GROUP_BY_COLS)[DISPLAY_COLUMNS]
        .unstack('Difficulty')
    )
difficulty_pivot

In [None]:
# Overview summary grouped by GROUP_BY_NO_DIFFICULTY, i.e. the same grouping
# keys without 'Difficulty'. Stays None when 'Difficulty' was never a grouping
# column, because the overview would then repeat the main summary.
summary_table_no_diff = None
if GROUP_BY_COLS != GROUP_BY_NO_DIFFICULTY:
    # merged_df from the first run is handed back through `dataframe=`;
    # presumably this avoids re-reading the raw files -- confirm in
    # functions.run_ci_summary.
    per_iteration_no_diff, summary_table_no_diff, _ = run_ci_summary(
        dataframe=merged_df,
        top_k=TOP_K,
        path=DATA_PATH,
        price_dict=DEFAULT_PRICE_DICT,
        precision_label=PRECISION_LABEL,
        group_by_cols=GROUP_BY_NO_DIFFICULTY,
        model_order=MODEL_ORDER,
        retriever_order=RETRIEVER_ORDER,
    )
    # Keep only the grouping keys followed by the metrics selected for display.
    summary_table_no_diff = summary_table_no_diff[
        [*GROUP_BY_NO_DIFFICULTY, *DISPLAY_COLUMNS]
    ]
summary_table_no_diff

In [None]:
# Show the third object returned by run_ci_summary; the same frame is exported
# to the 'raw_data' sheet of the Excel workbook.
merged_df

In [None]:
# Turn the metric -> description mapping into a two-column table
# ('Metric', 'Description') with one row per metric.
metric_descriptions_df = pd.DataFrame(
    [
        dict(Metric=name, Description=text)
        for name, text in metric_descriptions.items()
    ]
)
metric_descriptions_df

In [None]:
# Export every table built in this notebook to one Excel workbook, one sheet
# per table, and finish by showing the path that was written.
from pathlib import Path

excel_path = SUMMARY_FILE

# pd.ExcelWriter does not create missing folders, so make sure the target
# directory exists before the workbook is opened.
Path(excel_path).parent.mkdir(parents=True, exist_ok=True)

with pd.ExcelWriter(excel_path) as writer:
    if 'Difficulty' in GROUP_BY_COLS:
        # Wide layout: 'Difficulty' becomes a column level. The index holds the
        # remaining grouping keys, so it is written out (default index=True).
        pivot_df = summary_table.set_index(GROUP_BY_COLS)[DISPLAY_COLUMNS].unstack('Difficulty')
        pivot_df.to_excel(writer, sheet_name=f'diff_topk{TOP_K}')
    else:
        summary_table_display.to_excel(writer, sheet_name=f'diff_topk{TOP_K}', index=False)

    # The overview sheet exists only when the no-difficulty summary was computed.
    if summary_table_no_diff is not None:
        summary_table_no_diff.to_excel(writer, sheet_name=f'overview_topk{TOP_K}', index=False)

    per_iteration_summary.to_excel(writer, sheet_name='per_iteration', index=False)
    merged_df.to_excel(writer, sheet_name='raw_data', index=False)
    metric_descriptions_df.to_excel(writer, sheet_name='Descriptions', index=False)

excel_path