# Result Table Analysis

In [1]:
import pandas as pd
import os

# Folder that holds every exported result table
result_folder = 'result'

# Dataframe name -> CSV file name inside the result folder.
# Insertion order is the order in which the files are read and reported.
csv_files = {
    'pipeline_summary': 'pipeline_summary.csv',
    'rag_evaluation_results': 'rag_evaluation_results.csv',
    'rag_evaluation_summary': 'rag_evaluation_summary.csv',
    'results_cutting_plane': 'results_retrieval_results_cutting_plane.csv',
    'results_robust_l1': 'results_retrieval_results_robust_l1.csv',
    'results_robust_l2': 'results_retrieval_results_robust_l2.csv',
    'results_robust_linf': 'results_retrieval_results_robust_linf.csv',
    'results_topk': 'results_retrieval_results_topk.csv',
}

# Read every table once and keep them together for easy access by name
result_dfs = {
    table_name: pd.read_csv(os.path.join(result_folder, file_name))
    for table_name, file_name in csv_files.items()
}

# Expose each table under its own variable so later cells can use it directly
pipeline_summary = result_dfs['pipeline_summary']
rag_evaluation_results = result_dfs['rag_evaluation_results']
rag_evaluation_summary = result_dfs['rag_evaluation_summary']
results_cutting_plane = result_dfs['results_cutting_plane']
results_robust_l1 = result_dfs['results_robust_l1']
results_robust_l2 = result_dfs['results_robust_l2']
results_robust_linf = result_dfs['results_robust_linf']
results_topk = result_dfs['results_topk']

# Report how many tables were loaded and the shape of each one
print(f"Loaded {len(result_dfs)} CSV files from '{result_folder}' folder:")
for name, df in result_dfs.items():
    n_rows, n_cols = df.shape
    print(f"  - {name}: {n_rows} rows, {n_cols} columns")

Loaded 8 CSV files from 'result' folder:
  - pipeline_summary: 5 rows, 8 columns
  - rag_evaluation_results: 400 rows, 16 columns
  - rag_evaluation_summary: 400 rows, 5 columns
  - results_cutting_plane: 360 rows, 20 columns
  - results_robust_l1: 240 rows, 20 columns
  - results_robust_l2: 240 rows, 20 columns
  - results_robust_linf: 240 rows, 20 columns
  - results_topk: 100 rows, 18 columns


In [4]:
# Metric columns that are averaged for every retrieval method
metric_cols = [
    'cosine_similarity',
    'manhattan_distance', 'bertscore_precision', 'bertscore_recall', 'bertscore_f1'
]

# Grouping (hyper-parameter) columns for each family of result tables
topk_params = ['parameter']
robust_params = ['param_rho_vec', 'param_lambda', 'param_rho_div']
cutting_plane_params = ['param_k', 'param_lambda', 'param_rho_div']


def aggregate_metrics(results: pd.DataFrame, group_cols: list, metric_cols: list) -> pd.DataFrame:
    """Average the metric columns for each combination of grouping columns.

    Parameters
    ----------
    results : pd.DataFrame
        Table containing both the grouping columns and the metric columns.
    group_cols : list of str
        Columns whose unique value combinations define the groups.
    metric_cols : list of str
        Columns to average within each group.

    Returns
    -------
    pd.DataFrame
        One row per group, holding the grouping columns followed by the
        metric means, each renamed with an ``avg_`` prefix.

    Raises
    ------
    KeyError
        If any grouping or metric column is absent from ``results``.
    """
    missing = [col for col in [*group_cols, *metric_cols] if col not in results.columns]
    if missing:
        raise KeyError(f"columns not found in results table: {missing}")

    return (
        results
        .groupby(group_cols)[metric_cols]
        .mean()
        # Rename by label rather than by position so a column can never be
        # mislabelled if the ordering of the aggregated frame changes.
        .add_prefix('avg_')
        .reset_index()
    )


# One aggregated table per retrieval method (names kept for later cells)
topk_agg = aggregate_metrics(results_topk, topk_params, metric_cols)
robust_l1_agg = aggregate_metrics(results_robust_l1, robust_params, metric_cols)
robust_l2_agg = aggregate_metrics(results_robust_l2, robust_params, metric_cols)
robust_linf_agg = aggregate_metrics(results_robust_linf, robust_params, metric_cols)
cutting_plane_agg = aggregate_metrics(results_cutting_plane, cutting_plane_params, metric_cols)

aggregated_tables = {
    'results_topk': topk_agg,
    'results_robust_l1': robust_l1_agg,
    'results_robust_l2': robust_l2_agg,
    'results_robust_linf': robust_linf_agg,
    'results_cutting_plane': cutting_plane_agg,
}

# Show every aggregated table; a blank line separates all but the first heading
for position, (label, agg_table) in enumerate(aggregated_tables.items()):
    separator = "\n" if position else ""
    print(f"{separator}{label} aggregated:")
    display(agg_table)

results_topk aggregated:


Unnamed: 0,parameter,avg_cosine_similarity,avg_manhattan_distance,avg_bertscore_precision,avg_bertscore_recall,avg_bertscore_f1
0,2,0.835223,9.417429,0.837156,0.853954,0.843646
1,3,0.864871,9.14924,0.837012,0.871478,0.851747
2,5,0.89354,8.175146,0.83382,0.881487,0.856484
3,7,0.899301,7.996384,0.838718,0.892908,0.864658
4,10,0.924604,7.313129,0.842894,0.895312,0.867977



results_robust_l1 aggregated:


Unnamed: 0,param_rho_vec,param_lambda,param_rho_div,avg_cosine_similarity,avg_manhattan_distance,avg_bertscore_precision,avg_bertscore_recall,avg_bertscore_f1
0,0.02,0.5,0.8,0.90531,8.310421,0.830321,0.874551,0.850111
1,0.02,0.5,0.9,0.90531,8.310421,0.830321,0.874551,0.850111
2,0.02,0.65,0.8,0.849002,8.543351,0.85946,0.867806,0.861868
3,0.02,0.65,0.9,0.849002,8.543351,0.85946,0.867806,0.861868
4,0.02,0.72,0.8,0.853331,8.214398,0.870126,0.871231,0.868958
5,0.02,0.72,0.9,0.853331,8.214398,0.870126,0.871231,0.868958
6,0.05,0.5,0.8,0.90531,8.310421,0.830321,0.874551,0.850111
7,0.05,0.5,0.9,0.90531,8.310421,0.830321,0.874551,0.850111
8,0.05,0.65,0.8,0.862298,8.197897,0.86506,0.869158,0.865297
9,0.05,0.65,0.9,0.85151,8.447031,0.862357,0.868162,0.863458



results_robust_l2 aggregated:


Unnamed: 0,param_rho_vec,param_lambda,param_rho_div,avg_cosine_similarity,avg_manhattan_distance,avg_bertscore_precision,avg_bertscore_recall,avg_bertscore_f1
0,0.02,0.5,0.8,0.919016,7.855863,0.835309,0.886448,0.858159
1,0.02,0.5,0.9,0.919016,7.855863,0.835309,0.886448,0.858159
2,0.02,0.65,0.8,0.851599,8.50261,0.860889,0.865701,0.861494
3,0.02,0.65,0.9,0.851599,8.50261,0.860889,0.865701,0.861494
4,0.02,0.72,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
5,0.02,0.72,0.9,0.853331,8.214398,0.870126,0.871231,0.868958
6,0.05,0.5,0.8,0.921844,7.458907,0.840264,0.883362,0.859213
7,0.05,0.5,0.9,0.911367,7.75545,0.840013,0.873335,0.854462
8,0.05,0.65,0.8,0.853331,8.214398,0.870126,0.871231,0.868958
9,0.05,0.65,0.9,0.853331,8.214398,0.870126,0.871231,0.868958



results_robust_linf aggregated:


Unnamed: 0,param_rho_vec,param_lambda,param_rho_div,avg_cosine_similarity,avg_manhattan_distance,avg_bertscore_precision,avg_bertscore_recall,avg_bertscore_f1
0,0.02,0.5,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
1,0.02,0.5,0.9,0.855062,7.926186,0.879363,0.876761,0.876423
2,0.02,0.65,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
3,0.02,0.65,0.9,0.855062,7.926186,0.879363,0.876761,0.876423
4,0.02,0.72,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
5,0.02,0.72,0.9,0.855062,7.926186,0.879363,0.876761,0.876423
6,0.05,0.5,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
7,0.05,0.5,0.9,0.855062,7.926186,0.879363,0.876761,0.876423
8,0.05,0.65,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
9,0.05,0.65,0.9,0.855062,7.926186,0.879363,0.876761,0.876423



results_cutting_plane aggregated:


Unnamed: 0,param_k,param_lambda,param_rho_div,avg_cosine_similarity,avg_manhattan_distance,avg_bertscore_precision,avg_bertscore_recall,avg_bertscore_f1
0,20,0.5,0.8,0.905772,7.261505,0.85965,0.890416,0.87365
1,20,0.5,0.9,0.905772,7.261505,0.85965,0.890416,0.87365
2,20,0.65,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
3,20,0.65,0.9,0.853331,8.214398,0.870126,0.871231,0.868958
4,20,0.7,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
5,20,0.7,0.9,0.855062,7.926186,0.879363,0.876761,0.876423
6,40,0.5,0.8,0.869674,7.851346,0.865619,0.880225,0.872208
7,40,0.5,0.9,0.858886,8.10048,0.862916,0.879229,0.870369
8,40,0.65,0.8,0.855062,7.926186,0.879363,0.876761,0.876423
9,40,0.65,0.9,0.855062,7.926186,0.879363,0.876761,0.876423
