In [1]:
import pandas as pd
from pathlib import Path

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
root = Path("results_per_section")

In [3]:
# load all csv files in that directory

dfs = [pd.read_csv(f"results_per_section/{fp.name}") for fp in root.glob("*.csv")]

In [4]:
# combine vertically

full = pd.concat(dfs, ignore_index=True)

In [5]:
summary_df = (full.groupby(["freq_subset", "rcond", "measure"], as_index=False)
                .agg(sum_accuracy=("overall_accuracy", "sum"),
                     avg_accuracy=("overall_accuracy", "mean"),
                     n_rows=("overall_accuracy", "size")))

In [6]:
summary_df.shape

(100, 6)

In [7]:
summary_df.head()

Unnamed: 0,freq_subset,rcond,measure,sum_accuracy,avg_accuracy,n_rows
0,10000,0.001,euclidean_distance,712.0,0.051952,13705
1,10000,0.001,mahalanobis_cosine,1821.0,0.132871,13705
2,10000,0.001,mahalanobis_distance,690.0,0.050347,13705
3,10000,0.001,mahalanobis_shifted_cosine,1895.0,0.138271,13705
4,10000,0.001,naive_cosine,1931.0,0.140897,13705


In [8]:
sorted_summary_df = summary_df.sort_values("avg_accuracy", ascending=False).reset_index(drop=True)

In [9]:
sorted_summary_df.head(10)

Unnamed: 0,freq_subset,rcond,measure,sum_accuracy,avg_accuracy,n_rows
0,100000,0.01,mahalanobis_cosine,2277.0,0.166144,13705
1,100000,0.001,mahalanobis_cosine,2277.0,0.166144,13705
2,100000,0.02,mahalanobis_cosine,2277.0,0.166144,13705
3,100000,0.005,mahalanobis_cosine,2277.0,0.166144,13705
4,100000,0.001,mahalanobis_shifted_cosine,2252.0,0.16432,13705
5,100000,0.02,mahalanobis_shifted_cosine,2252.0,0.16432,13705
6,100000,0.005,mahalanobis_shifted_cosine,2252.0,0.16432,13705
7,100000,0.01,mahalanobis_shifted_cosine,2252.0,0.16432,13705
8,50000,0.005,mahalanobis_cosine,2214.0,0.161547,13705
9,50000,0.02,mahalanobis_cosine,2214.0,0.161547,13705


In [14]:
# simple (un‑weighted) mean of avg_accuracy per measure

mean_by_measure_df = (
    sorted_summary_df
      .groupby("measure", as_index=False)["avg_accuracy"]
      .mean()
      .rename(columns={"avg_accuracy": "mean_avg_accuracy"})
)

In [15]:
print("Un‑weighted mean accuracy:")
mean_by_measure_df.sort_values("mean_avg_accuracy", ascending=False)

Un‑weighted mean accuracy:


Unnamed: 0,measure,mean_avg_accuracy
3,mahalanobis_shifted_cosine,0.154611
1,mahalanobis_cosine,0.154341
4,naive_cosine,0.140897
2,mahalanobis_distance,0.062028
0,euclidean_distance,0.051952


In [16]:
mean_by_measure_df.to_csv("measurements_accuracies.csv", index=False)

In [11]:
sorted_summary_df.to_csv("merged_results_measurements.csv", index=False)