In [19]:
import json
from pathlib import Path
import pandas as pd
from matplotlib import pyplot as plt
from benchmark.utils.metadata import read_preproc, read_python, read_lean, read_unit, read_qa_plausible_theorems

In [20]:
def collate_metadata(base_path: str) -> pd.DataFrame:
    """
    Collates metadata.json files across indices into a single pandas DataFrame.
    
    Parameters:
    base_path (str): Base path containing indexed directories with metadata.json files
    
    Returns:
    pandas.DataFrame: Table with metadata information across all indices
    """
    # Initialize lists to store data
    records = []
    
    # Iterate through all metadata.json files
    base_dir = Path(base_path)
    for idx_dir in sorted(base_dir.glob("*")):
        if not idx_dir.is_dir():
            continue

        preproc = read_preproc(idx_dir)
        python = read_python(idx_dir)
        lean = read_lean(idx_dir)
        unit = read_unit(idx_dir)
        try: 
            plausible_theorems = read_qa_plausible_theorems(idx_dir)
        except: 
            plausible_theorems = []

        # Create a flat record
        record = {
            "idx": idx_dir.name, 
            "preproc_loops": preproc["loops"],
            "python_loops": python["loops"], 
            "lean_loops": lean["loops"],
            "unit_loops": unit["loops"],
            "preproc_success": preproc["latest_run_success"],
            "python_success": python["latest_run_success"],
            "lean_success": lean["latest_run_success"],
            "unit_success": unit["latest_run_success"],
            "plausible_theorems": plausible_theorems
        }
        records.append(record)
    
    # Create DataFrame
    return pd.DataFrame(records).set_index("idx")

In [21]:
up = Path("..")
path = up / up / up / "artefacts" / "apps" / "train"

In [22]:
df = collate_metadata(path)

In [23]:
df

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,unit_loops,preproc_success,python_success,lean_success,unit_success,plausible_theorems
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0000,0,1,53,25,True,True,True,False,[]
0001,0,1,6,0,True,True,True,True,"[result_bound, result_parity, insufficient_mov..."
0002,0,2,5,10,True,True,True,False,[]
0003,2,1,1,4,True,True,True,True,[]
0004,0,2,0,10,True,True,True,False,[]
...,...,...,...,...,...,...,...,...,...
4995,0,1,0,0,True,True,True,False,[]
4996,0,1,2,10,True,True,True,False,[]
4997,2,0,2,8,True,True,True,True,"[sum_divisors_positive, equal_sigma1_propertie..."
4998,0,0,1,1,True,True,True,True,"[result_are_strings, exact_vowel_count, exact_..."


In [14]:
df.drop("plausible_theorems", axis=1).mean()

preproc_loops      0.3390
python_loops       0.8612
lean_loops         1.3194
unit_loops         6.6530
preproc_success    0.9878
python_success     0.9804
lean_success       0.9430
unit_success       0.4178
dtype: float64

In [15]:
df.drop("plausible_theorems", axis=1).sum()

preproc_loops       1695
python_loops        4306
lean_loops          6597
unit_loops         33265
preproc_success     4939
python_success      4902
lean_success        4715
unit_success        2089
dtype: int64

In [16]:
df[df.lean_success == False]

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,unit_loops,preproc_success,python_success,lean_success,unit_success,plausible_theorems
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0062,0,0,5,0,True,True,False,False,[]
0106,0,0,5,0,True,True,False,False,[]
0158,0,1,5,0,True,True,False,False,[]
0188,0,0,5,0,True,True,False,False,[]
0258,0,5,0,0,True,False,False,False,[]
...,...,...,...,...,...,...,...,...,...
4971,0,0,5,0,True,True,False,False,[]
4973,5,0,0,0,False,False,False,False,[]
4980,0,3,5,0,True,True,False,False,[]
4985,5,0,0,0,False,False,False,False,[]


In [17]:
df.python_loops.value_counts()

python_loops
0    2113
1    1986
2     603
3     150
4      76
5      72
Name: count, dtype: int64

In [18]:
df[df.python_loops == 0]

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,unit_loops,preproc_success,python_success,lean_success,unit_success,plausible_theorems
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0009,0,0,2,8,True,True,True,True,"[result_not_exceed_input_length, result_nonneg..."
0012,0,0,1,10,True,True,True,False,[]
0013,0,0,0,2,True,True,True,True,"[result_at_least_n, result_at_least_high_quali..."
0016,0,0,5,10,True,True,True,False,[]
0019,0,0,1,10,True,True,True,False,[]
...,...,...,...,...,...,...,...,...,...
4992,0,0,3,10,True,True,True,False,[]
4993,0,0,0,10,True,True,True,False,[]
4994,0,0,0,10,True,True,True,False,[]
4997,2,0,2,8,True,True,True,True,"[sum_divisors_positive, equal_sigma1_propertie..."


In [33]:
df.lean_loops.value_counts()

lean_loops
0    1833
1    1450
2     842
3     394
5     298
4     183
Name: count, dtype: int64

In [34]:
df[df.lean_success == False]

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,preproc_success,python_success,lean_success
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0062,0,0,5,True,True,False
0106,0,0,5,True,True,False
0158,0,1,5,True,True,False
0188,0,0,5,True,True,False
0258,0,5,0,True,False,False
...,...,...,...,...,...,...
4971,0,0,5,True,True,False
4973,5,0,0,False,False,False
4980,0,3,5,True,True,False
4985,5,0,0,False,False,False


In [50]:
df.to_csv("df.csv")

# Viz

In [49]:
plt.boxplot(df[["python_loops", "lean_loops"]])

{'whiskers': [<matplotlib.lines.Line2D at 0x7f364b9b9d30>,
  <matplotlib.lines.Line2D at 0x7f364b9b8ce0>,
  <matplotlib.lines.Line2D at 0x7f364b9bb830>,
  <matplotlib.lines.Line2D at 0x7f364b9bb8c0>],
 'caps': [<matplotlib.lines.Line2D at 0x7f364b9b9040>,
  <matplotlib.lines.Line2D at 0x7f364b9b8f20>,
  <matplotlib.lines.Line2D at 0x7f364b9bbdd0>,
  <matplotlib.lines.Line2D at 0x7f364b9bbfe0>],
 'boxes': [<matplotlib.lines.Line2D at 0x7f364b9ba2a0>,
  <matplotlib.lines.Line2D at 0x7f364b9bb560>],
 'medians': [<matplotlib.lines.Line2D at 0x7f364b9b9430>,
  <matplotlib.lines.Line2D at 0x7f364b9ba510>],
 'fliers': [<matplotlib.lines.Line2D at 0x7f364b9b9700>,
  <matplotlib.lines.Line2D at 0x7f364b9ba7e0>],
 'means': []}