In [1]:
import pandas as pd
import json
from pathlib import Path
from benchmark.utils.metadata import read_preproc, read_python, read_lean

2024-10-31 17:39:31,391 - root - DEBUG - Running in Jupyter notebook - console logging enabled


In [2]:
def collate_metadata(base_path: "str | Path") -> pd.DataFrame:
    """
    Collates per-index stage metadata into a single pandas DataFrame.

    Every immediate sub-directory of ``base_path`` is treated as one index.
    For each of them the preproc, python and lean metadata are loaded through
    ``read_preproc``, ``read_python`` and ``read_lean``, and their ``"loops"``
    and ``"latest_run_success"`` entries are flattened into one row.
    NOTE(review): presumably these helpers read each directory's
    metadata.json file(s) -- confirm against benchmark.utils.metadata.

    Parameters:
    base_path (str | Path): Base path containing the indexed directories

    Returns:
    pandas.DataFrame: One row per sub-directory, indexed by the directory
        name ("idx") and sorted by it, with the columns ``preproc_loops``,
        ``python_loops``, ``lean_loops``, ``preproc_success``,
        ``python_success`` and ``lean_success``. When ``base_path`` holds no
        sub-directories an empty frame with the same columns and index name
        is returned instead of raising ``KeyError``.
    """
    # Fixed schema, declared up front so an empty result still has it
    columns = [
        "idx",
        "preproc_loops",
        "python_loops",
        "lean_loops",
        "preproc_success",
        "python_success",
        "lean_success",
    ]
    records = []

    # sorted() makes the row order deterministic regardless of filesystem order
    for idx_dir in sorted(Path(base_path).glob("*")):
        # Stray files next to the index directories are ignored
        if not idx_dir.is_dir():
            continue

        preproc = read_preproc(idx_dir)
        python = read_python(idx_dir)
        lean = read_lean(idx_dir)

        # Create a flat record
        records.append(
            {
                "idx": idx_dir.name,
                "preproc_loops": preproc["loops"],
                "python_loops": python["loops"],
                "lean_loops": lean["loops"],
                "preproc_success": preproc["latest_run_success"],
                "python_success": python["latest_run_success"],
                "lean_success": lean["latest_run_success"],
            }
        )

    # Explicit columns keep set_index("idx") valid even when records is empty
    return pd.DataFrame(records, columns=columns).set_index("idx")

In [3]:
# Relative location of the train artefacts: three levels above the working directory.
up = Path("..")
path = Path(up, up, up, "artefacts", "apps", "train")

In [4]:
# Build the summary table: one row per index directory found under `path`.
df = collate_metadata(path)

In [5]:
# Show the collated table (pandas truncates long frames in the rendered output).
df

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,preproc_success,python_success,lean_success
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0000,0,1,3,True,True,True
0001,0,1,2,True,True,True
0002,0,2,0,True,True,True
0003,2,1,1,True,True,True
0004,0,2,0,True,True,True
...,...,...,...,...,...,...
4995,0,1,0,True,True,True
4996,0,1,2,True,True,True
4997,2,0,2,True,True,True
4998,0,0,1,True,True,True


In [6]:
# Column-wise means: average loop count per stage and, for the boolean
# *_success columns, the fraction of rows that are True.
df.mean()

preproc_loops      0.3390
python_loops       0.8612
lean_loops         1.3076
preproc_success    0.9878
python_success     0.9804
lean_success       0.9430
dtype: float64

In [7]:
# Column-wise totals: summed loop counts per stage and, for the boolean
# *_success columns, the number of rows that are True.
df.sum()

preproc_loops      1695
python_loops       4306
lean_loops         6538
preproc_success    4939
python_success     4902
lean_success       4715
dtype: int64

In [20]:
# Rows where lean_success is False. The mask is negated with `~` rather than
# compared to False with `==` (assumes lean_success is a boolean column).
df[~df["lean_success"]]

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,preproc_success,python_success,lean_success
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0062,0,0,5,True,True,False
0106,0,0,5,True,True,False
0158,0,1,5,True,True,False
0188,0,0,5,True,True,False
0258,0,5,0,True,False,False
...,...,...,...,...,...,...
1905,0,1,5,True,True,False
1943,0,0,5,True,True,False
1946,0,0,5,True,True,False
1983,0,1,5,True,True,False


In [22]:
# How many rows fall on each python_loops value, most frequent first.
df["python_loops"].value_counts()

python_loops
1    827
0    825
2    229
3     66
4     30
5     23
Name: count, dtype: int64

In [23]:
# Rows whose python_loops value is exactly 0.
df.loc[df["python_loops"] == 0]

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,preproc_success,python_success,lean_success
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0009,0,0,2,True,True,True
0012,0,0,1,True,True,True
0013,0,0,0,True,True,True
0016,0,0,5,True,True,True
0019,0,0,1,True,True,True
...,...,...,...,...,...,...
1981,0,0,2,True,True,True
1988,0,0,4,True,True,True
1997,0,0,1,True,True,True
1998,0,0,0,True,True,True


In [24]:
# How many rows fall on each lean_loops value, most frequent first.
df["lean_loops"].value_counts()

lean_loops
0    797
1    588
2    318
3    142
5     89
4     66
Name: count, dtype: int64

In [25]:
# Rows where lean_success is False. The mask is negated with `~` rather than
# compared to False with `==` (assumes lean_success is a boolean column).
df[~df["lean_success"]]

Unnamed: 0_level_0,preproc_loops,python_loops,lean_loops,preproc_success,python_success,lean_success
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0062,0,0,5,True,True,False
0106,0,0,5,True,True,False
0158,0,1,5,True,True,False
0188,0,0,5,True,True,False
0258,0,5,0,True,False,False
...,...,...,...,...,...,...
1905,0,1,5,True,True,False
1943,0,0,5,True,True,False
1946,0,0,5,True,True,False
1983,0,1,5,True,True,False


In [8]:
# Back-of-the-envelope count: 94% of 5000.
# NOTE(review): presumably the approximate Lean success rate times the number
# of indices -- confirm, and consider deriving both from `df` instead of
# hard-coding them.
0.94 * 5000

4700.0