In [20]:
import os
import pandas as pd
import json


In [24]:
results_root = "./results/all"

# Get all model folders
folders = [f for f in os.listdir(results_root) if os.path.isdir(os.path.join(results_root, f))]
all_results = []

# Process each folder
for folder in folders:
    json_path = os.path.join(results_root, folder, 'all_results.json')
    if not os.path.exists(json_path):
        print(f"Skipping {folder}, no results {json_path} file found.")
        continue

    with open(json_path, 'r') as f:
        data = json.load(f)
        groups = data['groups']
        print(groups)
        # Extract results
        df = pd.DataFrame(
            {
                "model_name": folder,
                "blimp_supplement_acc": groups["blimp_supplement"]["acc,none"],
                "blimp_supplement_stderr": groups["blimp_supplement"]["acc_stderr,none"],
                "blimp_filtered_acc": groups["blimp_filtered"]["acc,none"],
                "blimp_filtered_stderr": groups["blimp_filtered"]["acc_stderr,none"],
                "ewok_filtered_acc": groups["ewok_filtered"]["acc,none"],
                "ewok_filtered_stderr": groups["ewok_filtered"]["acc_stderr,none"],
            },
            index=[0],
        )
        all_results.append(df)

# Combine all results
combined_results = pd.concat(all_results, ignore_index=True)
combined_results = combined_results.sort_values(by='model_name').reset_index(drop=True)
combined_results

{'ewok_filtered': {'acc,none': 0.498806338617783, 'acc_stderr,none': 0.005731827417496267, 'alias': 'ewok_filtered'}, 'blimp_supplement': {'acc,none': 0.5990337302428236, 'acc_stderr,none': 0.005262043444131852, 'alias': 'blimp_supplement'}, 'blimp_filtered': {'acc,none': 0.6933993827879917, 'acc_stderr,none': 0.0016156697990275413, 'alias': 'blimp_filtered'}}
{'ewok_filtered': {'acc,none': 0.502875831389563, 'acc_stderr,none': 0.005731205587920797, 'alias': 'ewok_filtered'}, 'blimp_supplement': {'acc,none': 0.6061930727592519, 'acc_stderr,none': 0.004981778196749111, 'alias': 'blimp_supplement'}, 'blimp_filtered': {'acc,none': 0.6941923683085492, 'acc_stderr,none': 0.001621555999646299, 'alias': 'blimp_filtered'}}
{'ewok_filtered': {'acc,none': 0.5005777851120263, 'acc_stderr,none': 0.005731039560705602, 'alias': 'ewok_filtered'}, 'blimp_supplement': {'acc,none': 0.5811987984079156, 'acc_stderr,none': 0.0053638650192445216, 'alias': 'blimp_supplement'}, 'blimp_filtered': {'acc,none': 

Unnamed: 0,model_name,blimp_supplement_acc,blimp_supplement_stderr,blimp_filtered_acc,blimp_filtered_stderr,ewok_filtered_acc,ewok_filtered_stderr
0,BabyLlama1-58M-strict,0.581199,0.005364,0.676288,0.00162,0.500578,0.005731
1,DistilledGPT-44M-strict,0.588133,0.005255,0.657983,0.001628,0.49961,0.005732
2,GPT2-small-97M-strict,0.596177,0.005429,0.669808,0.001691,0.503546,0.00573
3,PalenkaLlama1-58M-strict-L0.1-H0.1_20250419_16...,0.596101,0.005435,0.693733,0.001617,0.499761,0.005731
4,PalenkaLlama1-58M-strict-L0.1-H0.3_20250419_19...,0.599034,0.005262,0.693399,0.001616,0.498806,0.005732
5,PalenkaLlama1-58M-strict-L0.1-H0.5_20250419_21...,0.606193,0.004982,0.694192,0.001622,0.502876,0.005731
