In [None]:
from judge_utils import load_all_results, main_path, results_path
import json
import os
import pandas as pd


In [None]:
# Load every result found under `results_path` (helper and path both come from
# judge_utils). The returned object is used as a pandas DataFrame by the cells
# below and is the single table that all later columns are attached to.
df_main = load_all_results(results_path)


In [None]:
# Put the runs in 'run_time' order and replace the old index with a fresh
# 0..n-1 range so positional lookups follow the sorted order.
runs_by_time = df_main.sort_values(by='run_time')
df_main = runs_by_time.reset_index(drop=True)

In [None]:
# Build a composite lookup key per row: the tuple
# (structure_id, use_img, use_json, shot). The judge-rating and metric
# records are keyed by the same four fields, so this temporary column lets
# them be attached with a plain dictionary lookup.
#
# The key is assembled by zipping the four columns instead of calling
# DataFrame.apply(axis=1): row-wise apply builds a Series for every row and
# runs a Python lambda on it, whereas zip walks the columns directly and
# yields the same tuples.
df_main["merge_key"] = list(
    zip(
        df_main["structure_id"],
        df_main["use_img"],
        df_main["use_json"],
        df_main["shot"],
    )
)

# One judge-analysis JSON file per label; each label becomes the name of the
# rating column added to df_main.
judge_files = {
    label: os.path.join(main_path, "analysis", f"judge_analysis_{label}.json")
    for label in ("BASE", "CLAR_Q", "COMM_SH_REF", "IMPL_REF")
}

for label, filepath in judge_files.items():
    try:
        with open(filepath, "r") as f:
            judge_data = json.load(f)

        # (structure_id, use_img, use_json, shot) -> rating. If a file holds
        # several entries with the same key, the last one wins.
        rating_dict = {
            (
                entry["structure_id"],
                entry["use_img"],
                entry["use_json"],
                entry["shot"],
            ): entry["rating"]
            for entry in judge_data
        }

        # Map the new column from the dictionary, using the merged key.
        # Rows whose key has no rating are left as missing values.
        df_main[label] = df_main["merge_key"].map(rating_dict)
    except Exception as exc:
        # Best effort: a missing or malformed judge file must not abort the
        # notebook, but the skip is reported so that a later KeyError on
        # df_main[label] can be traced back to its cause.
        print(f"Skipping judge ratings {label!r} ({filepath}): {exc!r}")
        continue


In [None]:
# Read the per-run metric records from
# <main_path>/analysis/parsed_actions_with_metrics.json into `metrics_data`.
metrics_path = os.path.join(main_path, "analysis", "parsed_actions_with_metrics.json")
with open(metrics_path, "r") as f:
    metrics_data = json.load(f)
    

In [None]:
# Metric fields copied from each record onto df_main, in column order.
metric_names = ("accuracy", "precision", "iou", "action_format")

# Index the metric records by the same composite key used for "merge_key":
# (structure_id, use_img, use_json, shot) -> {metric name: value}.
# A later record with the same key replaces an earlier one.
metrics_dict = {
    (
        record["structure_id"],
        record["use_img"],
        record["use_json"],
        record["shot"],
    ): {name: record[name] for name in metric_names}
    for record in metrics_data
}

# Attach one column per metric; rows whose key has no metric record get None.
for metric_name in metric_names:
    df_main[metric_name] = df_main["merge_key"].map(
        lambda key, metric_name=metric_name: (
            metrics_dict[key][metric_name] if key in metrics_dict else None
        )
    )

# The composite key was only needed for the lookups above; remove it.
df_main.drop(columns=["merge_key"], inplace=True)

In [None]:
# Columns that none of the cells below read; dropping them keeps the
# row-level value_counts() views further down narrower.
columns_to_drop = [
    "json_file",
    "Model",
    "Quantization",
    "Device",
    "Number of models",
    "Max new tokens",
    "Repetition Penalty",
    "Max rounds",
]
df_main = df_main.drop(columns=columns_to_drop)


In [None]:
# Count how often each distinct combination of values across all remaining
# columns occurs (DataFrame.value_counts works on whole rows).
df_main.value_counts()


In [None]:
# Row-combination counts restricted to runs whose BASE rating equals 3.
df_main.loc[df_main["BASE"].eq(3)].value_counts()

In [None]:
# Runs with a BASE rating of 1 that still have a positive accuracy.
base_is_one = df_main["BASE"].eq(1)
accuracy_positive = df_main["accuracy"].gt(0)
df_main.loc[base_is_one & accuracy_positive].value_counts()

In [None]:
# Row-combination counts for every run with a positive accuracy.
df_main.loc[df_main["accuracy"].gt(0)].value_counts()

In [None]:
# Summary statistics of num_rounds for the conversations that the architect
# ended (finished_by_architect is True).
architect_finished = df_main["finished_by_architect"] == True
df_main.loc[architect_finished, "num_rounds"].describe()


In [None]:
# Preview the first rows where both accuracy and precision are positive.
accuracy_positive = df_main["accuracy"].gt(0)
precision_positive = df_main["precision"].gt(0)
df_main.loc[accuracy_positive & precision_positive].head()

In [None]:
# Distribution of accuracy values among the one-shot runs.
# Author's observation: only 1 of the 6 good accuracy ratings used one-shot.
df_main.loc[df_main["shot"].eq("one-shot"), "accuracy"].value_counts()

In [None]:
# Force the score columns to numeric dtype; anything that cannot be parsed
# becomes NaN and is therefore ignored by the means below.
for score_col in ("BASE", "accuracy", "precision", "iou"):
    df_main[score_col] = pd.to_numeric(df_main[score_col], errors="coerce")

# Average BASE, accuracy and precision for every
# (shot, use_img, use_json) combination, with the grouping keys as columns.
group_keys = ["shot", "use_img", "use_json"]
averaged_cols = ["BASE", "accuracy", "precision"]
table = df_main.groupby(group_keys)[averaged_cols].mean().reset_index()

In [None]:
# Render the aggregated table as LaTeX source, formatting floats with two
# decimals. The string is stored in `latex_table`; nothing is displayed.
# NOTE(review): to_latex also writes the row index unless index=False is
# passed — confirm whether the index column is wanted in the final table.
latex_table = table.to_latex(float_format="%.2f")