In [None]:
import pandas as pd
import os
from typing import Tuple

In [None]:
def standardize_two_df(df1: pd.DataFrame, df2: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Restrict two frames to the ``task_id`` values they share and align them.

    Each returned frame keeps only rows whose ``task_id`` occurs in both
    inputs, retains the first row per ``task_id``, is sorted by ``task_id``
    and carries a fresh 0..n-1 index. The inputs are not modified.

    Args:
        df1: First frame; must contain a ``task_id`` column.
        df2: Second frame; must contain a ``task_id`` column.

    Returns:
        ``(df1_aligned, df2_aligned)``. When the frames share no ``task_id``
        a warning is printed and two empty slices (columns preserved) are
        returned.
    """
    ids_left = set(df1["task_id"])
    ids_right = set(df2["task_id"])
    shared_ids = ids_left.intersection(ids_right)

    if len(shared_ids) == 0:
        print("⚠️ No matching task_ids found between the two DataFrames.")
        # Zero-row slices keep each frame's columns and dtypes.
        return df1.iloc[0:0], df2.iloc[0:0]

    def _align(frame: pd.DataFrame) -> pd.DataFrame:
        # Filter -> de-duplicate (first occurrence wins) -> sort -> renumber.
        return (
            frame[frame["task_id"].isin(shared_ids)]
            .copy()
            .drop_duplicates(subset=["task_id"], keep="first")
            .sort_values("task_id")
            .reset_index(drop=True)
        )

    return _align(df1), _align(df2)

In [None]:
def check_valid_failure(failure: str):
    """Decide whether a ``failure_type`` cell counts as a valid failure.

    Returns ``True`` when ``failure`` is any float, or when it is a string
    that contains "AssertionError" but not "Mutation". Every other value
    yields ``False``.

    NOTE(review): the float branch presumably targets the NaN that pandas
    uses for empty cells, but it accepts every float -- confirm intent.
    """
    if isinstance(failure, float):
        return True
    if not isinstance(failure, str):
        return False
    return "AssertionError" in failure and "Mutation" not in failure

In [None]:
# ---------------------------------------------------------------------------
# Collect, per mutation, the failure types of every model side by side
# (baseline "no_mutation" run next to the mutated run) and write one CSV per
# mutation into ./temp_res2.
# ---------------------------------------------------------------------------
res_dir = "RESULT_FILE_PATH"  # placeholder: folder holding one sub-folder per model
benchmark = "CruxEval"
mutations = ["for2while", "for2enumerate", "boolean_literal", "commutative_reorder", "constant_unfold_add", "constant_unfold", "constant_unfold_mult", "demorgan", "literal_format", "random", "sequential" ]

# Model-family keywords in the order their columns appear in the output.
_MODEL_FAMILY_ORDER = ("gpt-4o", "gpt-5", "qwen", "llama", "gemma", "deepseek", "codestral")


def custom_sort_key(col_name: str) -> tuple:
    """Sort key that groups columns by model family, then alphabetically.

    The first keyword of ``_MODEL_FAMILY_ORDER`` found in the lower-cased
    column name sets the group rank; names matching no keyword sort last.
    """
    lowered = col_name.lower()
    for rank, family in enumerate(_MODEL_FAMILY_ORDER):
        if family in lowered:
            return (rank, lowered)
    return (len(_MODEL_FAMILY_ORDER), lowered)


def _has_assertion_error(failures: pd.Series) -> pd.Series:
    """Boolean mask of the entries whose text contains "AssertionError".

    The column is cast to ``object`` first: a CSV column in which every cell
    is empty is read as float64, and the ``.str`` accessor raises
    ``AttributeError`` on non-string dtypes. Missing values count as False.
    """
    return failures.astype(object).str.contains("AssertionError", na=False)


# mutation name -> list of failure-type columns gathered across all models
columns_by_mutation = {}

# sorted(): os.listdir returns entries in arbitrary order.
for models in sorted(os.listdir(res_dir)):

    if 'ensemble' in models or ".DS_Store" in models:
        continue

    model_res_dir = os.path.join(res_dir, models)
    if not os.path.isdir(model_res_dir):
        # Stray files next to the model folders are not result sets.
        continue

    # Result files of this benchmark, ensembles excluded (baseline included).
    result_files = sorted(
        name for name in os.listdir(model_res_dir)
        if benchmark in name and "ensemble" not in name
    )

    baseline_files = [name for name in result_files if "no_mutation" in name]
    if not baseline_files:
        print(f"WARNING: no no_mutation result for {benchmark} in {model_res_dir}; skipping.")
        continue

    # With several baselines, deterministically take the lexicographically last.
    no_mut_name = baseline_files[-1]
    no_mut_data = pd.read_csv(os.path.join(model_res_dir, no_mut_name))
    no_mut_failures = no_mut_data["failure_type"].rename(f"failure_type_{models}_no_mut")
    no_mut_is_assertion = _has_assertion_error(no_mut_failures)

    for res_csv in result_files:
        # A file belongs to a mutation when the last "_"-separated token of its
        # base name EQUALS the last token of the mutation name. (A substring
        # test would let a token such as "for" match several mutations.)
        file_suffix = os.path.splitext(res_csv)[0].split("_")[-1]

        for mutation in mutations:
            if file_suffix != mutation.split("_")[-1]:
                continue

            mut_data = pd.read_csv(os.path.join(model_res_dir, res_csv))
            mut_failures = mut_data["failure_type"].rename(f"failure_type_{models}_{mutation}")

            # Drop rows where BOTH runs failed with an AssertionError.
            # NOTE(review): rows of the two CSVs are paired by position, not by
            # task_id; this assumes both files list the same tasks in the same
            # order -- confirm, or align the frames on task_id first.
            mask = ~(no_mut_is_assertion & _has_assertion_error(mut_failures))

            columns_by_mutation.setdefault(mutation, []).extend(
                [no_mut_failures[mask], mut_failures[mask]]
            )

# One frame per mutation: columns placed SIDE BY SIDE, then ordered by model family.
res_dict = {}
for mutation, columns in columns_by_mutation.items():
    mutation_df = pd.concat(columns, axis=1)
    sorted_cols = sorted(mutation_df.columns, key=custom_sort_key)
    res_dict[mutation] = mutation_df.reindex(columns=sorted_cols)

os.makedirs("temp_res2", exist_ok=True)
for key, df in res_dict.items():
    df.to_csv(f"temp_res2/{key}.csv")
