In [None]:
import pickle as pkl
import ast
import matplotlib.pyplot as plt
from cods.od.visualization import plot_preds, create_pdf_with_plots
import re

In [None]:
# Base name of the experiment. It selects the "./{name}.pkl" results file that
# is loaded below and the "output-{name}.csv" file written at the end.
name = "results-exp-detr"

In [None]:
# Load the pickled experiment results that sit next to this script.
# NOTE(review): unpickling executes code embedded in the file, so only load
# result files produced by a trusted run.
results_path = f"./{name}.pkl"
with open(results_path, "rb") as pickle_file:
    results = pkl.load(pickle_file)

# Quick sanity check: number of entries, then the raw object.
print(len(results))
print(results)

In [None]:
# Report, for every experiment key, the mean of each metric array stored on
# its result object.
# NOTE(review): the lines labelled "Risk" print the mean of the `*_coverages`
# arrays; confirm that label matches what those arrays hold.
for key, value in results.items():
    print(f"Key: {key}")
    # Each stage keeps its arrays under `<prefix>_set_sizes` and
    # `<prefix>_coverages`.
    for stage, prefix in (
        ("Confidence", "confidence"),
        ("Localization", "localization"),
        ("Classification", "classification"),
    ):
        print(f"  {stage} Metrics:")
        set_sizes = getattr(value, f"{prefix}_set_sizes")
        print(f"    Set Size: {set_sizes.mean()}")
        coverages = getattr(value, f"{prefix}_coverages")
        print(f"    Risk: {coverages.mean()}")
    print(f"  Global Risk: {value.global_coverage.mean()}")
    print("-" * 50)
import pandas as pd

# One row per experiment: the key plus the mean of every metric array.
data = [
    {
        "Key": key,
        "Confidence Set Size (Mean)": value.confidence_set_sizes.mean(),
        "Confidence Risk (Mean)": value.confidence_coverages.mean(),
        "Localization Set Size (Mean)": value.localization_set_sizes.mean(),
        "Localization Risk (Mean)": value.localization_coverages.mean(),
        "Classification Set Size (Mean)": value.classification_set_sizes.mean(),
        "Classification Risk (Mean)": value.classification_coverages.mean(),
        "Global Risk (Mean)": value.global_coverage.mean(),
    }
    for key, value in results.items()
]

# Tabulate the rows.
df = pd.DataFrame(data)

# Metric columns whose cells still hold the objects returned by `.mean()`.
metrics = [
    "Global Risk (Mean)",
    "Confidence Set Size (Mean)",
    "Confidence Risk (Mean)",
    "Localization Set Size (Mean)",
    "Localization Risk (Mean)",
    "Classification Set Size (Mean)",
    "Classification Risk (Mean)",
]


def _as_python_float(cell):
    """Unwrap a single-element value via `.item()` and return it as a float."""
    return float(cell.item())


# Convert every metric cell to a plain Python float.
for metric in metrics:
    df[metric] = df[metric].apply(_as_python_float)

# df.to_csv("output-yolo.csv")
# # Plot each metric
# fig, axes = plt.subplots(
#     len(metrics),
#     1,
#     figsize=(12, 18 * len(metrics)),  # , sharex=True
# )

# for i, metric in enumerate(metrics):
#     axes[i].barh(df["Key"], df[metric], color="skyblue")
#     axes[i].set_title(f"Comparison of {metric} Across Keys")
#     axes[i].set_xlabel(metric)
#     axes[i].set_ylabel("Key")
#     if "Set Size" not in metric:
#         alphas = df["Key"].tolist()[0].split("-")[1]
#         alphas = ast.literal_eval(alphas)
#         if "Global" in metric:
#             alpha = sum(alphas[1:])
#         elif "Confidence" in metric:
#             alpha = alphas[0]
#         elif "Localization" in metric:
#             alpha = alphas[1]
#         elif "Classification" in metric:
#             alpha = alphas[2]
#         axes[i].axvline(
#             x=alpha, color="red", linestyle="--", label=f"Threshold = {alpha}"
#         )
#     axes[i].legend()

# plt.tight_layout()
# plt.show()
# Proper DataFrame construction: decompose each experiment key into its parts.
#
# Candidate values for every key component. Each list is sorted by length,
# longest first, so that a longer name is always tried before a shorter one.
alphas_str = [
    "[0.02, 0.05, 0.05]",
    "[0.03, 0.1, 0.1]",
]  # Keep as strings for matching
matching_functions = sorted(["mix", "hausdorff", "lac", "giou"], key=len, reverse=True)
confidence_methods = sorted(
    ["box_count_threshold", "box_count_recall", "box_thresholded_distance"],
    key=len,
    reverse=True,
)
localization_methods = sorted(["thresholded", "pixelwise", "boxwise"], key=len, reverse=True)
classification_prediction_sets = sorted(["lac", "aps"], key=len, reverse=True)
localization_prediction_sets = sorted(["additive", "multiplicative"], key=len, reverse=True)

# Number of values returned by `extract_key_components_revised` on every path.
_N_KEY_COMPONENTS = 9


def _split_known_prefix(text, candidates, allow_terminal=False):
    """Strip the first candidate that prefixes *text* (followed by "_").

    Args:
        text: Remaining, not yet parsed part of the key.
        candidates: Known component names, tried in the given order.
        allow_terminal: When true, a candidate that equals *text* exactly
            (i.e. it is the last component) also counts as a match.

    Returns:
        A ``(matched, rest)`` tuple. ``matched`` is ``None`` and ``rest`` is
        *text* unchanged when no candidate applies.
    """
    for candidate in candidates:
        if text.startswith(candidate + "_"):
            return candidate, text[len(candidate) + 1 :]
        if allow_terminal and text == candidate:
            return candidate, ""
    return None, text


def extract_key_components_revised(key_string):
    """Split an experiment key into its alpha values and method names.

    The expected layout is
    ``alpha-<list literal>-<matching>_<confidence>_<localization>_<cls set>_<loc set>``
    where the component names come from the module-level candidate lists.

    Args:
        key_string: The experiment key to decompose.

    Returns:
        A positionally indexed ``pd.Series`` with exactly nine values, in this
        order: confidence alpha, localization alpha, classification alpha,
        global alpha (localization + classification), matching function,
        confidence method, localization method, classification prediction
        set, localization prediction set. Components that cannot be
        identified are ``None``. If the ``alpha-...-`` prefix is missing or
        cannot be parsed, all nine values are ``None``; a parse error is
        additionally reported on stdout.
    """
    try:
        # 1. Extract alpha.
        match = re.search(r"alpha-(.*?)-", key_string)
        if not match:
            # Basic structure is missing: same width as every other return.
            return pd.Series([None] * _N_KEY_COMPONENTS)
        # literal_eval only accepts Python literals, so text taken from the
        # key can never execute code (unlike eval).
        alpha = ast.literal_eval(match.group(1))
        alpha_cnf = alpha[0]
        alpha_loc = alpha[1]
        alpha_cls = alpha[2]
        alpha_tot = alpha_loc + alpha_cls
    except (ValueError, SyntaxError, TypeError, LookupError) as e:
        print(f"Error parsing key: {key_string} - {e}")
        return pd.Series([None] * _N_KEY_COMPONENTS)

    # Continue right after the matched "alpha-...-" text. Using the match
    # position (rather than re-formatting the parsed list) stays correct even
    # when the key's alpha text differs from Python's list repr.
    remaining_string = key_string[match.end() :]

    # 2-4. Matching function, confidence method, localization method.
    matching_function, remaining_string = _split_known_prefix(
        remaining_string, matching_functions
    )
    confidence_method, remaining_string = _split_known_prefix(
        remaining_string, confidence_methods
    )
    localization_method, remaining_string = _split_known_prefix(
        remaining_string, localization_methods
    )

    # 5. Classification prediction set; it may also be the last component.
    classification_set, remaining_string = _split_known_prefix(
        remaining_string, classification_prediction_sets, allow_terminal=True
    )

    # 6. Localization prediction set: whatever is left must be a known value.
    localization_set = None
    if remaining_string in localization_prediction_sets:
        localization_set = remaining_string

    return pd.Series(
        [
            alpha_cnf,
            alpha_loc,
            alpha_cls,
            alpha_tot,
            matching_function,
            confidence_method,
            localization_method,
            classification_set,
            localization_set,
        ]
    )


# Decompose every key into its component columns and name those columns.
new_columns = df["Key"].apply(extract_key_components_revised)
new_columns.columns = [
    "Confidence Alpha",
    "Localization Alpha",
    "Classification Alpha",
    "Global Alpha",
    "Matching Function",
    "Confidence Method",
    "Localization Method",
    "Classification Prediction Set",
    "Localization Prediction Set",
]

# Replace the raw "Key" column with the decomposed columns.
df_updated = pd.concat([df.drop("Key", axis=1, errors="ignore"), new_columns], axis=1)


# Display the first 5 rows with the new columns
print("DataFrame with correctly decomposed 'Key' column:")
print(df_updated.head().to_string(index=False))

# Print the column names and their data types.
print("\nDataFrame Info:")
# DataFrame.info() writes its summary to stdout and returns None, so it must
# not be wrapped in print() (that would emit a stray "None" line).
df_updated.info()

# Display unique values for verification
print("\nUnique values in new columns:")
for col in new_columns.columns:
    # Show None separately if present
    unique_vals = df_updated[col].unique()
    unique_vals_list = [str(v) for v in unique_vals if pd.notna(v)]
    if df_updated[col].isna().any():
        unique_vals_list.append("None")
    print(f"- {col}: {unique_vals_list}")

# Persist the decomposed table next to the script.
df2 = df_updated.copy()
df2.to_csv(f"output-{name}.csv", index=False)