In [1]:
import pickle as pkl

In [2]:
# Load the experiment results. The file is opened in a `with` block so the
# handle is closed as soon as unpickling finishes (the bare `open(...)` call
# left it open until garbage collection).
# NOTE(security): pickle can execute arbitrary code while loading; only use
# this on result files you produced yourself or otherwise trust.
with open("results-exp1-detr.pkl", "rb") as results_file:
    results = pkl.load(results_file)

In [9]:
# List the attributes available on one result object (the first one stored).
first_result = list(results.values())[0]
dir(first_result)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'classification_coverages',
 'classification_set_sizes',
 'confidence_coverages',
 'confidence_set_sizes',
 'conformalized_id',
 'global_coverage',
 'localization_coverages',
 'localization_set_sizes',
 'parameters_id',
 'predictions_id',
 'preds']

In [8]:
# Quick look: mean of `classification_set_sizes` for every stored result.
for key, value in results.items():
    mean_set_size = value.classification_set_sizes.mean()
    print(f"{key}: {mean_set_size}")

alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_thresholded_lac_additive: 14.601300239562988
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_thresholded_lac_multiplicative: 14.601300239562988
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_thresholded_aps_additive: 19.527606964111328
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_thresholded_aps_multiplicative: 10.438751220703125
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_pixelwise_lac_additive: 8.62268352508545
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_pixelwise_lac_multiplicative: 8.62268352508545
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_pixelwise_aps_additive: 10.439098358154297
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_pixelwise_aps_multiplicative: 10.439105987548828
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_boxwise_lac_additive: 8.62268352508545
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_boxwise_lac_multiplicative: 8.62268352508545
alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_boxwi

In [12]:
import re
from typing import Any, Dict, Tuple
import numpy as np
import pandas as pd

# torch is optional: tensor handling in the helpers below is only enabled
# when the import succeeds.
try:
    import torch
except Exception:
    _HAS_TORCH = False
else:
    _HAS_TORCH = True

# --- helpers ---------------------------------------------------------------

# Known confidence-loss names; each one spans several '_'-separated tokens
# inside a result key.
CONFIDENCE_LOSSES = {
    "box_count_threshold",
    "box_count_recall",
    "box_thresholded_distance",
}

# Single-token vocabularies for the remaining key components.
LOCALIZATION_GRANULARITIES = {
    "thresholded",
    "pixelwise",
    "boxwise",
}
LOCALIZATION_LOSSES = {
    "lac",
    "aps",
}
SET_RULES = {
    "additive",
    "multiplicative",
}


def _to_numpy(x: Any) -> np.ndarray:
    """Return `x` as a numpy array.

    - torch tensors (when torch is importable) are detached, moved to CPU and
      converted with `.numpy()`;
    - numpy arrays are returned unchanged (same object, no copy);
    - scalars are wrapped into a one-element array;
    - anything else (lists, tuples, other objects) goes through `np.array`.
    """
    if _HAS_TORCH and isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    if isinstance(x, np.ndarray):
        return x
    is_sequence = isinstance(x, (list, tuple))
    if not is_sequence and np.isscalar(x):
        # Wrap so the caller always gets at least one dimension for scalars.
        return np.array([x])
    return np.array(x)


def _mean_std(x: Any) -> Tuple[float, float, int]:
    """Return `(mean, std, count)` of `x` flattened to a 1D float array.

    Mean and standard deviation ignore NaN entries (`np.nanmean` /
    `np.nanstd` with `ddof=0`); `count` is the total number of elements,
    NaNs included. Empty input yields `(nan, nan, 0)`.
    """
    values = _to_numpy(x).astype(float).ravel()
    count = int(values.size)
    if count > 0:
        mean = float(np.nanmean(values))
        std = float(np.nanstd(values, ddof=0))
        return (mean, std, count)
    return (np.nan, np.nan, 0)


def parse_key(key: str) -> Dict[str, Any]:
    """
    Split an experiment key into its named components.

    Example key:
    'alpha-[0.02, 0.05, 0.05]-mix_box_count_threshold_thresholded_lac_additive'
    Decomposes into:
      alpha_confidence, alpha_localization, alpha_classification,
      matching_function, confidence_loss, localization_granularity,
      localization_loss, set_rule

    Args:
        key: Experiment identifier. Anything from the first ':' on is ignored.

    Returns:
        Dict with the eight fields listed above. A component that is absent
        or cannot be parsed is None; this includes each alpha entry, so a key
        without the 'alpha-[...]-' prefix, with fewer than three alphas, or
        with a non-numeric alpha still yields a complete row.

    Raises:
        ValueError: If nothing is left to parse after the alpha prefix.
    """
    # strip any trailing ': ...' if present
    core = key.split(":", 1)[0]

    # alpha part: always exactly three slots, None where missing/unparsable
    alphas = [None, None, None]
    m = re.match(r"^alpha-\[(.*?)\]-(.*)$", core)
    if m:
        alpha_str, tail = m.group(1), m.group(2)
        for idx, piece in enumerate(alpha_str.split(",")[:3]):
            try:
                alphas[idx] = float(piece.strip())
            except ValueError:
                alphas[idx] = None
    else:
        tail = core

    # str.split never returns an empty list, so emptiness must be checked on
    # the string itself.
    if not tail:
        raise ValueError(f"Cannot parse key: {key}")

    toks = tail.split("_")
    matching_function = toks[0]
    toks = toks[1:]

    # greedy match for confidence_loss: longest known name first
    confidence_loss = None
    max_width = max((name.count("_") + 1 for name in CONFIDENCE_LOSSES), default=0)
    for width in range(min(max_width, len(toks)), 0, -1):
        candidate = "_".join(toks[:width])
        if candidate in CONFIDENCE_LOSSES:
            confidence_loss = candidate
            toks = toks[width:]
            break

    if confidence_loss is None:
        # Unknown loss name. If the key ends with a complete
        # granularity/loss/rule triple, keep that triple for the fields below
        # and treat everything before it as the loss name. Anchoring on the
        # right avoids cutting inside a name that happens to contain a
        # granularity word. Otherwise consume all remaining tokens.
        has_suffix = (
            len(toks) >= 3
            and toks[-3] in LOCALIZATION_GRANULARITIES
            and toks[-2] in LOCALIZATION_LOSSES
            and toks[-1] in SET_RULES
        )
        split_at = len(toks) - 3 if has_suffix else len(toks)
        confidence_loss = "_".join(toks[:split_at])
        toks = toks[split_at:]

    # Remaining components are single tokens and must appear in this order;
    # a token outside the expected vocabulary leaves that field as None.
    trailing = {}
    for field, allowed in (
        ("localization_granularity", LOCALIZATION_GRANULARITIES),
        ("localization_loss", LOCALIZATION_LOSSES),
        ("set_rule", SET_RULES),
    ):
        if toks and toks[0] in allowed:
            trailing[field] = toks[0]
            toks = toks[1:]
        else:
            trailing[field] = None

    return {
        "alpha_confidence": alphas[0],
        "alpha_localization": alphas[1],
        "alpha_classification": alphas[2],
        "matching_function": matching_function,
        "confidence_loss": confidence_loss,
        "localization_granularity": trailing["localization_granularity"],
        "localization_loss": trailing["localization_loss"],
        "set_rule": trailing["set_rule"],
    }


# --- main aggregation ------------------------------------------------------

# Attributes summarized for every result object. Each entry is
# (attribute_name_on_value_object, pretty_prefix_in_columns); at the moment
# the column prefix is the attribute name itself for every metric.
METRICS = [
    (metric_name, metric_name)
    for metric_name in (
        "classification_set_sizes",
        "classification_coverages",
        "confidence_set_sizes",
        "confidence_coverages",
        "localization_set_sizes",
        "localization_coverages",
        "global_coverage",
    )
]

# One summary row per result: the parsed key components followed by a
# `<prefix>_mean` / `<prefix>_std` pair for every entry in METRICS.
rows = []
for key, value in results.items():
    row = {"key": key}
    row.update(parse_key(key))

    for attr, prefix in METRICS:
        if not hasattr(value, attr):
            # Metric missing on this result: keep the columns, fill with NaN.
            mean, std = np.nan, np.nan
        else:
            # The element count returned by _mean_std is not stored.
            mean, std, _count = _mean_std(getattr(value, attr))
        row[f"{prefix}_mean"] = mean
        row[f"{prefix}_std"] = std

    rows.append(row)

# Identification columns, shown first and in this order.
meta_cols = [
    "key",
    "alpha_confidence",
    "alpha_localization",
    "alpha_classification",
    "matching_function",
    "confidence_loss",
    "localization_granularity",
    "localization_loss",
    "set_rule",
]

# Rows are ordered by the configuration components only (not key or alphas).
sort_cols = [
    "matching_function",
    "confidence_loss",
    "localization_granularity",
    "localization_loss",
    "set_rule",
]

df = pd.DataFrame(rows)
metric_cols = [name for name in df.columns if name not in meta_cols]

# Metadata first, metric columns alphabetically after it, then sort the rows
# for readability and renumber the index.
df = (
    df[meta_cols + sorted(metric_cols)]
    .sort_values(by=sort_cols)
    .reset_index(drop=True)
)

df

Unnamed: 0,key,alpha_confidence,alpha_localization,alpha_classification,matching_function,confidence_loss,localization_granularity,localization_loss,set_rule,classification_coverages_mean,...,confidence_coverages_mean,confidence_coverages_std,confidence_set_sizes_mean,confidence_set_sizes_std,global_coverage_mean,global_coverage_std,localization_coverages_mean,localization_coverages_std,localization_set_sizes_mean,localization_set_sizes_std
0,"alpha-[0.02, 0.05, 0.05]-hausdorff_box_count_r...",0.02,0.05,0.05,hausdorff,box_count_recall,boxwise,aps,additive,0.054963,...,0.017648,0.089181,18.9968,21.346138,0.098406,0.192607,0.050359,0.143871,3.343886,2.246981e+00
1,"alpha-[0.02, 0.05, 0.05]-hausdorff_box_count_r...",0.02,0.05,0.05,hausdorff,box_count_recall,boxwise,aps,multiplicative,0.054963,...,0.017648,0.089181,18.9968,21.346138,0.096937,0.180710,0.046651,0.124353,1.746295,1.356751e-01
2,"alpha-[0.02, 0.05, 0.05]-hausdorff_box_count_r...",0.02,0.05,0.05,hausdorff,box_count_recall,boxwise,lac,additive,0.054434,...,0.017648,0.089181,18.9968,21.346138,0.097722,0.190179,0.050359,0.143871,3.343886,2.246981e+00
3,"alpha-[0.02, 0.05, 0.05]-hausdorff_box_count_r...",0.02,0.05,0.05,hausdorff,box_count_recall,boxwise,lac,multiplicative,0.054434,...,0.017648,0.089181,18.9968,21.346138,0.095993,0.178000,0.046651,0.124353,1.746295,1.356751e-01
4,"alpha-[0.02, 0.05, 0.05]-hausdorff_box_count_r...",0.02,0.05,0.05,hausdorff,box_count_recall,pixelwise,aps,additive,0.054963,...,0.017648,0.089181,18.9968,21.346138,0.098594,0.164868,0.049622,0.100204,1.203226,2.185098e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,"alpha-[0.02, 0.05, 0.05]-mix_box_thresholded_d...",0.02,0.05,0.05,mix,box_thresholded_distance,pixelwise,lac,multiplicative,0.046543,...,0.038464,0.190498,34.6948,24.924142,0.091043,0.147713,0.053351,0.095364,1.244141,1.405217e-07
83,"alpha-[0.02, 0.05, 0.05]-mix_box_thresholded_d...",0.02,0.05,0.05,mix,box_thresholded_distance,thresholded,aps,additive,0.046748,...,0.038464,0.190498,34.6948,24.924142,0.091515,0.243893,0.050400,0.218769,3.842282,2.277950e+00
84,"alpha-[0.02, 0.05, 0.05]-mix_box_thresholded_d...",0.02,0.05,0.05,mix,box_thresholded_distance,thresholded,aps,multiplicative,0.046748,...,0.038464,0.190498,34.6948,24.924142,0.103588,0.261861,0.060400,0.238226,1.952148,1.285263e-07
85,"alpha-[0.02, 0.05, 0.05]-mix_box_thresholded_d...",0.02,0.05,0.05,mix,box_thresholded_distance,thresholded,lac,additive,0.046543,...,0.038464,0.190498,34.6948,24.924142,0.091185,0.242810,0.050400,0.218769,3.842282,2.277950e+00


In [13]:
# Write the summary table to CSV; index=False leaves the row index out of the file.
df.to_csv("results_summary_exp1_detr.csv", index=False)