In [1]:
import pandas as pd
import yaml
import os

## Configuration
### Files and Precision

In [2]:
# Number of decimal places used when floats are formatted into table strings.
precision = 3

# Directory that holds every aggregated-stats CSV read by this notebook.
# Change it here (once) when the results live somewhere else.
AGGREGATED_STATS_DIR = "/app/results/aggregated_stats"

# RSNA Bone Age
rsna_stats_csv_file = f"{AGGREGATED_STATS_DIR}/rsna_test_best_err_uq_stats.csv"
rsna_uq_by_error_aucs_csv_file = (
    f"{AGGREGATED_STATS_DIR}/rsna_test_best_uncertainty_by_error_aucs.csv"
)
# NOTE(review): this file name uses "uq_by_error" while the clavicle counterpart
# below uses "uncertainty_by_error" -- confirm both names match the files on disk.
rsna_uq_reorder_ranks_csv_file = (
    f"{AGGREGATED_STATS_DIR}/rsna_test_best_uq_by_error_reorder_ranks.csv"
)

# Clavicle CT
clavicle_stats_csv_file = f"{AGGREGATED_STATS_DIR}/clavicle_test_best_err_uq_stats.csv"
clavicle_uq_by_error_aucs_csv_file = (
    f"{AGGREGATED_STATS_DIR}/clavicle_test_best_uncertainty_by_error_aucs.csv"
)
clavicle_uq_reorder_ranks_csv_file = (
    f"{AGGREGATED_STATS_DIR}/clavicle_test_best_uncertainty_by_error_reorder_ranks.csv"
)


### Column Names

In [3]:
# Maps the column keys used in the data frames to the headers shown in the
# generated tables.
COLUMN_NAMES = {
    # General
    "name": "Model",
    # Error / UQ Stats
    "error": "Error ± Std",
    "error_median": "Error Median",
    "uq": "Uncertainty ± Std",
    "uncertainty_median": "Uncertainty Median",
    # Uncertainty by Error AUCs
    "mean_line_auc": "Mean AUC",
    "mean_to_half": "Mean-To-Half",
    "min_line_auc": "Min AUC",
    "min_to_half": "Min-To-Half",
    # Reorder Ranks
    "sort_min_swaps_rank": "Min Swaps",
    "sort_avg_idx_distance": "Avg. Index Distance",
    "sort_avg_idx_right_move": "Avg. Right-Move",
    "sort_idx_right_move_count": "Right-Move Counts",
    "sort_avg_idx_distance_norm": "Avg. Index Distance Normed",
    "sort_avg_idx_right_move_norm": "Avg. Right-Move Normed",
}


def _display_names(keys):
    """Return the display names for ``keys`` (in the given order).

    Raises:
        KeyError: if a key has no entry in ``COLUMN_NAMES``.
    """
    return [COLUMN_NAMES[key] for key in keys]


# Column selections (already translated to display names) for each table.
STATS_MARKDOWN_COLS = _display_names(
    ["name", "error", "error_median", "uq", "uncertainty_median"]
)
UQ_BY_ERROR_AUCS_MARKDOWN_COLS = _display_names(
    ["name", "mean_line_auc", "mean_to_half", "min_line_auc", "min_to_half"]
)
# Misspelled original name ("MARDOWN"); kept as an alias so that existing
# references to it keep working.
UQ_BY_ERROR_AUCS_MARDOWN_COLS = UQ_BY_ERROR_AUCS_MARKDOWN_COLS
UQ_BY_ERROR_REORDER_RANKS_COLS = _display_names(
    [
        "name",
        "sort_min_swaps_rank",
        "sort_avg_idx_distance",
        "sort_avg_idx_distance_norm",
        "sort_avg_idx_right_move",
        "sort_avg_idx_right_move_norm",
        "sort_idx_right_move_count",
    ]
)


## Util Functions

In [4]:
def format_fn(x):
    """Format ``x`` as a fixed-point string with ``precision`` decimal places.

    ``precision`` is the module-level setting and is looked up on every call.
    The ``#`` flag in the format spec keeps the decimal point in the output
    even if ``precision`` is 0.

    Args:
        x: The number to format.

    Returns:
        The formatted string, e.g. ``"0.220"`` for ``0.22`` with precision 3.
    """
    return "{:#.{prec}f}".format(x, prec=precision)


def postprocess_stats_df(df: pd.DataFrame) -> pd.DataFrame:
    """Turn the numeric error / uncertainty stats into display strings.

    Adds the columns ``error`` and ``uq`` holding ``"<mean> ± <std>"`` strings
    and replaces ``error_median`` and ``uncertainty_median`` by their formatted
    string representation. All numbers are formatted with ``format_fn``.

    The input frame is not modified; a new frame is returned. This keeps the
    function safe to call again on the same input (an in-place version would
    try to float-format the already formatted median strings on a second call).

    Args:
        df: Frame with the columns ``error_mean``, ``error_std``,
            ``error_median``, ``uncertainty_mean``, ``uncertainty_std`` and
            ``uncertainty_median``.

    Returns:
        A new frame with all original columns plus ``error`` and ``uq``.
    """

    def _mean_pm_std(mean_col: str, std_col: str) -> pd.Series:
        # "<mean> ± <std>" with both parts formatted to the same precision.
        return df[mean_col].apply(format_fn) + " ± " + df[std_col].apply(format_fn)

    # ``assign`` works on a copy: new columns are appended in the given order,
    # existing ones (the medians) are replaced at their current position.
    return df.assign(
        error=_mean_pm_std("error_mean", "error_std"),
        uq=_mean_pm_std("uncertainty_mean", "uncertainty_std"),
        error_median=df["error_median"].apply(format_fn),
        uncertainty_median=df["uncertainty_median"].apply(format_fn),
    )


def postprocess_uq_by_error_aucs_df(df: pd.DataFrame) -> pd.DataFrame:
    """Format the uncertainty-by-error AUC columns as display strings.

    The columns ``mean_line_auc``, ``mean_to_half``, ``min_line_auc`` and
    ``min_to_half`` are replaced by their ``format_fn`` string representation.

    The input frame is not modified; a new frame is returned, so calling the
    function again on the same input gives the same result.

    Args:
        df: Frame containing the four AUC columns listed above.

    Returns:
        A new frame with the AUC columns formatted as strings.
    """
    auc_columns = ["mean_line_auc", "mean_to_half", "min_line_auc", "min_to_half"]
    formatted = {column: df[column].apply(format_fn) for column in auc_columns}
    # ``assign`` replaces the existing columns on a copy of ``df``.
    return df.assign(**formatted)


def postprocess_uq_by_error_ranks_df(df: pd.DataFrame) -> pd.DataFrame:
    """Round the reorder-rank metrics to their display precision.

    The two average-distance columns are rounded to one decimal place, their
    normalised counterparts to three. Columns not listed are returned as is.

    Args:
        df: Frame with the reorder-rank columns.

    Returns:
        A rounded copy of ``df``.
    """
    decimals_per_column = dict(
        sort_avg_idx_distance=1,
        sort_avg_idx_right_move=1,
        sort_avg_idx_distance_norm=3,
        sort_avg_idx_right_move_norm=3,
    )
    return df.round(decimals_per_column)


def update_col_names(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose columns carry their display names.

    Columns are renamed according to ``COLUMN_NAMES``; columns without an
    entry keep their name. ``df`` itself is not modified.
    """
    renamed = df.rename(columns=COLUMN_NAMES)
    return renamed


## RSNA Bone Age

In [5]:
# Load the three RSNA result tables and bring each into its display form.
rsna_df_stats = pd.read_csv(rsna_stats_csv_file).pipe(postprocess_stats_df)
rsna_df_uq_by_error_aucs = pd.read_csv(rsna_uq_by_error_aucs_csv_file).pipe(
    postprocess_uq_by_error_aucs_df
)
rsna_df_uq_reorder_ranks = pd.read_csv(rsna_uq_reorder_ranks_csv_file).pipe(
    postprocess_uq_by_error_ranks_df
)


### Markdown Tables

#### Error / UQ Stats

In [6]:
# Markdown table of the RSNA error / uncertainty stats (display headers, no index).
rsna_stats_markdown = (
    rsna_df_stats.pipe(update_col_names)
    .loc[:, STATS_MARKDOWN_COLS]
    .to_markdown(index=False)
)
print(rsna_stats_markdown)


| Model          | Error ± Std   |   Error Median | Uncertainty ± Std   |   Uncertainty Median |
|:---------------|:--------------|---------------:|:--------------------|---------------------:|
| Mean-Predictor | 2.744 ± 1.949 |          2.378 | nan ± nan           |              nan     |
| Baseline       | 0.552 ± 0.488 |          0.415 | nan ± nan           |              nan     |
| MCDO-10        | 0.561 ± 0.490 |          0.432 | 0.357 ± 0.159       |                0.343 |
| MCDO-100       | 0.553 ± 0.491 |          0.419 | 0.369 ± 0.137       |                0.379 |
| DE-10          | 0.530 ± 0.472 |          0.411 | 0.234 ± 0.092       |                0.22  |
| DE-20          | 0.526 ± 0.470 |          0.402 | 0.234 ± 0.084       |                0.22  |
| LA             | 0.551 ± 0.482 |          0.42  | 0.446 ± 0.066       |                0.438 |
| SWAG           | 0.568 ± 0.496 |          0.43  | 0.854 ± 0.262       |                0.789 |
| SWAG-NO-WD     | 0.815 ± 0.6

#### Uncertainty by Error AUCs

In [7]:
# Markdown table of the RSNA uncertainty-by-error AUCs (display headers, no index).
rsna_uq_by_error_aucs_markdown = (
    rsna_df_uq_by_error_aucs.pipe(update_col_names)
    .loc[:, UQ_BY_ERROR_AUCS_MARDOWN_COLS]
    .to_markdown(index=False)
)
print(rsna_uq_by_error_aucs_markdown)


| Model        |   Mean AUC |   Mean-To-Half |   Min AUC |   Min-To-Half |
|:-------------|-----------:|---------------:|----------:|--------------:|
| MCDO-10      |      0     |          0.5   |     0.21  |         0.29  |
| MCDO-100     |      0.167 |          0.333 |     0.235 |         0.265 |
| DE-10        |      0.731 |          0.231 |     0.178 |         0.322 |
| DE-20        |      0.533 |          0.033 |     0.558 |         0.058 |
| LA           |      0.167 |          0.333 |     0.323 |         0.177 |
| SWAG         |      0.571 |          0.071 |     0.182 |         0.318 |
| SWAG-NO-WD   |      0.41  |          0.09  |     0.252 |         0.248 |
| VAR-MCDO-10  |      0     |          0.5   |     0.549 |         0.049 |
| VAR-MCDO-100 |      0.215 |          0.285 |     0.347 |         0.153 |
| VAR-DE-10    |      0.246 |          0.254 |     0.372 |         0.128 |
| VAR-DE-20    |      0.348 |          0.152 |     0.306 |         0.194 |


#### Uncertainty Reorder Ranks

In [8]:
# Markdown table of the RSNA reorder ranks (display headers, no index).
rsna_uq_reorder_ranks_markdown = (
    rsna_df_uq_reorder_ranks.pipe(update_col_names)
    .loc[:, UQ_BY_ERROR_REORDER_RANKS_COLS]
    .to_markdown(index=False)
)
print(rsna_uq_reorder_ranks_markdown)


| Model        |   Min Swaps |   Avg. Index Distance |   Avg. Index Distance Normed |   Avg. Right-Move |   Avg. Right-Move Normed |   Right-Move Counts |
|:-------------|------------:|----------------------:|-----------------------------:|------------------:|-------------------------:|--------------------:|
| MCDO-10      |        1227 |                 420.9 |                        0.682 |             210.4 |                    0.341 |                 624 |
| MCDO-100     |        1230 |                 439.7 |                        0.713 |             219.9 |                    0.356 |                 617 |
| DE-10        |        1228 |                 384.5 |                        0.623 |             192.3 |                    0.312 |                 623 |
| DE-20        |        1227 |                 375.9 |                        0.609 |             188   |                    0.305 |                 641 |
| LA           |        1221 |                 391.1 |                

### Latex Tables

#### Error / UQ Stats

In [9]:
# Mark the row(s) with the lowest *numeric* mean error. The "error" column
# holds "<mean> ± <std>" strings, so taking its minimum directly would compare
# lexicographically (e.g. "10.000 ..." sorts before "9.000 ...").
rsna_is_lowest_error = rsna_df_stats["error_mean"].eq(rsna_df_stats["error_mean"].min())
print(
    rsna_df_stats[["name", "error", "uq"]]
    .style.apply(
        # One CSS-like property string per cell; "bfseries: ;" becomes
        # \bfseries in the LaTeX output, "" leaves the cell unstyled.
        lambda col: rsna_is_lowest_error.loc[col.index].map({True: "bfseries: ;", False: ""}),
        subset=["error"],
    )
    .hide(level=0, axis=0)
    .format()
    .to_latex(column_format="lrr")
)


\begin{tabular}{lrr}
name & error & uq \\
Mean-Predictor & 2.744 ± 1.949 & nan ± nan \\
Baseline & 0.552 ± 0.488 & nan ± nan \\
MCDO-10 & 0.561 ± 0.490 & 0.357 ± 0.159 \\
MCDO-100 & 0.553 ± 0.491 & 0.369 ± 0.137 \\
DE-10 & 0.530 ± 0.472 & 0.234 ± 0.092 \\
DE-20 & \bfseries 0.526 ± 0.470 & 0.234 ± 0.084 \\
LA & 0.551 ± 0.482 & 0.446 ± 0.066 \\
SWAG & 0.568 ± 0.496 & 0.854 ± 0.262 \\
SWAG-NO-WD & 0.815 ± 0.660 & 1.805 ± 1.031 \\
VAR-MCDO-10 & 0.597 ± 0.500 & 0.974 ± 0.198 \\
VAR-MCDO-100 & 0.569 ± 0.483 & 0.980 ± 0.183 \\
VAR-DE-10 & 0.545 ± 0.467 & 0.832 ± 0.139 \\
VAR-DE-20 & 0.545 ± 0.463 & 0.831 ± 0.149 \\
\end{tabular}



#### Uncertainty Reorder Ranks

In [10]:
# LaTeX table of the RSNA index-distance metrics; the lowest value per metric
# column is set in bold.
rsna_rank_metric_cols = ["sort_avg_idx_distance", "sort_avg_idx_distance_norm"]
rsna_rank_styler = rsna_df_uq_reorder_ranks[["name", *rsna_rank_metric_cols]].style
rsna_rank_styler = rsna_rank_styler.highlight_min(
    subset=rsna_rank_metric_cols, props="bfseries: ;"
)
rsna_rank_styler = rsna_rank_styler.hide(level=0, axis=0)
rsna_rank_styler = rsna_rank_styler.format(subset="sort_avg_idx_distance", precision=1)
rsna_rank_styler = rsna_rank_styler.format(subset="sort_avg_idx_distance_norm", precision=3)
print(rsna_rank_styler.to_latex())


\begin{tabular}{lrr}
name & sort_avg_idx_distance & sort_avg_idx_distance_norm \\
MCDO-10 & 420.9 & 0.682 \\
MCDO-100 & 439.7 & 0.713 \\
DE-10 & 384.5 & 0.623 \\
DE-20 & 375.9 & 0.609 \\
LA & 391.1 & 0.634 \\
SWAG & 388.3 & 0.629 \\
SWAG-NO-WD & \bfseries 326.0 & \bfseries 0.528 \\
VAR-MCDO-10 & 410.5 & 0.665 \\
VAR-MCDO-100 & 407.3 & 0.660 \\
VAR-DE-10 & 423.3 & 0.686 \\
VAR-DE-20 & 427.9 & 0.693 \\
\end{tabular}



## Clavicle CT

In [11]:
# Load the three clavicle result tables and bring each into its display form.
clavicle_df_stats = pd.read_csv(clavicle_stats_csv_file).pipe(postprocess_stats_df)
clavicle_df_uq_by_error_aucs = pd.read_csv(clavicle_uq_by_error_aucs_csv_file).pipe(
    postprocess_uq_by_error_aucs_df
)
clavicle_df_uq_reorder_ranks = pd.read_csv(clavicle_uq_reorder_ranks_csv_file).pipe(
    postprocess_uq_by_error_ranks_df
)


### Markdown Tables

#### Error / UQ Stats

In [12]:
# Markdown table of the clavicle error / uncertainty stats (display headers, no index).
clavicle_stats_markdown = (
    clavicle_df_stats.pipe(update_col_names)
    .loc[:, STATS_MARKDOWN_COLS]
    .to_markdown(index=False)
)
print(clavicle_stats_markdown)


| Model          | Error ± Std   |   Error Median | Uncertainty ± Std   |   Uncertainty Median |
|:---------------|:--------------|---------------:|:--------------------|---------------------:|
| Mean-Predictor | 3.852 ± 2.308 |          3.897 | nan ± nan           |              nan     |
| Baseline       | 1.597 ± 1.267 |          1.314 | nan ± nan           |              nan     |
| MCDO-10        | 1.593 ± 1.270 |          1.304 | 0.635 ± 0.328       |                0.599 |
| MCDO-100       | 1.594 ± 1.266 |          1.291 | 0.654 ± 0.277       |                0.663 |
| DE-10          | 1.510 ± 1.170 |          1.264 | 1.504 ± 0.601       |                1.38  |
| DE-20          | 1.502 ± 1.168 |          1.251 | 1.511 ± 0.531       |                1.43  |
| LA             | 1.590 ± 1.324 |          1.355 | 1.241 ± 0.346       |                1.139 |
| SWAG           | 1.548 ± 1.347 |          1.091 | 2.359 ± 0.601       |                2.272 |
| SWAG-NO-WD     | 1.730 ± 1.3

#### Uncertainty by Error AUCs

In [13]:
# Markdown table of the clavicle uncertainty-by-error AUCs (display headers, no index).
clavicle_uq_by_error_aucs_markdown = (
    clavicle_df_uq_by_error_aucs.pipe(update_col_names)
    .loc[:, UQ_BY_ERROR_AUCS_MARDOWN_COLS]
    .to_markdown(index=False)
)
print(clavicle_uq_by_error_aucs_markdown)


| Model        |   Mean AUC |   Mean-To-Half |   Min AUC |   Min-To-Half |
|:-------------|-----------:|---------------:|----------:|--------------:|
| MCDO-10      |      0.178 |          0.322 |     0.201 |         0.299 |
| MCDO-100     |      0.447 |          0.053 |     0.283 |         0.217 |
| DE-10        |      0     |          0.5   |     0.578 |         0.078 |
| DE-20        |      0.789 |          0.289 |     0.467 |         0.033 |
| LA           |      0.131 |          0.369 |     0.262 |         0.238 |
| SWAG         |      0     |          0.5   |     0.466 |         0.034 |
| SWAG-NO-WD   |      0     |          0.5   |     0.698 |         0.198 |
| VAR-MCDO-10  |      0.254 |          0.246 |     0.498 |         0.002 |
| VAR-MCDO-100 |      0.545 |          0.045 |     0.562 |         0.062 |
| VAR-DE-10    |      0.143 |          0.357 |     0.486 |         0.014 |
| VAR-DE-20    |      0.143 |          0.357 |     0.451 |         0.049 |


#### Uncertainty Reorder Ranks

In [14]:
# Markdown table of the clavicle reorder ranks (display headers, no index).
clavicle_uq_reorder_ranks_markdown = (
    clavicle_df_uq_reorder_ranks.pipe(update_col_names)
    .loc[:, UQ_BY_ERROR_REORDER_RANKS_COLS]
    .to_markdown(index=False)
)
print(clavicle_uq_reorder_ranks_markdown)


| Model        |   Min Swaps |   Avg. Index Distance |   Avg. Index Distance Normed |   Avg. Right-Move |   Avg. Right-Move Normed |   Right-Move Counts |
|:-------------|------------:|----------------------:|-----------------------------:|------------------:|-------------------------:|--------------------:|
| MCDO-10      |         292 |                  89.2 |                        0.595 |              44.6 |                    0.297 |                 157 |
| MCDO-100     |         294 |                  89.6 |                        0.598 |              44.8 |                    0.299 |                 165 |
| DE-10        |         295 |                  88.9 |                        0.592 |              44.4 |                    0.296 |                 157 |
| DE-20        |         292 |                  88.9 |                        0.593 |              44.5 |                    0.296 |                 155 |
| LA           |         294 |                 102.5 |                

### Latex Tables

#### Error / UQ Stats

In [15]:
# Mark the row(s) with the lowest *numeric* mean error. The "error" column
# holds "<mean> ± <std>" strings, so taking its minimum directly would compare
# lexicographically (e.g. "10.000 ..." sorts before "9.000 ...").
clavicle_is_lowest_error = clavicle_df_stats["error_mean"].eq(
    clavicle_df_stats["error_mean"].min()
)
print(
    clavicle_df_stats[["name", "error", "uq"]]
    .style.apply(
        # One CSS-like property string per cell; "bfseries: ;" becomes
        # \bfseries in the LaTeX output, "" leaves the cell unstyled.
        lambda col: clavicle_is_lowest_error.loc[col.index].map(
            {True: "bfseries: ;", False: ""}
        ),
        subset=["error"],
    )
    .hide(level=0, axis=0)
    .format()
    .to_latex(column_format="lrr")
)


\begin{tabular}{lrr}
name & error & uq \\
Mean-Predictor & 3.852 ± 2.308 & nan ± nan \\
Baseline & 1.597 ± 1.267 & nan ± nan \\
MCDO-10 & 1.593 ± 1.270 & 0.635 ± 0.328 \\
MCDO-100 & 1.594 ± 1.266 & 0.654 ± 0.277 \\
DE-10 & 1.510 ± 1.170 & 1.504 ± 0.601 \\
DE-20 & \bfseries 1.502 ± 1.168 & 1.511 ± 0.531 \\
LA & 1.590 ± 1.324 & 1.241 ± 0.346 \\
SWAG & 1.548 ± 1.347 & 2.359 ± 0.601 \\
SWAG-NO-WD & 1.730 ± 1.392 & 1.826 ± 0.368 \\
VAR-MCDO-10 & 2.110 ± 1.660 & 3.958 ± 0.630 \\
VAR-MCDO-100 & 2.073 ± 1.658 & 3.949 ± 0.628 \\
VAR-DE-10 & 1.776 ± 1.323 & 4.271 ± 0.706 \\
VAR-DE-20 & 1.753 ± 1.298 & 4.171 ± 0.684 \\
\end{tabular}



#### Uncertainty Reorder Ranks

In [16]:
# LaTeX table of the clavicle index-distance metrics; the lowest value per
# metric column is set in bold.
clavicle_rank_metric_cols = ["sort_avg_idx_distance", "sort_avg_idx_distance_norm"]
clavicle_rank_styler = clavicle_df_uq_reorder_ranks[["name", *clavicle_rank_metric_cols]].style
clavicle_rank_styler = clavicle_rank_styler.highlight_min(
    subset=clavicle_rank_metric_cols, props="bfseries: ;"
)
clavicle_rank_styler = clavicle_rank_styler.hide(level=0, axis=0)
clavicle_rank_styler = clavicle_rank_styler.format(subset="sort_avg_idx_distance", precision=1)
clavicle_rank_styler = clavicle_rank_styler.format(
    subset="sort_avg_idx_distance_norm", precision=3
)
print(clavicle_rank_styler.to_latex())


\begin{tabular}{lrr}
name & sort_avg_idx_distance & sort_avg_idx_distance_norm \\
MCDO-10 & 89.2 & 0.595 \\
MCDO-100 & 89.6 & 0.598 \\
DE-10 & 88.9 & 0.592 \\
DE-20 & 88.9 & 0.593 \\
LA & 102.5 & 0.683 \\
SWAG & 106.6 & 0.711 \\
SWAG-NO-WD & 96.6 & 0.644 \\
VAR-MCDO-10 & 84.4 & 0.562 \\
VAR-MCDO-100 & 84.5 & 0.563 \\
VAR-DE-10 & 81.4 & 0.542 \\
VAR-DE-20 & \bfseries 81.2 & \bfseries 0.541 \\
\end{tabular}

