# This notebook generates the tables reported in the paper.

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

from typing import List
import numpy as np
import pandas as pd
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

from src.data import SLOW_DATASETS
from src.models import get_model_default_kwargs_for_ds
from src.experiments import shared_experiment_logic
from src.utils import create_folder

# Let wide result tables render in full instead of being truncated
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# Locations of the input datasets and of the experiment outputs,
# relative to the notebook's working directory
DATASET_DIR = "../datasets"
RESULTS_DIR = "../results"

# Maps the method identifiers stored in the results file to the names shown
# in the generated tables. The insertion order determines the column order
# of the per-condition results tables.
method_mapping = {
    'naive': 'Ablation 1',
    'positive': 'Ablation 2',
    'aum': 'AUM',
    'normalized_margins': 'Normalized Margins',
    'cross_val': 'Self-Confidence',
    'cleanlab': 'CleanLab',
}

## Generate the dataset overview table

In [2]:
def name_title_mapping(ds_name: str):
    """
    Turns a dataset identifier into a display title.

    Underscores become spaces and every word is title-cased,
    e.g. 'credit_card_fraud' -> 'Credit Card Fraud'.
    """
    return ds_name.replace('_', ' ').title()

def get_model_entry_data(ds_name: str, save_dir: str):
    """
    Builds one row of the dataset overview table.

    Runs the shared experiment pipeline on the dataset with a noise level
    of 0.0 and collects dataset statistics together with the model settings.

    Args:
        ds_name: Identifier of the dataset.
        save_dir: Directory forwarded to `shared_experiment_logic` as `save_dir`.

    Returns:
        A dict with the keys 'Name', '#Samples', '#Features', '#Classes',
        '#Estimators', 'Max. Tree Depth' and 'Val. Acc.'.
    """
    # Start from the dataset's default model settings; slow datasets use CUDA
    config = get_model_default_kwargs_for_ds(ds_name)
    if ds_name in SLOW_DATASETS:
        config['device'] = 'cuda'

    # Run the pipeline without label noise. Only the data and the accuracies
    # are needed here; the remaining outputs are unpacked but not used.
    (
        X, y, _model, _y_noisy, _noise_mask,
        accuracies, _aum_values,
        _values_naive, _values_pos,
        _values_entropy, _oos_probas,
    ) = shared_experiment_logic(
        ds_name=ds_name,
        ds_kwargs={},
        noise_type='uniform',
        noise_level=0.0,
        model_config=config,
        n_folds=5,
        random_state=42,
        save_dir=save_dir,
        verbose=False,
    )

    n_samples, n_features = X.shape[0], X.shape[1]
    # Last entry of the returned accuracies, expressed in percent
    final_accuracy = round(100*accuracies[-1], 4)

    return {
        'Name': name_title_mapping(ds_name),
        # Dataset info
        '#Samples': n_samples,
        '#Features': n_features,
        '#Classes': len(np.unique(y)),
        # Model info
        '#Estimators': config['n_estimators'],
        'Max. Tree Depth': config['max_depth'],
        'Val. Acc.': final_accuracy,
    }

In [3]:
# Datasets included in the paper's tables
ds_name_list = [
    'cardiotocography',
    'credit_card_fraud',
    'digits',
    'human_activity_recognition',
    'letters',
    'satelite',
    'sensorless_drive',
    'spirals',
    'mushrooms',
]

# One overview row per dataset (each entry runs the experiment pipeline once)
summary_rows = [
    get_model_entry_data(name, DATASET_DIR) for name in tqdm(ds_name_list)
]
summary_df = pd.DataFrame(summary_rows)
summary_df

100%|██████████| 9/9 [02:45<00:00, 18.34s/it]


Unnamed: 0,Name,#Samples,#Features,#Classes,#Estimators,Max. Tree Depth,Val. Acc.
0,Cardiotocography,2126,21,3,30,3,94.92
1,Credit Card Fraud,284807,30,2,50,5,99.9586
2,Digits,1797,64,10,50,5,96.4942
3,Human Activity Recognition,10299,562,6,100,5,99.2329
4,Letters,20000,16,26,100,5,96.11
5,Satelite,6435,36,6,50,5,91.6706
6,Sensorless Drive,58509,48,11,50,5,99.8428
7,Spirals,1500,11,3,50,5,99.6
8,Mushrooms,8124,117,2,30,3,100.0


In [4]:
# Export the dataset overview as an Excel sheet (without the row index)
tables_dir = f"{RESULTS_DIR}/tables"
create_folder(tables_dir)
summary_df.to_excel(f"{tables_dir}/dataset_info_summary.xlsx", index=False)

## Generate the relative improvement and superiority tables

In [5]:
# Load the per-trial results; the entropy scores are excluded from the comparison
df = pd.read_csv(RESULTS_DIR + '/label_error_trials.csv')
df = df[df["method"] != "entropy"]

# Keep the methods in their order of first appearance in the results file;
# this order defines the rows/columns of both tables.
methods = df['method'].unique()
mapped_methods = [method_mapping[method] for method in methods]
improvements = pd.DataFrame(columns=mapped_methods, index=mapped_methods)
superiority = pd.DataFrame(columns=mapped_methods, index=mapped_methods)

# The comparisons below are element-wise, so the k-th AUROC value of every
# method must belong to the same trial. Sort by the trial key (stable sort)
# instead of relying on the row order of the CSV, and verify that all methods
# cover exactly the same trials.
trial_keys = ["ds_name", "noise_type", "noise_level", "random_state"]
df = df.sort_values(trial_keys, kind="mergesort")

# Extract the AUROC values once per method rather than inside the nested loop
auroc_by_method = {}
reference_trials = None
for method in methods:
    method_rows = df[df['method'] == method]
    method_trials = method_rows[trial_keys].reset_index(drop=True)
    if reference_trials is None:
        reference_trials = method_trials
    elif not method_trials.equals(reference_trials):
        raise ValueError(
            f"Method '{method}' does not cover the same trials as "
            f"'{methods[0]}'; a paired comparison is not possible."
        )
    auroc_by_method[method] = method_rows['auroc'].to_numpy()

# Row = method A, column = method B
for i, method_A in enumerate(methods):
    auroc_A = auroc_by_method[method_A]
    for j, method_B in enumerate(methods):
        auroc_B = auroc_by_method[method_B]
        # Superiority: share of trials in which A is at least as good as B
        superiority.iloc[i, j] = f"{100*np.mean(auroc_A >= auroc_B):.1f}%"
        # Relative improvement: fraction of A's remaining gap to a perfect
        # AUROC of 1 that is closed by B (negative if B is worse than A).
        # If A is already perfect the denominator is 0: 0/0 gives NaN, which
        # is set to 0 below; a non-zero numerator gives +/-inf, which is kept.
        with np.errstate(divide='ignore', invalid='ignore'):
            rel_improvement = (auroc_B - auroc_A) / (1 - auroc_A)
        rel_improvement[np.isnan(rel_improvement)] = 0
        improvement = 100 * np.median(rel_improvement)
        improvements.iloc[i, j] = f"{improvement:.1f}%"

print("Median AUROC improvement\n", improvements)
print("\nSuperiority\n", superiority)

# Make sure the output folder exists so this cell does not depend on an
# earlier cell having created it
create_folder(RESULTS_DIR + '/tables')
improvements.to_excel(RESULTS_DIR + '/tables/median_auroc_improvement.xlsx')
superiority.to_excel(RESULTS_DIR + '/tables/superiority.xlsx')

Median AUROC improvement
                       AUM Ablation 1 Ablation 2 Self-Confidence Normalized Margins  CleanLab
AUM                  0.0%    -185.3%      -4.2%            0.4%               0.0%   -794.4%
Ablation 1          65.0%       0.0%      62.3%           70.1%              62.8%   -122.4%
Ablation 2           4.1%    -165.5%       0.0%           11.9%              16.8%   -604.0%
Self-Confidence     -0.4%    -234.5%     -13.5%            0.0%               0.0%  -1326.4%
Normalized Margins   0.0%    -168.6%     -20.2%            0.0%               0.0%  -1232.2%
CleanLab            88.8%      55.0%      85.8%           93.0%              92.5%      0.0%

Superiority
                        AUM Ablation 1 Ablation 2 Self-Confidence Normalized Margins CleanLab
AUM                 100.0%      97.8%      75.9%           49.6%              50.9%    99.3%
Ablation 1            4.1%     100.0%       9.6%           10.6%               5.9%    85.6%
Ablation 2           43.3%    

## Generate the AUROC and FPR tables

In [6]:
# Columns that together identify one trial of one method
index_cols = ["ds_name", "method", "noise_type", "noise_level", "random_state"]
path = RESULTS_DIR + '/label_error_trials.csv'

# Load the trial results and move the identifying columns into a MultiIndex,
# leaving only the metric columns as data
df = pd.read_csv(path).set_index(index_cols)

In [7]:
# Mappings to increase readability
col_mapping = {
    'auroc': 'AUROC (%)',
    'fpr_50': 'FPR@TPR 50 (%)',
}

def generate_condition_results_table(
        df: pd.DataFrame,
        ds_names: List[str], 
        noise_type: str, 
        noise_level: float
    ) -> pd.DataFrame:
    """
    Generates a table with the mean and standard deviation of AUROC and
    FPR@TPR 50 for the specified conditions.
    """
    methods = list(method_mapping.keys()) 
    cols_of_interest = list(col_mapping.keys())

    # Create empty multi-level column dataframe
    cols_first_level = [col_mapping[col] for col in cols_of_interest]
    cols_second_level = [method_mapping[m] for m in methods]
    cols = pd.MultiIndex.from_product([cols_first_level, cols_second_level])
    df_filtered = pd.DataFrame(index=ds_names, columns=cols)

    # Gather the relevant data
    for ds_name in ds_names:
        for method in methods:
            df_ds = df.loc[(ds_name, method, noise_type, noise_level)]
            for col in cols_of_interest:
                # Combine mean +- std as string
                means = df_ds[col].mean().round(3) * 100
                stds = df_ds[col].std().round(3) * 100
                value = f"{means:.1f}±{stds:.1f}"
                # Add to the dataframe
                column = (col_mapping[col], method_mapping[method])
                df_filtered.loc[ds_name, column] = value
    
    # Include dataset name as first column
    df_filtered.index.name = 'Dataset'
    # Make the first letter of the dataset name uppercase
    df_filtered.index = df_filtered.index.str.capitalize()
    # Sort the dataframe by the index
    df_filtered.sort_index(inplace=True)

    return df_filtered

# Preview the table for a single condition: 5% asymmetric label noise
df_filtered = generate_condition_results_table(
    df,
    ds_names=ds_name_list,
    noise_type='asymmetric',
    noise_level=0.05,
)
df_filtered

Unnamed: 0_level_0,AUROC (%),AUROC (%),AUROC (%),AUROC (%),AUROC (%),AUROC (%),FPR@TPR 50 (%),FPR@TPR 50 (%),FPR@TPR 50 (%),FPR@TPR 50 (%),FPR@TPR 50 (%),FPR@TPR 50 (%)
Unnamed: 0_level_1,Ablation 1,Ablation 2,AUM,Normalized Margins,Self-Confidence,CleanLab,Ablation 1,Ablation 2,AUM,Normalized Margins,Self-Confidence,CleanLab
Dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Cardiotocography,96.8±1.7,98.9±0.6,98.9±0.6,98.9±0.5,98.8±0.5,93.7±1.4,31.6±2.7,4.8±2.2,3.6±2.0,6.8±3.5,14.4±4.3,7.0±3.3
Credit_card_fraud,100.0±0.0,100.0±0.0,100.0±0.0,100.0±0.0,100.0±0.0,94.9±0.3,0.6±0.0,0.3±0.1,0.3±0.1,0.5±0.1,0.5±0.1,0.5±0.1
Digits,95.6±1.1,99.5±0.2,99.6±0.2,99.6±0.2,99.5±0.2,96.2±1.8,2.7±1.4,2.8±1.7,1.5±1.4,0.6±1.0,1.5±1.4,0.6±1.0
Human_activity_recognition,99.0±0.5,99.8±0.0,99.8±0.1,100.0±0.0,100.0±0.0,98.8±0.6,2.1±1.1,0.7±0.5,0.6±0.6,0.0±0.1,0.0±0.1,0.0±0.1
Letters,99.2±0.1,99.1±0.1,99.4±0.1,99.5±0.0,99.3±0.1,97.1±0.4,6.2±0.8,9.1±0.9,2.9±0.6,2.0±0.6,6.0±0.9,1.9±0.6
Mushrooms,99.9±0.1,100.0±0.0,100.0±0.0,100.0±0.0,100.0±0.0,99.2±0.7,0.8±0.9,0.0±0.0,0.0±0.0,0.0±0.0,0.0±0.0,0.0±0.0
Satelite,96.3±0.8,98.0±0.3,98.0±0.3,98.4±0.2,98.4±0.3,94.8±1.0,33.7±1.6,21.4±2.7,20.6±2.7,14.5±1.6,18.2±2.9,14.5±1.6
Sensorless_drive,99.9±0.1,100.0±0.0,100.0±0.0,100.0±0.0,100.0±0.0,99.0±0.3,1.6±0.2,0.2±0.1,0.1±0.1,0.3±0.1,1.5±0.3,0.3±0.1
Spirals,96.8±1.4,99.4±0.5,99.6±0.3,99.8±0.1,99.8±0.1,96.7±1.4,0.5±1.0,0.0±0.0,0.0±0.0,0.0±0.0,0.5±1.1,0.0±0.0


In [8]:
# Write the results to an Excel file, one sheet per (noise type, noise level)
create_folder(RESULTS_DIR + '/tables')
path = RESULTS_DIR + '/tables/results_per_condition.xlsx'

# The distinct conditions do not change while writing, so look them up once
noise_types = df.index.get_level_values('noise_type').unique()
noise_levels = df.index.get_level_values('noise_level').unique()

with pd.ExcelWriter(path) as writer:
    for noise_type in noise_types:
        for noise_level in noise_levels:
            # Round to the nearest integer percentage. Plain int() truncates,
            # so floating-point error would mislabel the sheet
            # (e.g. 100 * 0.29 == 28.999999999999996 -> 28).
            noise_percent = int(round(100 * float(noise_level)))
            sheet_name = f"{noise_percent}% {noise_type} noise"
            sheet_df = generate_condition_results_table(
                df, ds_name_list, noise_type, noise_level,
            )
            sheet_df.to_excel(writer, sheet_name=sheet_name)