# Results Analysis 

In [29]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import re
pd.options.mode.chained_assignment = None  # 'warn' or None for default behavior
sns.set(style="whitegrid")

In [41]:
results_file = 'results/models_comparison_full.csv'
df = pd.read_csv(results_file)
df = df[~df['Architecture'].str.contains('inat') & ~df['Architecture'].str.contains('CLIP_finetuned')]
df.head()

Unnamed: 0,Architecture,Accuracy,Accuracy_TS,AUROC,AUROC_TS,ECE_15,ECE_15_TS,MCE_15,MCE_15_TS,Gamma Correlation,...,hAccuracy_darts_TS,hAccuracy_maxlikelihood_TS,hAccuracy_maxlikelihood_all_TS,marginal_coverage_selective_TS,marginal_coverage_climbing_TS,marginal_coverage_jumping_TS,marginal_coverage_maxcoverage_TS,marginal_coverage_darts_TS,marginal_coverage_maxlikelihood_TS,marginal_coverage_maxlikelihood_all_TS
0,alexnet_torchvision,56.554,56.50222,0.848762,0.847575,0.020001,0.020148,0.045109,0.045058,0.697524,...,,,,0.89585,0.89805,0.89972,0.90023,,,
1,bat_resnext26ts.ch_in1k,78.26,78.30667,0.852646,0.866543,0.152277,0.025664,0.207609,0.075454,0.705293,...,,,,0.89892,0.90335,0.90045,0.90068,,,
2,beit_base_patch16_224.in22k_ft_in22k_in1k,85.22,85.21778,0.86526,0.873466,0.073966,0.024624,0.131687,0.129022,0.73052,...,,,,0.901,0.90107,0.9018,0.89992,,,
3,beit_base_patch16_384.in22k_ft_in22k_in1k,86.82,86.75111,0.863012,0.872034,0.07445,0.029266,0.248498,0.209795,0.726025,...,,,,0.90235,0.90307,0.90207,0.90197,,,
4,beit_large_patch16_224.in22k_ft_in22k_in1k,87.478,87.39778,0.870189,0.878184,0.035049,0.031331,0.308009,0.122774,0.740379,...,,,,0.90097,0.90097,0.90043,0.9004,,,


Calculate Hierarchical Gain

In [42]:
# No TS <-> No TS, TS <-> TS
for inf_rule in ['selective', 'maxcoverage', 'climbing', 'maxlikelihood']:
    for suffix in ['hier', 'hier_TS', '01', '01_TS']:
        col_name = f'hAURC_{inf_rule}_{suffix}'
        df[col_name] = 1000*df[col_name]

for inf_rule in ['maxcoverage', 'climbing', 'maxlikelihood']:
    # for suffix in ['01', '01_TS']:
    for suffix in ['01', '01_TS', 'hier', 'hier_TS']:
        col_name = f'hAURC_{inf_rule}_{suffix}'
        df[f'Gain_{col_name}'] = 100*(df[f'hAURC_selective_{suffix}']-df[col_name])/df[f'hAURC_selective_{suffix}']

# Our algorithms <-> Baselines
for inf_rule in ['climbing', 'maxlikelihood']:
    col_name = f'hAURC_{inf_rule}_hier_TS'
    df[f'Gain_{col_name}_F'] = 100*(df[f'hAURC_selective_hier']-df[col_name])/df[f'hAURC_selective_hier']
    col_name = f'hAURC_{inf_rule}_01_TS'
    df[f'Gain_{col_name}_F'] = 100*(df[f'hAURC_selective_01']-df[col_name])/df[f'hAURC_selective_01']

hAURC and Gain results for inference rules

In [43]:
n = np.sqrt(len(df))
Valmadre = '{Valmadre}'
print(f'Selective & {df["hAURC_selective_01"].mean():.2f} $\pm$ {df["hAURC_selective_01"].std()/n:.2f} & - \\\\')
print(f"Max-Coverage \cite{Valmadre} & {df['hAURC_maxcoverage_01'].mean():.2f} $\pm$ {df['hAURC_maxcoverage_01'].std()/n:.2f} & {df['Gain_hAURC_maxcoverage_01'].mean():.2f} $\pm$ {df['Gain_hAURC_maxcoverage_01'].std()/n:.2f} \\\\")
print(f"Climbing (Ours) & {df['hAURC_climbing_01_TS'].mean():.2f} $\pm$ {df['hAURC_climbing_01_TS'].std()/n:.2f} & {df['Gain_hAURC_climbing_01_TS_F'].mean():.2f} $\pm$ {df['Gain_hAURC_climbing_01_TS_F'].std()/n:.2f} \\\\")
print(f"MP-Climb (Ours) & {df['hAURC_maxlikelihood_01_TS'].mean():.2f} $\pm$ {df['hAURC_maxlikelihood_01_TS'].std()/n:.2f} & {df['Gain_hAURC_maxlikelihood_01_TS_F'].mean():.2f} $\pm$ {df['Gain_hAURC_maxlikelihood_01_TS_F'].std()/n:.2f} \\\\")

Selective & 42.25 $\pm$ 0.46 & - \\
Max-Coverage \cite{Valmadre} & 39.96 $\pm$ 0.53 & 6.93 $\pm$ 0.26 \\
Climbing (Ours) & 36.49 $\pm$ 0.47 & 14.95 $\pm$ 0.22 \\
MP-Climb (Ours) & 36.34 $\pm$ 0.45 & 15.05 $\pm$ 0.16 \\


hAURC results for inference rules - appendix (01)

In [45]:
n = np.sqrt(len(df))
Valmadre = '{Valmadre}'
# No temp scaling
print(f'& Selective & {df["hAURC_selective_hier"].mean():.2f} $\pm$ {df["hAURC_selective_01"].std()/n:.2f} & - \\\\')
print(f"& Max-Coverage \cite{Valmadre} & {df['hAURC_maxcoverage_01'].mean():.2f} $\pm$ {df['hAURC_maxcoverage_01'].std()/n:.2f} & {df['Gain_hAURC_maxcoverage_01'].mean():.2f} $\pm$ {df['Gain_hAURC_maxcoverage_01'].std()/n:.2f} & {df['hAURC_maxcoverage_01'].mean():.2f} $\pm$ {df['hAURC_maxcoverage_01'].std()/n:.2f} \\\\")
print(f"& Climbing (Ours) & {df['hAURC_climbing_01'].mean():.2f} $\pm$ {df['hAURC_climbing_01'].std()/n:.2f} & {df['Gain_hAURC_climbing_01'].mean():.2f} $\pm$ {df['Gain_hAURC_climbing_01'].std()/n:.2f} \\\\")
print(f"& MP-Climb (Ours) & {df['hAURC_maxlikelihood_01'].mean():.2f} $\pm$ {df['hAURC_maxlikelihood_01'].std()/n:.2f} & {df['Gain_hAURC_maxlikelihood_01'].mean():.2f} $\pm$ {df['Gain_hAURC_maxlikelihood_01'].std()/n:.2f}\\\\")
# table formatting
print('\midrule\multirow{4}{4em}{\\textbf{Temp. Scaling}}')
# Temp Scaling
print(f'& Selective & {df["hAURC_selective_01_TS"].mean():.2f} $\pm$ {df["hAURC_selective_01_TS"].std()/n:.2f} & - \\\\')
print(f"& Max-Coverage \cite{Valmadre} & {df['hAURC_maxcoverage_01_TS'].mean():.2f} $\pm$ {df['hAURC_maxcoverage_01_TS'].std()/n:.2f} & {df['Gain_hAURC_maxcoverage_01_TS'].mean():.2f} $\pm$ {df['Gain_hAURC_maxcoverage_01_TS'].std()/n:.2f} \\\\")
print(f"& Climbing (Ours) & {df['hAURC_climbing_01_TS'].mean():.2f} $\pm$ {df['hAURC_climbing_01_TS'].std()/n:.2f} & {df['Gain_hAURC_climbing_01_TS'].mean():.2f} $\pm$ {df['Gain_hAURC_climbing_01_TS'].std()/n:.2f} \\\\")
print(f"& MP-Climb (Ours) & {df['hAURC_maxlikelihood_01_TS'].mean():.2f} $\pm$ {df['hAURC_maxlikelihood_01_TS'].std()/n:.2f} & {df['Gain_hAURC_maxlikelihood_01_TS'].mean():.2f} $\pm$ {df['Gain_hAURC_maxlikelihood_01_TS'].std()/n:.2f} \\\\")

& Selective & 24.98 $\pm$ 0.46 & - \\
& Max-Coverage \cite{Valmadre} & 39.96 $\pm$ 0.53 & 6.93 $\pm$ 0.26 & 39.96 $\pm$ 0.53 \\
& Climbing (Ours) & 39.21 $\pm$ 0.49 & 8.27 $\pm$ 0.19 \\
& MP-Climb (Ours) & 39.63 $\pm$ 0.47 & 6.83 $\pm$ 0.18\\
\midrule\multirow{4}{4em}{\textbf{Temp. Scaling}}
& Selective & 41.14 $\pm$ 0.46 & - \\
& Max-Coverage \cite{Valmadre} & 37.19 $\pm$ 0.52 & 11.10 $\pm$ 0.47 \\
& Climbing (Ours) & 36.49 $\pm$ 0.47 & 12.52 $\pm$ 0.16 \\
& MP-Climb (Ours) & 36.34 $\pm$ 0.45 & 12.58 $\pm$ 0.11 \\


## Utils 

In [46]:
def combine_dfs(df, bs_df, repeat_bs=False):
    cols = ['Architecture', 'hAURC_climbing_01_TS', 'hAURC_climbing_01', 'hAURC_selective_01', 'Accuracy', 'Accuracy_TS', 'Gain_hAURC_climbing_01_TS_F']
    df_combined = df[cols]
    if not repeat_bs:
        df_combined = df_combined.join(bs_df[cols].add_prefix('Baseline_'))
    else:
        df_combined['Baseline_Architecture'] = df['Baseline_Architecture']
        for i,row in df_combined.iterrows():
            bs_row = bs_df[bs_df['Architecture'] == row['Baseline_Architecture']]
            df_combined.loc[i, 'Baseline_Accuracy'] = bs_row['Accuracy'].values[0]
            df_combined.loc[i, 'Baseline_hAURC_selective_01'] = bs_row['hAURC_selective_01'].values[0]
            df_combined.loc[i, 'Baseline_hAURC_climbing_01_TS'] = bs_row['hAURC_climbing_01_TS'].values[0]
            df_combined.loc[i, 'Baseline_hAURC_climbing_01'] = bs_row['hAURC_climbing_01'].values[0]
            df_combined.loc[i, 'Baseline_Gain_hAURC_climbing_01_TS_F'] = bs_row['Gain_hAURC_climbing_01_TS_F'].values[0]
            
    df_combined['hAURC_improve_selective'] = 100*(df_combined['Baseline_hAURC_selective_01'] - df_combined['hAURC_selective_01'])/df_combined['Baseline_hAURC_selective_01']
    df_combined['hAURC_improve_climb_TS'] = 100*(df_combined['Baseline_hAURC_climbing_01_TS'] - df_combined['hAURC_climbing_01_TS'])/df_combined['Baseline_hAURC_climbing_01_TS']
    df_combined['hAURC_improve_climb'] = 100*(df_combined['Baseline_hAURC_climbing_01'] - df_combined['hAURC_climbing_01'])/df_combined['Baseline_hAURC_climbing_01']
    df_combined['Gain_climb_improve'] = 100*(df_combined['Gain_hAURC_climbing_01_TS_F'] - df_combined['Baseline_Gain_hAURC_climbing_01_TS_F'])/df_combined['Baseline_Gain_hAURC_climbing_01_TS_F']
    df_combined['Accuracy_improve'] = 100*(df_combined['Accuracy_TS'] - df_combined['Baseline_Accuracy'])/df_combined['Baseline_Accuracy']
    df_combined['Accuracy_delta'] = df_combined['Accuracy_TS'] - df_combined['Baseline_Accuracy']
    return df_combined

## Train Methods Comparison

### Distillation

In [47]:
distilled = ['hrnet_w18_ssld.paddle_in1k', 'hrnet_w48_ssld.paddle_in1k', 'pit_ti_distilled_224.in1k', 'ecaresnet101d.miil_in1k','ecaresnet50d.miil_in1k', 'efficientnet_b1_pruned.in1k','efficientnet_b3_pruned.in1k','deit_base_distilled_patch16_224.fb_in1k', 'deit_base_distilled_patch16_384.fb_in1k', 'deit_small_distilled_patch16_224.fb_in1k', 'deit_tiny_distilled_patch16_224.fb_in1k', 'fastvit_ma36.apple_dist_in1k', 'fastvit_s12.apple_dist_in1k', 'fastvit_sa12.apple_dist_in1k', 'fastvit_sa24.apple_dist_in1k', 'fastvit_t12.apple_dist_in1k', 'fastvit_t8.apple_dist_in1k', 'pit_b_distilled_224.in1k', 'pit_s_distilled_224.in1k', 'pit_xs_distilled_224.in1k', 'resmlp_12_224.fb_distilled_in1k', 'resmlp_24_224.fb_distilled_in1k', 'resmlp_36_224.fb_distilled_in1k', 'resmlp_big_24_224.fb_distilled_in1k', 'resnetv2_50x1_bit.goog_distilled_in1k', 'tiny_vit_11m_224.dist_in22k_ft_in1k', 'tiny_vit_21m_224.dist_in22k_ft_in1k', 'tiny_vit_5m_224.dist_in22k_ft_in1k', 'xcit_large_24_p16_224.fb_dist_in1k', 'xcit_large_24_p8_224.fb_dist_in1k', 'xcit_medium_24_p16_224.fb_dist_in1k', 'xcit_medium_24_p8_224.fb_dist_in1k', 'xcit_nano_12_p16_224.fb_dist_in1k', 'xcit_nano_12_p8_224.fb_dist_in1k', 'xcit_small_12_p16_224.fb_dist_in1k', 'xcit_small_12_p8_224.fb_dist_in1k', 'xcit_small_24_p16_224.fb_dist_in1k', 'xcit_small_24_p8_224.fb_dist_in1k', 'xcit_tiny_12_p16_224.fb_dist_in1k', 'xcit_tiny_12_p8_224.fb_dist_in1k', 'xcit_tiny_24_p16_224.fb_dist_in1k', 'xcit_tiny_24_p8_224.fb_dist_in1k', ]
baselines = ['hrnet_w18.ms_in1k', 'hrnet_w48.ms_in1k', 'pit_ti_224', 'ecaresnet101d_pruned.miil_in1k','ecaresnet50d_pruned.miil_in1k','efficientnet_b1_torchvision','efficientnet_b3_torchvision','deit_base_patch16_224.fb_in1k', 'deit_base_patch16_384.fb_in1k', 'deit_small_patch16_224.fb_in1k', 'deit_tiny_patch16_224.fb_in1k', 'fastvit_ma36.apple_in1k', 'fastvit_s12.apple_in1k', 'fastvit_sa12.apple_in1k', 'fastvit_sa24.apple_in1k', 'fastvit_t12.apple_in1k', 'fastvit_t8.apple_in1k', 'pit_b_224.in1k', 'pit_s_224.in1k', 'pit_xs_224.in1k', 'resmlp_12_224.fb_in1k', 'resmlp_24_224.fb_in1k', 'resmlp_36_224.fb_in1k', 'resmlp_big_24_224.fb_in1k', 'resnetv2_50x1_bit.goog_in21k_ft_in1k', 'tiny_vit_11m_224.in1k', 'tiny_vit_21m_224.in1k', 'tiny_vit_5m_224.in1k', 'xcit_large_24_p16_224.fb_in1k', 'xcit_large_24_p8_224.fb_in1k', 'xcit_medium_24_p16_224.fb_in1k', 'xcit_medium_24_p8_224.fb_in1k', 'xcit_nano_12_p16_224.fb_in1k', 'xcit_nano_12_p8_224.fb_in1k', 'xcit_small_12_p16_224.fb_in1k', 'xcit_small_12_p8_224.fb_in1k', 'xcit_small_24_p16_224.fb_in1k', 'xcit_small_24_p8_224.fb_in1k', 'xcit_tiny_12_p16_224.fb_in1k', 'xcit_tiny_12_p8_224.fb_in1k', 'xcit_tiny_24_p16_224.fb_in1k', 'xcit_tiny_24_p8_224.fb_in1k', ]

dist_df = df[df['Architecture'].isin(distilled)].sort_values(by=['Architecture']).reset_index(drop=True)
dist_bs_df = df[df['Architecture'].isin(baselines)].sort_values(by=['Architecture']).reset_index(drop=True)
df_dist_combined = combine_dfs(dist_df, dist_bs_df)

In [64]:
df_dist_combined.sort_values(by='hAURC_improve_climb_TS', ascending=False)

Unnamed: 0,Architecture,hAURC_climbing_01_TS,hAURC_climbing_01,hAURC_selective_01,Accuracy,Accuracy_TS,Gain_hAURC_climbing_01_TS_F,Baseline_Architecture,Baseline_hAURC_climbing_01_TS,Baseline_hAURC_climbing_01,...,Baseline_Accuracy,Baseline_Accuracy_TS,Baseline_Gain_hAURC_climbing_01_TS_F,hAURC_improve_selective,hAURC_improve_climb_TS,hAURC_improve_climb,Gain_climb_improve,Accuracy_improve,Accuracy_delta,Method
31,xcit_medium_24_p8_224.fb_dist_in1k,24.358514,25.473224,28.691264,85.076,85.1,15.101287,xcit_nano_12_p16_224.fb_in1k,68.427731,75.536756,...,69.964,70.02222,9.562289,62.080106,64.402569,66.277048,57.92545,21.633983,15.136,Distillation (42)
37,xcit_small_24_p8_224.fb_dist_in1k,24.818531,26.099464,29.46127,84.864,84.89111,15.758789,xcit_tiny_12_p16_224.fb_in1k,43.87758,48.810343,...,77.132,77.17333,13.792726,42.116776,43.436874,46.528825,14.254344,10.059521,7.75911,Distillation (42)
26,tiny_vit_21m_224.dist_in22k_ft_in1k,22.599901,25.25094,28.644944,85.088,85.09333,21.103351,tiny_vit_5m_224.in1k,38.741833,49.545507,...,79.168,79.17778,19.692549,40.622262,41.665381,49.034853,7.16414,7.484501,5.92533,Distillation (42)
15,hrnet_w48_ssld.paddle_in1k,27.504938,25.747985,32.359611,83.634,83.58667,15.00226,hrnet_w48.ms_in1k,37.926222,36.642363,...,79.326,79.36444,13.335827,26.055939,27.477781,29.731648,12.495904,5.371089,4.26067,Distillation (42)
29,xcit_large_24_p8_224.fb_dist_in1k,24.025491,25.188992,28.505322,85.406,85.41778,15.715771,xcit_medium_24_p16_224.fb_in1k,32.728176,35.474514,...,82.628,82.60444,13.810999,24.931803,26.590805,28.994115,13.791699,3.376313,2.78978,Distillation (42)
14,hrnet_w18_ssld.paddle_in1k,34.349445,32.419551,39.70467,81.088,81.02444,13.487644,hrnet_w18.ms_in1k,45.576768,44.30601,...,76.756,76.57555,10.253976,21.816828,24.633872,26.828097,31.535756,5.561051,4.26844,Distillation (42)
23,resmlp_big_24_224.fb_distilled_in1k,26.800655,28.001992,31.863845,83.592,83.62667,15.890077,resnetv2_50x1_bit.goog_in21k_ft_in1k,34.124503,34.818187,...,80.34,80.32222,17.02055,22.517718,21.462137,19.576536,-6.64181,4.090951,3.28667,Distillation (42)
7,efficientnet_b3_pruned.in1k,34.044798,38.656931,41.001619,80.852,80.83111,16.967187,efficientnet_b3_torchvision,41.846,45.043094,...,78.54,78.64,13.826233,15.565073,18.642647,14.177896,22.717348,2.917125,2.29111,Distillation (42)
4,ecaresnet101d.miil_in1k,32.599454,40.280202,41.116189,82.16,82.16889,20.713824,ecaresnet101d_pruned.miil_in1k,39.563883,40.793019,...,80.816,80.91111,12.424445,8.988379,17.602996,1.257121,66.718301,1.674037,1.35289,Distillation (42)
19,pit_xs_distilled_224.in1k,38.373537,39.149645,43.663315,79.166,79.18667,12.114926,resmlp_12_224.fb_in1k,46.231752,52.016306,...,76.646,76.62666,13.352233,18.166011,16.997443,24.735824,-9.266672,3.314811,2.54067,Distillation (42)


### Imagenet21K/22K

In [49]:
pretrained_21_models = ['tresnet_m.miil_in21k_ft_in1k', 'tresnet_v2_l.miil_in21k_ft_in1k', 'maxvit_base_tf_384.in21k_ft_in1k', 'tf_efficientnetv2_b3.in21k_ft_in1k', 'tf_efficientnetv2_l.in21k_ft_in1k', 'tf_efficientnetv2_m.in21k_ft_in1k', 'tf_efficientnetv2_s.in21k_ft_in1k', 'tresnet_m.miil_in21k_ft_in1k', 'vit_base_patch16_224.augreg_in21k_ft_in1k', 'vit_base_patch16_384.augreg_in21k_ft_in1k', 'vit_base_patch32_224.augreg_in21k_ft_in1k', 'vit_base_patch32_384.augreg_in21k_ft_in1k', 'vit_small_patch16_224.augreg_in21k_ft_in1k', 'vit_small_patch16_384.augreg_in21k_ft_in1k', ]
pretrained_22_models = ['caformer_b36.sail_in22k_ft_in1k', 'caformer_b36.sail_in22k_ft_in1k_384', 'caformer_m36.sail_in22k_ft_in1k', 'caformer_m36.sail_in22k_ft_in1k_384', 'caformer_s18.sail_in22k_ft_in1k', 'caformer_s18.sail_in22k_ft_in1k_384', 'caformer_s36.sail_in22k_ft_in1k', 'caformer_s36.sail_in22k_ft_in1k_384', 'convformer_b36.sail_in22k_ft_in1k', 'convformer_b36.sail_in22k_ft_in1k_384', 'convformer_m36.sail_in22k_ft_in1k', 'convformer_m36.sail_in22k_ft_in1k_384', 'convformer_s18.sail_in22k_ft_in1k', 'convformer_s18.sail_in22k_ft_in1k_384', 'convformer_s36.sail_in22k_ft_in1k', 'convformer_s36.sail_in22k_ft_in1k_384', 'convnext_base.fb_in22k_ft_in1k', 'convnext_large.fb_in22k_ft_in1k', 'convnext_small.fb_in22k_ft_in1k', 'convnext_tiny.fb_in22k_ft_in1k', 'deit3_base_patch16_224.fb_in22k_ft_in1k', 'deit3_base_patch16_384.fb_in22k_ft_in1k', 'deit3_huge_patch14_224.fb_in22k_ft_in1k', 'deit3_large_patch16_224.fb_in22k_ft_in1k', 'deit3_large_patch16_384.fb_in22k_ft_in1k', 'deit3_medium_patch16_224.fb_in22k_ft_in1k', 'deit3_small_patch16_224.fb_in22k_ft_in1k', 'deit3_small_patch16_384.fb_in22k_ft_in1k', 'mobilevitv2_150.cvnets_in22k_ft_in1k', 'mobilevitv2_175.cvnets_in22k_ft_in1k', 'mobilevitv2_200.cvnets_in22k_ft_in1k', 'resmlp_big_24_224.fb_in22k_ft_in1k', 'swin_base_patch4_window12_384.ms_in22k_ft_in1k', 'swin_base_patch4_window7_224.ms_in22k_ft_in1k', 'swin_small_patch4_window7_224.ms_in22k_ft_in1k', 'swin_tiny_patch4_window7_224.ms_in22k_ft_in1k', 'tiny_vit_11m_224.dist_in22k_ft_in1k', 'tiny_vit_21m_224.dist_in22k_ft_in1k', 'tiny_vit_5m_224.dist_in22k_ft_in1k']
pretrained_21_models += pretrained_22_models
pretrained_21_baselines = ['tresnet_m.miil_in1k', 'tresnet_l.miil_in1k', 'maxvit_base_tf_384.in1k', 'tf_efficientnetv2_b3.in1k', 'tf_efficientnetv2_l.in1k', 'tf_efficientnetv2_m.in1k', 'tf_efficientnetv2_s.in1k', 'tresnet_m.miil_in1k', 'vit_base_patch16_224.augreg_in1k', 'vit_base_patch16_384.augreg_in1k', 'vit_base_patch32_224.augreg_in1k', 'vit_base_patch32_384.augreg_in1k', 'vit_small_patch16_224.augreg_in1k', 'vit_small_patch16_384.augreg_in1k' ]
pretrained_22_baselines = ['caformer_b36.sail_in1k', 'caformer_b36.sail_in1k_384', 'caformer_m36.sail_in1k', 'caformer_m36.sail_in1k_384', 'caformer_s18.sail_in1k', 'caformer_s18.sail_in1k_384', 'caformer_s36.sail_in1k', 'caformer_s36.sail_in1k_384', 'convformer_b36.sail_in1k', 'convformer_b36.sail_in1k_384', 'convformer_m36.sail_in1k', 'convformer_m36.sail_in1k_384', 'convformer_s18.sail_in1k', 'convformer_s18.sail_in1k_384', 'convformer_s36.sail_in1k', 'convformer_s36.sail_in1k_384', 'convnext_base.fb_in1k', 'convnext_large.fb_in1k', 'convnext_small.fb_in1k', 'convnext_tiny.fb_in1k', 'deit3_base_patch16_224.fb_in1k', 'deit3_base_patch16_384.fb_in1k', 'deit3_huge_patch14_224.fb_in1k', 'deit3_large_patch16_224.fb_in1k', 'deit3_large_patch16_384.fb_in1k', 'deit3_medium_patch16_224.fb_in1k', 'deit3_small_patch16_224.fb_in1k', 'deit3_small_patch16_384.fb_in1k', 'mobilevitv2_150.cvnets_in1k', 'mobilevitv2_175.cvnets_in1k', 'mobilevitv2_200.cvnets_in1k', 'resmlp_big_24_224.fb_in1k', 'swin_base_patch4_window12_384.ms_in1k', 'swin_base_patch4_window7_224.ms_in1k', 'swin_small_patch4_window7_224.ms_in1k', 'swin_tiny_patch4_window7_224.ms_in1k', 'tiny_vit_11m_224.in1k', 'tiny_vit_21m_224.in1k', 'tiny_vit_5m_224.in1k']
pretrained_21_baselines += pretrained_22_baselines

pre_21_df = df[df['Architecture'].isin(pretrained_21_models)].sort_values(by=['Architecture']).reset_index(drop=True)
pre_21_bs_df = df[df['Architecture'].isin(pretrained_21_baselines)].sort_values(by=['Architecture']).reset_index(drop=True)
pre_21_df_combined = combine_dfs(pre_21_df, pre_21_bs_df)

In [50]:
clip_ft_1k = {'convnext_base.clip_laion2b_augreg_ft_in1k': 'convnext_base.fb_in1k',
'eva_giant_patch14_336.clip_ft_in1k': 'eva_giant_patch14_336.m30m_ft_in22k_in1k',
'vit_base_patch16_clip_224.laion2b_ft_in1k': 'vit_base_patch16_224.augreg_in1k',
'vit_base_patch16_clip_224.openai_ft_in1k': 'vit_base_patch16_224.augreg_in1k',
'vit_base_patch16_clip_384.laion2b_ft_in1k': 'vit_base_patch16_384.augreg_in1k',
'vit_base_patch16_clip_384.openai_ft_in1k': 'vit_base_patch16_384.augreg_in1k',
'vit_base_patch32_clip_224.laion2b_ft_in1k': 'vit_base_patch32_224.augreg_in1k',
'vit_base_patch32_clip_224.openai_ft_in1k': 'vit_base_patch32_224.augreg_in1k'}

clip_ft_1k_df = df[df['Architecture'].isin(clip_ft_1k.keys())].sort_values(by=['Architecture']).reset_index(drop=True)
clip_ft_1k_bs_df = df[df['Architecture'].isin(clip_ft_1k.values())].sort_values(by=['Architecture']).reset_index(drop=True)
clip_ft_1k_df['Baseline_Architecture'] = clip_ft_1k_df.apply(lambda x: clip_ft_1k[x['Architecture']], axis=1)
clip_ft_1k_combined = combine_dfs(clip_ft_1k_df, clip_ft_1k_bs_df, repeat_bs=True)

In [51]:
clip_ft_1k_combined

Unnamed: 0,Architecture,hAURC_climbing_01_TS,hAURC_climbing_01,hAURC_selective_01,Accuracy,Accuracy_TS,Gain_hAURC_climbing_01_TS_F,Baseline_Architecture,Baseline_Accuracy,Baseline_hAURC_selective_01,Baseline_hAURC_climbing_01_TS,Baseline_hAURC_climbing_01,Baseline_Gain_hAURC_climbing_01_TS_F,hAURC_improve_selective,hAURC_improve_climb_TS,hAURC_improve_climb,Gain_climb_improve,Accuracy_improve,Accuracy_delta
0,convnext_base.clip_laion2b_augreg_ft_in1k,22.50891,24.267095,27.65616,86.16,86.18444,18.611586,convnext_base.fb_in1k,83.846,32.398516,27.778419,28.875527,14.260214,14.637571,18.969792,15.959645,30.514071,2.78897,2.33844
1,eva_giant_patch14_336.clip_ft_in1k,14.833927,19.206481,19.24721,89.46,89.43778,22.929471,eva_giant_patch14_336.m30m_ft_in22k_in1k,89.562,20.402902,14.161338,22.934244,30.591551,5.664349,-4.749475,16.254136,-25.046395,-0.138697,-0.12422
2,vit_base_patch16_clip_224.laion2b_ft_in1k,22.301936,24.720175,28.262049,85.49,85.48889,21.088751,vit_base_patch16_224.augreg_in1k,79.14,41.84927,36.299072,35.643331,13.262353,32.467045,38.56059,30.645723,59.012134,8.022353,6.34889
3,vit_base_patch16_clip_224.openai_ft_in1k,23.514496,25.822446,29.193564,85.292,85.29778,19.453152,vit_base_patch16_224.augreg_in1k,79.14,41.84927,36.299072,35.643331,13.262353,30.241163,35.220119,27.55322,46.679493,7.780869,6.15778
4,vit_base_patch16_clip_384.laion2b_ft_in1k,20.331822,22.561338,25.861781,86.628,86.68,21.382748,vit_base_patch16_384.augreg_in1k,81.09,36.654383,30.885942,30.245931,15.737386,29.444233,34.171274,25.40703,35.872301,6.893575,5.59
5,vit_base_patch16_clip_384.openai_ft_in1k,22.29249,24.528822,27.767693,86.202,86.16889,19.71789,vit_base_patch16_384.augreg_in1k,81.09,36.654383,30.885942,30.245931,15.737386,24.244551,27.823183,18.902076,25.293302,6.263275,5.07889
6,vit_base_patch32_clip_224.laion2b_ft_in1k,29.55445,31.189983,35.078278,82.578,82.47333,15.747146,vit_base_patch32_224.augreg_in1k,74.914,54.83179,49.615701,49.588582,9.512891,36.025656,40.433271,37.102491,65.534809,10.090677,7.55933
7,vit_base_patch32_clip_224.openai_ft_in1k,30.560613,32.209363,36.330071,81.95,81.97333,15.880668,vit_base_patch32_224.augreg_in1k,74.914,54.83179,49.615701,49.588582,9.512891,33.742686,38.405359,35.046816,66.938404,9.423245,7.05933


### Finetuned on 12K 

In [52]:
clip_ft_12k = ['convnext_base.clip_laion2b_augreg_ft_in12k_in1k',  'vit_base_patch16_clip_224.laion2b_ft_in12k_in1k',  'vit_base_patch16_clip_224.openai_ft_in12k_in1k',  'vit_base_patch16_clip_384.laion2b_ft_in12k_in1k',  'vit_base_patch16_clip_384.openai_ft_in12k_in1k',  'vit_base_patch32_clip_224.laion2b_ft_in12k_in1k',  'vit_base_patch32_clip_384.laion2b_ft_in12k_in1k',  'vit_base_patch32_clip_384.openai_ft_in12k_in1k']
clip_ft_12k_baselines = ['convnext_base.fb_in1k', 'vit_base_patch16_224.augreg_in1k', 'vit_base_patch16_224.augreg_in1k', 'vit_base_patch16_384.augreg_in1k', 'vit_base_patch16_384.augreg_in1k', 'vit_base_patch32_224.augreg_in1k', 'vit_base_patch32_384.augreg_in1k', 'vit_base_patch32_384.augreg_in1k' ]
clip_ft_12k = {'convnext_base.clip_laion2b_augreg_ft_in12k_in1k':'convnext_base.fb_in1k',
'vit_base_patch16_clip_224.laion2b_ft_in12k_in1k':'vit_base_patch16_224.augreg_in1k',
'vit_base_patch16_clip_224.openai_ft_in12k_in1k':'vit_base_patch16_224.augreg_in1k',
'vit_base_patch16_clip_384.laion2b_ft_in12k_in1k':'vit_base_patch16_384.augreg_in1k',
'vit_base_patch16_clip_384.openai_ft_in12k_in1k':'vit_base_patch16_384.augreg_in1k',
'vit_base_patch32_clip_224.laion2b_ft_in12k_in1k':'vit_base_patch32_224.augreg_in1k',
'vit_base_patch32_clip_384.laion2b_ft_in12k_in1k':'vit_base_patch32_384.augreg_in1k',
'vit_base_patch32_clip_384.openai_ft_in12k_in1k':'vit_base_patch32_384.augreg_in1k',
 }

clip_ft_12k_df = df[df['Architecture'].isin(clip_ft_12k.keys())].sort_values(by=['Architecture']).reset_index(drop=True)
clip_ft_12k_bs_df = df[df['Architecture'].isin(clip_ft_12k.values())].sort_values(by=['Architecture']).reset_index(drop=True)
clip_ft_12k_df['Baseline_Architecture'] = clip_ft_12k_df.apply(lambda x: clip_ft_12k[x['Architecture']], axis=1)
clip_ft_12k_df_combined = combine_dfs(clip_ft_12k_df, clip_ft_12k_bs_df, repeat_bs=True)


In [53]:
# combine clip_ft_12k so that we have a baseline for each model
clip_ft_12k_df = df[df['Architecture'].isin(clip_ft_12k.keys())].sort_values(by=['Architecture']).reset_index(drop=True)
clip_ft_12k_bs_df = df[df['Architecture'].isin(clip_ft_12k.values())].sort_values(by=['Architecture']).reset_index(drop=True)
cols = ['Architecture', 'hAURC_climbing_01_TS', 'hAURC_climbing_01', 'hAURC_selective_01', 'Accuracy', 'Accuracy_TS', 'Gain_hAURC_climbing_01_TS_F']
clip_ft_12k_combined = clip_ft_12k_df[cols]
clip_ft_12k_combined['Baseline_Architecture'] = clip_ft_12k_df.apply(lambda x: clip_ft_12k[x['Architecture']], axis=1)
# calculate Accuracy_improve: the difference between the accuracy of the model and its baseline.
# for that we need to get the baseline accuracy
for i,row in clip_ft_12k_combined.iterrows():
    bs_row = clip_ft_12k_bs_df[clip_ft_12k_bs_df['Architecture'] == row['Baseline_Architecture']]
    clip_ft_12k_combined.loc[i, 'Baseline_Accuracy'] = bs_row['Accuracy'].values[0]
    clip_ft_12k_combined.loc[i, 'Baseline_hAURC_selective_01'] = bs_row['hAURC_selective_01'].values[0]
    clip_ft_12k_combined.loc[i, 'Baseline_hAURC_climbing_01_TS'] = bs_row['hAURC_climbing_01_TS'].values[0]
    clip_ft_12k_combined.loc[i, 'Baseline_hAURC_climbing_01'] = bs_row['hAURC_climbing_01'].values[0]
    clip_ft_12k_combined.loc[i, 'Baseline_Gain_hAURC_climbing_01_TS_F'] = bs_row['Gain_hAURC_climbing_01_TS_F'].values[0]
    clip_ft_12k_combined['hAURC_improve_selective'] = 100*(clip_ft_12k_combined['Baseline_hAURC_selective_01'] - clip_ft_12k_combined['hAURC_selective_01'])/clip_ft_12k_combined['Baseline_hAURC_selective_01']
    clip_ft_12k_combined['hAURC_improve_climb_TS'] = 100*(clip_ft_12k_combined['Baseline_hAURC_climbing_01_TS'] - clip_ft_12k_combined['hAURC_climbing_01_TS'])/clip_ft_12k_combined['Baseline_hAURC_climbing_01_TS']
    clip_ft_12k_combined['hAURC_improve_climb'] = 100*(clip_ft_12k_combined['Baseline_hAURC_climbing_01'] - clip_ft_12k_combined['hAURC_climbing_01'])/clip_ft_12k_combined['Baseline_hAURC_climbing_01']
    clip_ft_12k_combined['Accuracy_improve'] = 100*(clip_ft_12k_combined['Accuracy_TS'] - clip_ft_12k_combined['Baseline_Accuracy'])/clip_ft_12k_combined['Baseline_Accuracy']
    clip_ft_12k_combined['Accuracy_delta'] = clip_ft_12k_combined['Accuracy_TS'] - clip_ft_12k_combined['Baseline_Accuracy']
    clip_ft_12k_combined['Gain_climb_improve'] = 100*(clip_ft_12k_combined['Gain_hAURC_climbing_01_TS_F'] - clip_ft_12k_combined['Baseline_Gain_hAURC_climbing_01_TS_F'])/clip_ft_12k_combined['Baseline_Gain_hAURC_climbing_01_TS_F']

In [54]:
pretrained_12_models = ['coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k', 'coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k', 'convnext_nano.in12k_ft_in1k',  'convnext_small.in12k_ft_in1k', 'convnext_tiny.in12k_ft_in1k', 'regnety_120.sw_in12k_ft_in1k',  'regnety_160.sw_in12k_ft_in1k',  'rexnetr_200.sw_in12k_ft_in1k', 'seresnextaa101d_32x8d.sw_in12k_ft_in1k']
pretrained_12_baselines = ['coatnet_rmlp_1_rw_224.sw_in1k', 'coatnet_rmlp_2_rw_224.sw_in1k', 'convnext_nano.d1h_in1k', 'convnext_small_torchvision', 'convnext_tiny_torchvision', 'regnety_120.pycls_in1k', 'regnety_160.pycls_in1k', 'rexnet_200.nav_in1k', 'seresnextaa101d_32x8d.ah_in1k']
len(pretrained_12_models), len(pretrained_12_baselines)

pretrained_12_df = df[df['Architecture'].isin(pretrained_12_models)].sort_values(by=['Architecture']).reset_index(drop=True)
pretrained_12_bs_df = df[df['Architecture'].isin(pretrained_12_baselines)].sort_values(by=['Architecture']).reset_index(drop=True)
pretrained_12_combined = combine_dfs(pretrained_12_df, pretrained_12_bs_df)

### SSL

In [55]:
ssl = {'resnet18.fb_ssl_yfcc100m_ft_in1k': 'resnet18.tv_in1k',
'resnet50.fb_ssl_yfcc100m_ft_in1k': 'resnet50.tv_in1k', 
'resnext50_32x4d.fb_ssl_yfcc100m_ft_in1k':  'resnext50_32x4d.tv_in1k',
'resnext101_32x4d.fb_ssl_yfcc100m_ft_in1k': 'resnext101_32x4d.gluon_in1k',
'resnext101_32x8d.fb_ssl_yfcc100m_ft_in1k': 'resnext101_32x8d.tv_in1k',
'resnet18.fb_swsl_ig1b_ft_in1k': 'resnet18.tv_in1k',
'resnet50.fb_swsl_ig1b_ft_in1k': 'resnet50.tv_in1k',
'resnext101_32x4d.fb_swsl_ig1b_ft_in1k': 'resnext101_32x4d.gluon_in1k',
'resnext101_32x8d.fb_swsl_ig1b_ft_in1k': 'resnext101_32x8d.tv_in1k',
'resnext50_32x4d.fb_swsl_ig1b_ft_in1k': 'resnext50_32x4d.tv_in1k',
'resnext101_32x8d.fb_wsl_ig1b_ft_in1k': 'resnext101_32x8d.tv_in1k'}
len(ssl)


11

In [56]:
# combine ssl so that we have a baseline for each model
ssl_df = df[df['Architecture'].isin(ssl.keys())].sort_values(by=['Architecture']).reset_index(drop=True)
ssl_bs_df = df[df['Architecture'].isin(ssl.values())].sort_values(by=['Architecture']).reset_index(drop=True)
cols = ['Architecture', 'hAURC_climbing_01_TS', 'hAURC_climbing_01', 'hAURC_selective_01', 'Accuracy', 'Accuracy_TS', 'Gain_hAURC_climbing_01_TS_F']
ssl_combined = ssl_df[cols]
ssl_combined['Baseline_Architecture'] = ssl_df.apply(lambda x: ssl[x['Architecture']], axis=1)
# calculate Accuracy_improve: the difference between the accuracy of the model and its baseline.
# for that we need to get the baseline accuracy
for i,row in ssl_combined.iterrows():
    bs_row = ssl_bs_df[ssl_bs_df['Architecture'] == row['Baseline_Architecture']]
    ssl_combined.loc[i, 'Baseline_Accuracy'] = bs_row['Accuracy'].values[0]
    ssl_combined.loc[i, 'Baseline_hAURC_selective_01'] = bs_row['hAURC_selective_01'].values[0]
    ssl_combined.loc[i, 'Baseline_hAURC_climbing_01_TS'] = bs_row['hAURC_climbing_01_TS'].values[0]
    ssl_combined.loc[i, 'Baseline_hAURC_climbing_01'] = bs_row['hAURC_climbing_01'].values[0]
    ssl_combined.loc[i, 'Baseline_Gain_hAURC_climbing_01_TS_F'] = bs_row['Gain_hAURC_climbing_01_TS_F'].values[0]
    ssl_combined['hAURC_improve_selective'] = 100*(ssl_combined['Baseline_hAURC_selective_01'] - ssl_combined['hAURC_selective_01'])/ssl_combined['Baseline_hAURC_selective_01']
    ssl_combined['hAURC_improve_climb_TS'] = 100*(ssl_combined['Baseline_hAURC_climbing_01_TS'] - ssl_combined['hAURC_climbing_01_TS'])/ssl_combined['Baseline_hAURC_climbing_01_TS']
    ssl_combined['hAURC_improve_climb'] = 100*(ssl_combined['Baseline_hAURC_climbing_01'] - ssl_combined['hAURC_climbing_01'])/ssl_combined['Baseline_hAURC_climbing_01']
    ssl_combined['Accuracy_improve'] = 100*(ssl_combined['Accuracy_TS'] - ssl_combined['Baseline_Accuracy'])/ssl_combined['Baseline_Accuracy']
    ssl_combined['Accuracy_delta'] = ssl_combined['Accuracy_TS'] - ssl_combined['Baseline_Accuracy']
    ssl_combined['Gain_climb_improve'] = 100*(ssl_combined['Gain_hAURC_climbing_01_TS_F'] - ssl_combined['Baseline_Gain_hAURC_climbing_01_TS_F'])/ssl_combined['Baseline_Gain_hAURC_climbing_01_TS_F']

ssl_combined['Method'] = 'SSL'

### Adversarial Training

In [57]:
adv = {'tf_efficientnet_b0.ap_in1k': 'tf_efficientnet_b0.in1k',
'tf_efficientnet_b1.ap_in1k': 'tf_efficientnet_b1.in1k',
'tf_efficientnet_b2.ap_in1k': 'tf_efficientnet_b2.in1k',
'tf_efficientnet_b3.ap_in1k': 'tf_efficientnet_b3.in1k',
'tf_efficientnet_b4.ap_in1k': 'tf_efficientnet_b4.in1k',
'tf_efficientnet_b5.ap_in1k': 'tf_efficientnet_b5.in1k',
'inception_resnet_v2.tf_ens_adv_in1k': 'inception_resnet_v2.tf_in1k',
'inception_v3.tf_adv_in1k': 'inception_v3.tf_in1k',}

In [58]:
# combine adv so that we have a baseline for each model
adv_df = df[df['Architecture'].isin(adv.keys())].sort_values(by=['Architecture']).reset_index(drop=True)
adv_bs_df = df[df['Architecture'].isin(adv.values())].sort_values(by=['Architecture']).reset_index(drop=True)
cols = ['Architecture', 'hAURC_climbing_01_TS', 'hAURC_climbing_01', 'hAURC_selective_01', 'Accuracy', 'Accuracy_TS', 'Gain_hAURC_climbing_01_TS_F']
adv_combined = adv_df[cols]
adv_combined['Baseline_Architecture'] = adv_df.apply(lambda x: adv[x['Architecture']], axis=1)
# calculate Accuracy_improve: the difference between the accuracy of the model and its baseline.
# for that we need to get the baseline accuracy
for i,row in adv_combined.iterrows():
    bs_row = adv_bs_df[adv_bs_df['Architecture'] == row['Baseline_Architecture']]
    adv_combined.loc[i, 'Baseline_Accuracy'] = bs_row['Accuracy'].values[0]
    adv_combined.loc[i, 'Baseline_hAURC_selective_01'] = bs_row['hAURC_selective_01'].values[0]
    adv_combined.loc[i, 'Baseline_hAURC_climbing_01_TS'] = bs_row['hAURC_climbing_01_TS'].values[0]
    adv_combined.loc[i, 'Baseline_hAURC_climbing_01'] = bs_row['hAURC_climbing_01'].values[0]
    adv_combined.loc[i, 'Baseline_Gain_hAURC_climbing_01_TS_F'] = bs_row['Gain_hAURC_climbing_01_TS_F'].values[0]
    adv_combined['hAURC_improve_selective'] = 100*(adv_combined['Baseline_hAURC_selective_01'] - adv_combined['hAURC_selective_01'])/adv_combined['Baseline_hAURC_selective_01']
    adv_combined['hAURC_improve_climb_TS'] = 100*(adv_combined['Baseline_hAURC_climbing_01_TS'] - adv_combined['hAURC_climbing_01_TS'])/adv_combined['Baseline_hAURC_climbing_01_TS']
    adv_combined['hAURC_improve_climb'] = 100*(adv_combined['Baseline_hAURC_climbing_01'] - adv_combined['hAURC_climbing_01'])/adv_combined['Baseline_hAURC_climbing_01']
    adv_combined['Accuracy_improve'] = 100*(adv_combined['Accuracy_TS'] - adv_combined['Baseline_Accuracy'])/adv_combined['Baseline_Accuracy']
    adv_combined['Accuracy_delta'] = adv_combined['Accuracy_TS'] - adv_combined['Baseline_Accuracy']
    adv_combined['Gain_climb_improve'] = 100*(adv_combined['Gain_hAURC_climbing_01_TS_F'] - adv_combined['Baseline_Gain_hAURC_climbing_01_TS_F'])/adv_combined['Baseline_Gain_hAURC_climbing_01_TS_F']

adv_combined['Method'] = 'Adversarial'

In [59]:
all_dfs = [df_dist_combined, pre_21_df_combined, clip_ft_1k_combined, clip_ft_12k_combined, pretrained_12_combined]
all_dfs_names = ['Distillation', 'Pretraining on Imagenet-21K', 'CLIP Training (finetuned on 1K)', 'CLIP Training (finetuned on 12K and 1K)', 'Pretraining on Imagenet-12K']
# make one big df but add a column for the name of the Method df
all_dfs = pd.concat(all_dfs, ignore_index=True)
all_dfs['Method'] = ''
all_dfs.loc[:len(df_dist_combined)-1, 'Method'] = f'Distillation ({len(df_dist_combined)})'
all_dfs.loc[len(df_dist_combined):len(df_dist_combined)+len(pre_21_df_combined)-1, 'Method'] = f'Pretraining on Imagenet-21K ({len(pre_21_df_combined)})'
all_dfs.loc[len(df_dist_combined)+len(pre_21_df_combined):len(df_dist_combined)+len(pre_21_df_combined)+len(clip_ft_1k_combined)-1, 'Method'] = f'CLIP Training (finetuned on 1K) ({len(clip_ft_1k_combined)})'
all_dfs.loc[len(df_dist_combined)+len(pre_21_df_combined)+len(clip_ft_1k_combined):len(df_dist_combined)+len(pre_21_df_combined)+len(clip_ft_1k_combined)+len(clip_ft_12k_combined)-1, 'Method'] = f'CLIP Training (finetuned on 12K and 1K) ({len(clip_ft_12k_combined)})'
all_dfs.loc[len(df_dist_combined)+len(pre_21_df_combined)+len(clip_ft_1k_combined)+len(clip_ft_12k_combined):, 'Method'] = f'Pretraining on Imagenet-12K ({len(pretrained_12_combined)})'

Unite Pretraining and CLIP

In [60]:
all_dfs = [df_dist_combined, pre_21_df_combined, clip_ft_1k_combined, clip_ft_12k_combined, pretrained_12_combined, ssl_combined, adv_combined]

pretrained_combined = pd.concat([pre_21_df_combined, pretrained_12_combined], ignore_index=True)
clip_combined = pd.concat([clip_ft_1k_combined, clip_ft_12k_combined], ignore_index=True)
df_dist_combined['Method'] = f'Distillation ({len(df_dist_combined)})'
pretrained_combined['Method'] = f'Pretraining ({len(pretrained_combined)})'
clip_combined['Method'] = f'CLIP ({len(clip_combined)})'
ssl_combined['Method'] = f'SSL ({len(ssl_combined)})'
adv_combined['Method'] = f'Adversarial ({len(adv_combined)})'
all_dfs = [df_dist_combined, pretrained_combined, clip_combined, ssl_combined, adv_combined]
all_dfs = pd.concat(all_dfs, ignore_index=True)
all_dfs.to_csv('all_dfs.csv', index=False)

In [66]:
all_dfs = pd.read_csv('all_dfs.csv')

## Section 5 Plot

In [67]:
import plotly.express as px
import plotly.graph_objects as go

In [68]:
delta = {
    'Knowledge Distillation': 4.5,
    'Pretraining (ImageNet21k or ImageNet12k)': 4.5,
    'Contrastive Language-Image Pretraining (CLIP)': -1,
    'Semi-Supervised Learning': 5.5,
    'Adversarial Training': 1.2
}

In [69]:
y_axis = 'hAURC Improvement over Baseline (%)'
legend_x_cord, legend_y_cord = 0, 10.0
full_results_df = all_dfs
full_results_df = full_results_df.rename(columns={'hAURC_improve_climb_TS': 'hAURC Improvement over Baseline (%)'})
medians = pd.DataFrame(full_results_df[['Method', y_axis]].groupby('Method')[y_axis].median().reset_index())
medians['Num_Models'] = medians['Method'].apply(lambda x: x.split("(")[1].strip())
medians['Method'] = medians['Method'].apply(lambda x: x.split("(")[0].strip())
medians['Method'] = medians['Method'].replace({'Distillation': 'Knowledge Distillation', 'Pretraining': 'Pretraining (ImageNet21k or ImageNet12k)', 'CLIP': 'Contrastive Language-Image Pretraining (CLIP)', 'SSL': 'Semi-Supervised Learning', 'Adversarial': 'Adversarial Training'})
full_results_df['Method'] = full_results_df['Method'].apply(lambda x: x.split("(")[0].strip())
full_results_df['Method'] = full_results_df['Method'].replace({'Distillation': 'Knowledge Distillation', 'Pretraining': 'Pretraining (ImageNet21k or ImageNet12k)', 'CLIP': 'Contrastive Language-Image Pretraining (CLIP)', 'SSL': 'Semi-Supervised Learning', 'Adversarial': 'Adversarial Training'})
y_axis_range = full_results_df[y_axis].to_numpy()
range_y = [min(y_axis_range) - 0.1, max(y_axis_range) + 0.1]
fig = px.box(full_results_df.dropna(subset=['Method']), x="Method", y=y_axis, hover_name="Architecture",
                color='Method', points="all",
                category_orders={"Method": ['a','b']})
# make font larger
fig.update_xaxes(title='', tickmode='array',
                tickvals=[0,1,2,3,4,5,6],
                ticktext=['a','b','Knowledge</br></br>Distillation', 'Pretraining</br></br>(ImageNet21k or</br>ImageNet12k)', 'Contrastive </br></br> Language-Image</br>Pretraining(CLIP)', 'Semi-Supervised</br></br>Learning', 'Adversarial</br></br>Training'])
fig.update_yaxes(title_text=y_axis, title_font=dict(size=34))

font_size = 33
fig.update_layout(
    font=dict(
        size=font_size,
    ))
# don't show legend 
fig.update_layout(showlegend=False)

# Adding borders (frame) to the figure
fig.update_layout(shapes=[
    # unfilled rectange
    go.layout.Shape(type="rect", xref="paper", yref="paper",
                    x0=0, y0=0, x1=1.0, y1=1.0, line={'width': 1, 'color': 'black'}), ])
fig.update_layout(plot_bgcolor='rgba(0,0,0,0)')  # Removing graph background color
# fig.update_xaxes(showgrid=True, gridwidth=0.5, gridcolor='LightGrey')  # Modifying grids color and opacity
fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor='LightGrey')  # Modifying grids color and opacity
# snap the margins around the border
fig.update_layout(margin=dict(l=0, r=5, t=60, b=10))
fig.add_hline(y=0)
fig.write_html('results/figures/methods_comparison.html')
fig.write_image('results/figures/methods_comparison.pdf', width=1920, height=800)