In [1]:
import torch
import numpy as np
import re
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Setup experiment data

In [2]:
# Each experiment entry is: [Dataset, Model, Loss, Hierarchy, Results Filenames]
# (one results file per training run).
exp_info = [
    # Imagenet 100
    ['Imagenet 100', 'Softmax', 'CE', None, [
        'experiments/imagenet100/softmax/R0/exp.result',
        'experiments/imagenet100/softmax/R1/exp.result',
        'experiments/imagenet100/softmax/R2/exp.result',
    ]],
    ['Imagenet 100', 'Cascade', 'Soft1-1', 'NBDT', [
        'experiments/imagenet100/cascade/nbdt-binary/softpred_R0/exp.result',
        'experiments/imagenet100/cascade/nbdt-binary/softpred_R1/exp.result',
        'experiments/imagenet100/cascade/nbdt-binary/softpred_R2/exp.result',
    ]],
    ['Imagenet 100', 'Cascade', 'Soft1-1', '2 Lvl WN', [
        'experiments/imagenet100/cascade/two-lvl-wn/softpred_R0/exp.result',
        'experiments/imagenet100/cascade/two-lvl-wn/softpred_R1/exp.result',
        'experiments/imagenet100/cascade/two-lvl-wn/softpred_R2/exp.result',
    ]],
    ['Imagenet 100', 'Cascade', 'Soft1-1', 'Pruned WN', [
        'experiments/imagenet100/cascade/pruned-wn/softpred_R0/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_R1/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_R2/exp.result',
    ]],
    ['Imagenet 100', 'Cascade', 'Soft1-1,OE.2-.2', 'Pruned WN', [
        'experiments/imagenet100/cascade/pruned-wn/softpred_oe_R0/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_oe_R1/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_oe_R2/exp.result',
    ]],
    # NOTE(review): the third run below is R3 while every other entry uses R0-R2 -- confirm intentional.
    ['Imagenet 100', 'CascadeDW', 'Soft1-1', 'Pruned WN', [
        'experiments/imagenet100/cascade/pruned-wn/softpred_R0_depthweight/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_R1_depthweight/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_R3_depthweight/exp.result',
    ]],
    ['Imagenet 100', 'CascadeDW', 'Soft1-1,OE.2-.2', 'Pruned WN', [
        'experiments/imagenet100/cascade/pruned-wn/softpred_oe_R0_depthweight/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_oe_R1_depthweight/exp.result',
        'experiments/imagenet100/cascade/pruned-wn/softpred_oe_R2_depthweight/exp.result',
    ]],
    ['Imagenet 100', 'CascadeDW', 'Soft1-1', '2 Lvl WN', [
        'experiments/imagenet100/cascade/two-lvl-wn/softpred_R0_depthweight/exp.result',
        'experiments/imagenet100/cascade/two-lvl-wn/softpred_R1_depthweight/exp.result',
        'experiments/imagenet100/cascade/two-lvl-wn/softpred_R2_depthweight/exp.result',
    ]],
    ['Imagenet 100', 'CascadeDW', 'Soft1-1', 'NBDT', [
        'experiments/imagenet100/cascade/nbdt-binary/softpred_R0_depthweight/exp.result',
        'experiments/imagenet100/cascade/nbdt-binary/softpred_R1_depthweight/exp.result',
        'experiments/imagenet100/cascade/nbdt-binary/softpred_R2_depthweight/exp.result',
    ]],
    ['Imagenet 100', 'MOS', 'MOS', '2 Lvl WN', [
        'experiments/imagenet100/mos/LRp1_R0/exp.result',
        'experiments/imagenet100/mos/LRp1_R1/exp.result',
        'experiments/imagenet100/mos/LRp1_R2/exp.result',
    ]],
    ['Imagenet 100', 'MOS', 'MOS', 'MOS Groups', [
        'experiments/imagenet100/mos/imagenet1000-mos_R0/exp.result',
        'experiments/imagenet100/mos/imagenet1000-mos_R1/exp.result',
        'experiments/imagenet100/mos/imagenet1000-mos_R2/exp.result',
    ]],
    # Balanced 100
    ['Balanced 100', 'Softmax', 'CE', None, [
        'experiments/balanced100/softmax/R0/exp.result',
        'experiments/balanced100/softmax/R1/exp.result',
        'experiments/balanced100/softmax/R2/exp.result',
    ]],
    ['Balanced 100', 'MOS', 'MOS', 'MOS Groups', [
        'experiments/balanced100/mos/R0/exp.result',
        'experiments/balanced100/mos/R1/exp.result',
        'experiments/balanced100/mos/R2/exp.result',
    ]],
    # NOTE(review): these paths end in "_dw" but the model is labelled 'Cascade', not 'CascadeDW' -- confirm.
    ['Balanced 100', 'Cascade', 'Soft1-1', 'Pruned WN', [
        'experiments/balanced100/cascade/pruned-wn/R0_dw/exp.result',
        'experiments/balanced100/cascade/pruned-wn/R1_dw/exp.result',
        'experiments/balanced100/cascade/pruned-wn/R2_dw/exp.result',
    ]],
    ['Balanced 100', 'Cascade', 'Soft1-1,OE.2-.2', 'Pruned WN', [
        'experiments/balanced100/cascade/pruned-wn/R0_oe/exp.result',
        'experiments/balanced100/cascade/pruned-wn/R1_oe/exp.result',
        'experiments/balanced100/cascade/pruned-wn/R2_oe/exp.result',
    ]],
]
# Positions of the model name and of the results-file list inside each exp_info entry.
model_idx = 1
res_idx = 4

# Matches integer and decimal weights, including a bare leading dot ("1", "0.5", ".2").
# The previous pattern '\d+' dropped the decimal point, turning "OE.2-.2" into "2--2".
_WEIGHT_RE = re.compile(r'\d*\.?\d+')

# (substring looked for in a loss term, LaTeX label, key in the ranges dict);
# checked in this order, first match wins.
_LOSS_TERMS = (
    ('soft', r"$\mathcal{L}_\text{soft}$", 'alpha'),
    ('oe', r"$\mathcal{L}_\text{out}$", 'gamma'),
    ('syn', r"$\mathcal{L}_\text{syn}$", 'beta'),
)


def _describe_loss(model_name, loss_spec):
    """Build the ``[latex_label, weight_ranges]`` pair for one experiment.

    Args:
        model_name: Model column of an ``exp_info`` entry (e.g. ``'Cascade'``).
        loss_spec: Loss column of an ``exp_info`` entry; comma-separated terms
            such as ``'Soft1-1,OE.2-.2'``.

    Returns:
        ``[label, None]`` for softmax and MOS models; otherwise ``[label, ranges]``
        where ``ranges`` maps ``'alpha'``/``'beta'``/``'gamma'`` to ``'lo--hi'``
        strings (or ``None`` when the corresponding term is absent).

    Raises:
        ValueError: If a term is not a soft/oe/syn loss or lacks two weights.
    """
    model = model_name.lower()
    if 'softmax' in model:
        return [r"$\mathcal{L}_{CE}$", None]
    if 'mos' in model:
        return [r"$\mathcal{L}_{MOS}$", None]

    labels = []
    ranges = {'alpha': None, 'beta': None, 'gamma': None}
    for term in loss_spec.split(','):
        for tag, label, range_key in _LOSS_TERMS:
            if tag in term.lower():
                break
        else:
            raise ValueError(f"Unknown loss: {term!r}")
        weights = _WEIGHT_RE.findall(term)
        if len(weights) < 2:
            raise ValueError(f"Expected two weights in loss term {term!r}, found {weights}")
        labels.append(label)
        ranges[range_key] = f'{weights[0]}--{weights[1]}'
    return [", ".join(labels), ranges]


# One [latex_label, weight_ranges] pair per experiment, aligned with exp_info.
loss_info = [_describe_loss(einfo[model_idx], einfo[2]) for einfo in exp_info]

In [3]:
# Raise pandas' display limits so the wide multi-index result tables below
# are rendered in full, with four decimal places.
_DISPLAY_OPTIONS = (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
    ('display.precision', 4),
)
for _opt_name, _opt_value in _DISPLAY_OPTIONS:
    pd.set_option(_opt_name, _opt_value)

In [4]:
# OOD evaluation sets. Their order defines axis 1 of every per-method result array,
# so code that indexes those arrays must look positions up in this list.
ood_datasets = ['OOD','iNaturalist', 'SUN', 'Places', 'Textures', 'coarseid-coarseood', 'coarseid-fineood',
                'balanced100-coarseood', 'balanced100-mediumood', 'balanced100-fineood', 'balanced100-finemediumood',
               ]
# Metric keys as stored in the result files (axis 2 of the result arrays) ...
ood_metrics = ['AUROC', 'TNR', 'AUOUT']
# ... and the labels they are reported under. TNR is stored as 1 - TNR below, i.e. FPR.
# NOTE(review): 'AUOUT' is reported as 'AUPR' -- presumably AUPR with OOD as the positive class; confirm.
ood_labels = ['AUROC', 'FPR', 'AUPR']
# Result columns, one array per entry; order matches res_list.
res_desc = ['MSP', 'ODIN',
            'MOS',
            'Pred', 'Pred ODIN',
            'MeanEnt', 'Mean Entropy Temp Scaled',
            'MinEnt', 'Min Entropy Temp Scaled',
            'MaxEnt', 'Max Entropy Temp Scaled',
           ]


def _normalize_ood_results(ood):
    """Return the OOD results with every method's metrics keyed by dataset name.

    Tuples are unwrapped to their first element. Files that store metrics
    directly under the method (no dataset level) get them wrapped under 'OOD'.
    The dict is modified in place.
    """
    if isinstance(ood, tuple):
        ood = ood[0]
    if 'MOS' in ood.keys():
        if 'OOD' not in ood['MOS'].keys():
            ood['MOS'] = {'OOD': ood['MOS']}
    elif 'OOD' not in ood['MSP'].keys():
        ood['MSP'] = {'OOD': ood['MSP']}
        ood['ODIN'] = {'OOD': ood['ODIN']}
    return ood


def _read_accuracy(run_res):
    """Extract the accuracy of one run as a float.

    Uses 'top1' when 'pred' is None; otherwise the first element of 'pred'
    when it holds several values, or its single value.
    """
    pred = run_res['pred']
    if pred is None:
        return float(run_res['top1'])
    if isinstance(pred, np.ndarray):
        return float(pred[0]) if pred.size > 1 else float(pred.item())
    if len(pred.size()) > 0:  # tensor-like with at least one dimension
        return float(pred[0])
    return float(pred)


def _ood_source(model_name, method_desc):
    """Locate where a result column's values live in a result file.

    Returns ``(method_key, index)`` -- the method key in the file's 'ood' dict and
    the index into each metric vector -- or ``None`` when the column does not apply
    to this model (its array then stays NaN).
    """
    model = model_name.lower()
    if 'softmax' in model:
        return (method_desc, 0) if method_desc.upper() in ('MSP', 'ODIN') else None
    if 'mos' in model:
        return (method_desc, 0) if method_desc.upper() == 'MOS' else None
    if method_desc.upper() in ('MSP', 'ODIN', 'MOS'):
        return None
    # Hierarchical models: the ODIN / temperature-scaled columns read the 'ODIN' entry,
    # everything else reads 'MSP'; the score variant is selected by a negative index.
    method_key = 'ODIN' if (('Temp' in method_desc) or ('ODIN' in method_desc)) else 'MSP'
    lowered = method_desc.lower()
    if 'max' in lowered:
        variant_idx = -4
    elif 'pred' in lowered:
        variant_idx = -3
    elif 'mean' in lowered:
        variant_idx = -2
    else:
        variant_idx = -1
    return method_key, variant_idx


# One dict per experiment (aligned with exp_info): result column -> array of shape
# (runs, len(ood_datasets), len(ood_metrics)), plus 'acc' -> per-run accuracy.
exp_stats = []
for exp in exp_info:
    run_files = exp[res_idx]
    result_shape = (len(run_files), len(ood_datasets), len(ood_metrics))
    pred_acc = np.full(len(run_files), np.nan)
    # NaN marks "not available" for this model / dataset / method combination.
    res_list = [np.full(result_shape, np.nan) for _ in res_desc]
    sources = [_ood_source(exp[model_idx], desc) for desc in res_desc]

    for i, it in enumerate(run_files):
        # NOTE(review): torch.load unpickles arbitrary objects -- only load trusted result files.
        it_res = torch.load(it)
        try:
            it_res['ood'] = _normalize_ood_results(it_res['ood'])
        except (KeyError, IndexError, AttributeError, TypeError) as err:
            # Fail with the offending file instead of dropping into a debugger.
            raise ValueError(f"Unexpected OOD result layout in {it!r}") from err
        ood = it_res['ood']
        pred_acc[i] = _read_accuracy(it_res)

        # Which datasets a file covers is read from its MOS entry when present, else MSP.
        reference_method = 'MOS' if 'MOS' in ood.keys() else 'MSP'
        for didx, dname in enumerate(ood_datasets):
            if dname not in ood[reference_method].keys():
                continue
            for ood_res, source in zip(res_list, sources):
                if source is None:
                    continue
                method_key, variant_idx = source
                for mi, met in enumerate(ood_metrics):
                    value = float(ood[method_key][dname][met][variant_idx])
                    # Report FPR (= 1 - TNR) rather than TNR.
                    ood_res[i, didx, mi] = 1. - value if met == 'TNR' else value

    exp_stats.append({desc: r for desc, r in zip(res_desc, res_list)})
    exp_stats[-1]['acc'] = pred_acc

In [5]:
def get_resultsdf_forOODDSET(dset='OOD'):
    """Build the mean/std results table for one OOD evaluation set.

    Reads the notebook-level ``exp_info``, ``exp_stats``, ``res_desc``,
    ``ood_labels`` and ``ood_datasets``.

    Args:
        dset: Name of the OOD evaluation set; must be one of ``dset_options`` below.

    Returns:
        DataFrame indexed by (Dataset, Model, Loss, Hierarchy) with
        (acc/ood, metric, value) columns. Values are the mean / std over runs
        scaled by 100. The table is empty for ``imagenet*`` sets and for sets
        that are not present in ``ood_datasets``.

    Raises:
        ValueError: If ``dset`` is not a known OOD set.
    """
    dset_options = ['OOD','iNaturalist', 'SUN', 'Places', 'Textures',
                    'coarseid-coarseood', 'coarseid-fineood',
                    'imagenet1000-coarseood', 'imagenet1000-mediumood', 'imagenet1000-fineood',
                    'balanced100-coarseood', 'balanced100-mediumood', 'balanced100-fineood', 'balanced100-finemediumood',
               ]
    if dset not in dset_options:
        raise ValueError("Unknown OOD DSET")

    miindex =  pd.MultiIndex.from_product(
        [[],[],[],[]], names=['Dataset', 'Model', 'Loss', 'Hierarchy'])
    acccolumns = pd.MultiIndex.from_product([['Accuracy'],[''], ['Mean', 'Std']], names=['acc/ood','metric','value'])
    oodcolumns = pd.MultiIndex.from_product(
        [['MSP', 'ODIN','MOS', 'Pred', 'MinEnt', 'MeanEnt'], ['AUROC', 'FPR', 'AUPR'], ['Mean', 'Std']], names=['acc/ood','metric','value'])
    df = pd.DataFrame(index=miindex, columns=oodcolumns)
    dfacc = pd.DataFrame(index=miindex, columns=acccolumns)
    df = dfacc.join(df)

    # No rows are produced for imagenet* sets, nor for sets that were never loaded.
    if ('imagenet' in dset) or (dset not in ood_datasets):
        return df
    # The stats arrays are laid out in ood_datasets order, so the column index must come
    # from that list. Taking it from dset_options (which has extra imagenet1000-* entries)
    # shifted every balanced100-* set onto the wrong column or past the end of the array.
    dset_idx = ood_datasets.index(dset)

    # Temperature-scaled variants are left out of this table.
    skipped = ['Pred ODIN', 'Mean Entropy Temp Scaled', 'Min Entropy Temp Scaled', 'Max Entropy Temp Scaled']
    for info, stats in zip(exp_info, exp_stats):
        # Dataset-specific OOD splits only apply to experiments on the matching dataset.
        if ('coarseid' in dset) and ('imagenet 100' not in info[0].lower()): continue
        if ('balanced' in dset) and ('balanced' not in info[0].lower()): continue
        stats_dict = {}
        for ood_desc in res_desc:
            if ood_desc in skipped:
                continue
            if stats[ood_desc].shape[1] <= dset_idx:
                continue
            for met_idx, met_desc in enumerate(ood_labels):
                per_run = stats[ood_desc][:, dset_idx, met_idx]
                stats_dict[(ood_desc, met_desc, 'Mean')] = 100. * per_run.mean()
                stats_dict[(ood_desc, met_desc, 'Std')] = 100. * per_run.std()
        stats_dict[('Accuracy', '', 'Mean')] = 100. * stats['acc'].mean()
        stats_dict[('Accuracy', '', 'Std')] = 100. * stats['acc'].std()
        hstr = info[3] if info[3] is not None else 'N/A'
        row = (info[0], info[1], info[2], hstr)
        # .loc with a new row / column label enlarges the frame in place.
        for k, v in stats_dict.items():
            df.loc[row, k] = v
    return df

In [6]:
# Results table for the 'OOD' evaluation set.
df_ood = get_resultsdf_forOODDSET(dset='OOD')
display(df_ood)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc/ood,Accuracy,Accuracy,MSP,MSP,MSP,MSP,MSP,MSP,ODIN,ODIN,ODIN,ODIN,ODIN,ODIN,MOS,MOS,MOS,MOS,MOS,MOS,Pred,Pred,Pred,Pred,Pred,Pred,MinEnt,MinEnt,MinEnt,MinEnt,MinEnt,MinEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,metric,Unnamed: 4_level_1,Unnamed: 5_level_1,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,value,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std
Dataset,Model,Loss,Hierarchy,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3
Imagenet 100,Softmax,CE,,81.261,0.5342,90.2465,0.623,44.7451,1.2756,67.1531,1.8115,93.1977,0.3628,33.098,3.9946,71.7468,2.2204,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Imagenet 100,Cascade,Soft1-1,NBDT,78.996,2.039,,,,,,,,,,,,,,,,,,,89.6539,0.9706,50.3529,4.0901,63.9028,3.8669,91.7078,0.4675,35.8824,5.7547,71.2148,3.2933,91.6255,0.8477,40.3922,6.6262,69.2092,3.3909,85.1551,1.5356,74.3529,3.2617,46.2986,4.1603
Imagenet 100,Cascade,Soft1-1,2 Lvl WN,78.3213,0.4249,,,,,,,,,,,,,,,,,,,89.1135,0.531,52.1961,0.7337,61.9433,2.134,90.579,0.3003,49.2157,2.6902,64.2317,1.7605,91.1798,0.5143,44.1961,1.1379,66.179,2.4193,87.7659,1.8744,80.549,22.2819,48.0588,11.8426
Imagenet 100,Cascade,Soft1-1,Pruned WN,78.3614,0.8684,,,,,,,,,,,,,,,,,,,89.0887,0.6541,53.7647,2.4358,60.7355,1.761,92.6817,0.2411,27.8431,2.4572,76.3041,0.8298,92.3859,0.4303,37.9608,1.0361,73.3062,1.7227,86.5881,0.6249,67.2157,1.9244,50.6299,1.7946
Imagenet 100,Cascade,"Soft1-1,OE.2-.2",Pruned WN,83.0522,0.1202,,,,,,,,,,,,,,,,,,,92.1572,0.2577,34.7451,0.8716,72.5341,0.698,94.2932,0.2721,16.6667,0.967,82.7055,0.8074,93.9251,0.2712,24.1569,0.6396,80.8566,0.8378,91.4921,0.1297,38.8627,1.2971,68.7579,0.562
Imagenet 100,CascadeDW,Soft1-1,Pruned WN,82.3775,0.0568,,,,,,,,,,,,,,,,,,,91.3269,0.2832,40.8627,0.9717,69.9591,1.2132,93.6958,0.1335,19.0196,0.3882,83.0992,0.4033,93.9245,0.1961,25.6863,0.6747,80.8828,0.7992,89.1633,0.4585,57.9216,2.8501,59.417,1.5093
Imagenet 100,CascadeDW,"Soft1-1,OE.2-.2",Pruned WN,82.8514,0.1423,,,,,,,,,,,,,,,,,,,92.2935,0.1514,34.0392,1.4767,71.6454,0.9573,94.3834,0.075,16.4314,0.4929,82.6846,0.2525,94.0823,0.1278,23.6078,0.9622,80.235,0.9147,91.7915,0.2693,38.1176,1.4345,69.0402,1.9963
Imagenet 100,CascadeDW,Soft1-1,2 Lvl WN,82.1928,0.384,,,,,,,,,,,,,,,,,,,91.7306,0.1693,38.0784,1.6922,71.4923,1.1987,93.0846,0.134,31.0588,1.8251,75.3833,0.0828,93.4306,0.0811,28.3922,1.0181,76.44,0.3687,92.12,0.0441,36.0,0.8538,71.7191,0.5352
Imagenet 100,CascadeDW,Soft1-1,NBDT,81.2771,0.4775,,,,,,,,,,,,,,,,,,,91.3295,0.2939,38.4706,1.5459,70.602,0.8605,93.149,0.2392,27.2941,3.6917,78.0544,0.8601,92.9154,0.1447,32.1961,1.4387,75.3558,0.5716,86.686,0.1758,72.9412,1.1325,48.3244,0.4401
Imagenet 100,MOS,MOS,2 Lvl WN,81.5181,0.1041,,,,,,,,,,,,,91.4097,0.5228,45.9216,4.9443,62.8093,1.9571,,,,,,,,,,,,,,,,,,,,,,,,


In [7]:
# Build one results table per remaining OOD evaluation set; the targets and the
# dataset names below are listed in the same order.
(
    df_iNaturalist,
    df_SUN,
    df_Places,
    df_Textures,
    df_coarseid_coarseood,
    df_coarseid_fineood,
    df_balanced100_coarseood,
    df_balanced100_mediumood,
    df_balanced100_fineood,
    df_balanced100_finemediumood,
) = (
    get_resultsdf_forOODDSET(ood_name)
    for ood_name in (
        'iNaturalist',
        'SUN',
        'Places',
        'Textures',
        'coarseid-coarseood',
        'coarseid-fineood',
        'balanced100-coarseood',
        'balanced100-mediumood',
        'balanced100-fineood',
        'balanced100-finemediumood',
    )
)

In [8]:
# Show the two 'coarseid' tables, each preceded by a short caption.
for caption, table in (
    ("coarseood", df_coarseid_coarseood),
    ("fineood", df_coarseid_fineood),
):
    print(caption)
    display(table)

coarseood


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc/ood,Accuracy,Accuracy,MSP,MSP,MSP,MSP,MSP,MSP,ODIN,ODIN,ODIN,ODIN,ODIN,ODIN,MOS,MOS,MOS,MOS,MOS,MOS,Pred,Pred,Pred,Pred,Pred,Pred,MinEnt,MinEnt,MinEnt,MinEnt,MinEnt,MinEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,metric,Unnamed: 4_level_1,Unnamed: 5_level_1,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,value,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std
Dataset,Model,Loss,Hierarchy,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3
Imagenet 100,Softmax,CE,,81.261,0.5342,92.6162,0.6653,38.9778,1.3525,69.3019,2.0693,95.8997,0.4733,26.1333,4.2847,74.2832,2.7425,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Imagenet 100,Cascade,Soft1-1,NBDT,78.996,2.039,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Imagenet 100,Cascade,Soft1-1,2 Lvl WN,78.3213,0.4249,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Imagenet 100,Cascade,Soft1-1,Pruned WN,78.3614,0.8684,,,,,,,,,,,,,,,,,,,91.0163,0.6701,49.2,2.7086,61.7189,1.8799,96.7408,0.2507,18.7111,2.7049,81.0084,1.0008,94.959,0.4597,30.5333,1.2269,76.5228,1.8135,88.2367,0.7019,64.4444,2.4205,50.4227,1.924
Imagenet 100,Cascade,"Soft1-1,OE.2-.2",Pruned WN,83.0522,0.1202,,,,,,,,,,,,,,,,,,,94.9641,0.0386,27.2444,0.9758,75.7929,0.611,98.1797,0.1254,6.4444,1.0461,87.9223,0.841,97.1142,0.0687,14.7556,0.6379,85.4674,0.7764,94.2366,0.1262,31.3333,1.2555,71.675,0.7836
Imagenet 100,CascadeDW,Soft1-1,Pruned WN,82.3775,0.0568,,,,,,,,,,,,,,,,,,,93.9303,0.2191,33.9651,1.1844,72.2725,1.0095,98.0418,0.0678,9.0667,0.476,88.1835,0.9606,96.8992,0.1056,16.6444,0.6733,84.8616,0.8925,91.502,0.4041,52.8889,3.3268,60.109,1.4205
Imagenet 100,CascadeDW,"Soft1-1,OE.2-.2",Pruned WN,82.8514,0.1423,,,,,,,,,,,,,,,,,,,95.0574,0.1312,26.6667,1.4584,73.4256,1.0519,98.1497,0.071,6.4762,0.3367,86.7719,0.3777,97.2292,0.1103,14.7619,1.0191,83.7381,0.955,94.5733,0.2897,30.5238,1.3669,70.6778,2.1546
Imagenet 100,CascadeDW,Soft1-1,2 Lvl WN,82.1928,0.384,,,,,,,,,,,,,,,,,,,94.2884,0.1536,30.9048,1.7077,73.5599,1.1957,96.3392,0.0838,23.619,1.5836,77.9489,0.4276,96.1431,0.0453,19.9524,1.0191,79.1919,0.422,94.7534,0.0502,28.381,0.6734,73.7935,0.5103
Imagenet 100,CascadeDW,Soft1-1,NBDT,81.2771,0.4775,,,,,,,,,,,,,,,,,,,94.0339,0.2173,31.3333,1.2363,72.6267,0.6893,96.8134,0.1743,18.7619,3.3853,81.6939,0.7339,95.7669,0.1735,23.9048,1.3469,78.1533,0.5599,88.7871,0.1044,70.8571,0.9258,46.4464,0.0792
Imagenet 100,MOS,MOS,2 Lvl WN,81.5181,0.1041,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


fineood


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc/ood,Accuracy,Accuracy,MSP,MSP,MSP,MSP,MSP,MSP,ODIN,ODIN,ODIN,ODIN,ODIN,ODIN,MOS,MOS,MOS,MOS,MOS,MOS,Pred,Pred,Pred,Pred,Pred,Pred,MinEnt,MinEnt,MinEnt,MinEnt,MinEnt,MinEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MeanEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt,MaxEnt
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,metric,Unnamed: 4_level_1,Unnamed: 5_level_1,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR,AUROC,AUROC,FPR,FPR,AUPR,AUPR
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,value,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std,Mean,Std
Dataset,Model,Loss,Hierarchy,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3
Imagenet 100,Softmax,CE,,81.261,0.5342,72.4741,0.3067,88.0,0.8165,4.9591,0.1653,72.9326,1.874,85.3333,2.0548,5.3707,0.5462,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Imagenet 100,Cascade,Soft1-1,NBDT,78.996,2.039,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Imagenet 100,Cascade,Soft1-1,2 Lvl WN,78.3213,0.4249,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Imagenet 100,Cascade,Soft1-1,Pruned WN,78.3614,0.8684,,,,,,,,,,,,,,,,,,,74.6313,0.9243,88.0,4.3205,5.3551,0.2146,62.2385,0.723,96.3333,1.6997,3.1188,0.0428,73.0879,0.9509,93.6667,1.6997,4.4602,0.3027,74.2239,1.1431,88.0,3.559,4.9173,0.5057
Imagenet 100,Cascade,"Soft1-1,OE.2-.2",Pruned WN,83.0522,0.1202,,,,,,,,,,,,,,,,,,,71.1058,1.9848,91.0,1.633,4.2391,0.2876,65.1449,1.4024,93.3333,0.4714,3.5046,0.1096,70.0075,1.9341,94.6667,0.9428,3.9339,0.2504,70.908,1.8661,95.3333,1.6997,4.0258,0.213
Imagenet 100,CascadeDW,Soft1-1,Pruned WN,82.3775,0.0568,,,,,,,,,,,,,,,,,,,76.7798,3.3787,79.4444,9.9641,9.6494,3.9606,69.1018,6.9613,75.0,14.1448,14.3994,8.1302,77.2715,3.8316,76.2222,11.1864,14.2271,7.2056,76.1141,3.0156,85.6667,6.6053,8.0873,2.879
Imagenet 100,CascadeDW,"Soft1-1,OE.2-.2",Pruned WN,82.8514,0.1423,,,,,,,,,,,,,,,,,,,79.3951,0.7621,68.4444,1.5713,14.0637,0.7203,76.8074,0.7331,62.8889,1.3699,20.7116,0.719,79.3971,0.6714,64.8889,0.8315,19.0112,1.1164,78.8101,0.6763,73.5556,1.7498,12.4753,1.3334
Imagenet 100,CascadeDW,Soft1-1,2 Lvl WN,82.1928,0.384,,,,,,,,,,,,,,,,,,,79.7942,0.2455,71.5556,3.9378,13.6303,1.9334,77.8962,0.8253,65.7778,3.0952,14.9327,1.5085,80.7722,0.2531,67.7778,1.1331,15.4191,1.5,79.8307,0.0441,71.5556,1.9116,13.5843,1.0601
Imagenet 100,CascadeDW,Soft1-1,NBDT,81.2771,0.4775,,,,,,,,,,,,,,,,,,,78.7085,0.9449,71.7778,2.9979,12.9654,1.0761,76.0486,0.6013,67.1111,5.3426,15.0504,2.2381,79.6085,0.4631,70.8889,2.6851,14.4372,1.4291,76.8804,1.0636,82.6667,3.311,8.9555,0.8179
Imagenet 100,MOS,MOS,2 Lvl WN,81.5181,0.1041,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
# Summary table: for every experiment and OOD set, the metrics of whichever scoring
# method has the highest mean AUROC over runs.
# Note: accuracy is scaled by 100 below, the OOD metrics are left as fractions.
miindex =  pd.MultiIndex.from_product(
    [[],[],[],[],[]], names=['Dataset', 'OOD DSet', 'Model', 'Loss', 'Hierarchy'])
acccolumns = pd.MultiIndex.from_product([['Accuracy'],[' '], ['Mean', 'Std']], names=['acc/ood','metric','value'])
oodmethcolumn = pd.MultiIndex.from_product([['Best AUROC'],['Method'], [' ']], names=['acc/ood','metric','value'])
oodcolumns = pd.MultiIndex.from_product(
    [['Best AUROC'], ['AUROC', 'FPR', 'AUPR'], ['Mean', 'Std']], names=['acc/ood','metric','value'])
df = pd.DataFrame(index=miindex, columns=acccolumns)
dfood = pd.DataFrame(index=miindex, columns=oodcolumns)
dfoodmeth = pd.DataFrame(index=miindex, columns=oodmethcolumn)
df = df.join(dfoodmeth)
df = df.join(dfood)
ood_dsets = ['OOD', 'iNaturalist', 'SUN', 'Places', 'Textures']
for info, stats in zip(exp_info, exp_stats):
    # Computed once per experiment: it is also needed for the accuracy row after the inner loop.
    hstr = info[3] if info[3] is not None else 'N/A'
    for ood_dset in ood_dsets:
        # The stats arrays are laid out in ood_datasets order; look the column up by name
        # rather than relying on ood_dsets being a prefix of that list.
        ods_idx = ood_datasets.index(ood_dset)
        stats_dict = {}
        best_auroc = -1.
        for ood_desc in res_desc:
            mean_auroc = stats[ood_desc][:, ods_idx, 0].mean()
            # A NaN mean (method not available) compares False and is skipped.
            if mean_auroc > best_auroc:
                best_auroc = mean_auroc
                stats_dict[('Best AUROC', 'Method', ' ')] = ood_desc
                for met_idx, met_desc in enumerate(ood_labels):
                    stats_dict[('Best AUROC', met_desc, 'Mean')] = stats[ood_desc][:, ods_idx, met_idx].mean()
                    stats_dict[('Best AUROC', met_desc, 'Std')] = stats[ood_desc][:, ods_idx, met_idx].std()
        for k, v in stats_dict.items():
            df.loc[(info[0], ood_dset, info[1], info[2], hstr), k] = v
    # Accuracy does not depend on the OOD set, so it is recorded on the 'OOD' row only.
    acc_row = (info[0], 'OOD', info[1], info[2], hstr)
    df.loc[acc_row, ('Accuracy', ' ', 'Mean')] = 100. * stats['acc'].mean()
    df.loc[acc_row, ('Accuracy', ' ', 'Std')] = 100. * stats['acc'].std()
df = df.sort_index()
display(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,acc/ood,Accuracy,Accuracy,Best AUROC,Best AUROC,Best AUROC,Best AUROC,Best AUROC,Best AUROC,Best AUROC
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,metric,Unnamed: 5_level_1,Unnamed: 6_level_1,Method,AUROC,AUROC,FPR,FPR,AUPR,AUPR
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,value,Mean,Std,Unnamed: 7_level_2,Mean,Std,Mean,Std,Mean,Std
Dataset,OOD DSet,Model,Loss,Hierarchy,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
Balanced 100,OOD,Cascade,Soft1-1,Pruned WN,78.3143,3.9667,MeanEnt,0.8202,0.0138,0.5244,0.0106,0.7263,0.0182
Balanced 100,OOD,Cascade,"Soft1-1,OE.2-.2",Pruned WN,81.8286,0.1017,MeanEnt,0.8484,0.0042,0.4856,0.0028,0.7489,0.0091
Balanced 100,OOD,MOS,MOS,MOS Groups,80.3524,0.2091,MOS,0.868,0.0035,0.5887,0.0076,0.6997,0.0031
Balanced 100,OOD,Softmax,CE,,80.8476,0.2266,ODIN,0.8682,0.0023,0.4996,0.0292,0.7414,0.0122
Balanced 100,Places,Cascade,Soft1-1,Pruned WN,,,MeanEnt,0.9337,0.0064,0.3438,0.0238,0.971,0.0032
Balanced 100,Places,Cascade,"Soft1-1,OE.2-.2",Pruned WN,,,MeanEnt,0.9468,0.002,0.2562,0.0092,0.9756,0.0016
Balanced 100,Places,MOS,MOS,MOS Groups,,,MOS,0.9465,0.0011,0.3615,0.0079,0.9681,0.0022
Balanced 100,Places,Softmax,CE,,,,ODIN,0.9635,0.0009,0.1913,0.004,0.9841,0.0001
Balanced 100,SUN,Cascade,Soft1-1,Pruned WN,,,MeanEnt,0.9491,0.0056,0.2886,0.0262,0.9766,0.003
Balanced 100,SUN,Cascade,"Soft1-1,OE.2-.2",Pruned WN,,,MeanEnt,0.959,0.0023,0.205,0.0107,0.9802,0.0016
