In [None]:
%pylab inline
from sklearn.utils.random import check_random_state
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib
import graphviz
import itertools
import os
from scipy import integrate

import matplotlib.pylab as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import scipy
from scipy import stats

from sklearn.metrics import roc_auc_score, precision_recall_curve, det_curve, roc_curve
import pickle
import pandas as pd
import seaborn as sns

# Report the library versions and whether TensorFlow can see a GPU.
print("TF version:", tf.__version__)
print("Hub version:", hub.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPU is", "available" if gpu_devices else "NOT AVAILABLE")

# Show up to 100 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 100)

# PLOT = True switches matplotlib to the pgf backend (LaTeX text rendering);
# the plotting cells save their figure to a .pgf file only when it is set.
PLOT = False
if PLOT:
    pgf_settings = {
        "pgf.texsystem": "pdflatex",
        'font.family': 'serif',
        'text.usetex': True,
        'pgf.rcfonts': False,
    }
    matplotlib.use("pgf")
    matplotlib.rcParams.update(pgf_settings)

In [None]:
# Maps a scoring key (the part after '|' in the score names) to the label used
# in plot legends.  Raw strings keep the LaTeX backslash of `\epsilon` literal
# instead of relying on Python passing through the unknown escape `\e`.
rename = {
    'energy': 'Energy',
    'odin_t=1000_epsilon=0.0014': r'ODIN(T=1000, $\epsilon$=0.0014)',
    'softmax_t=1': 'Softmax',
    'softmax_t=1000': r'ODIN(T=1000, $\epsilon$=0)',
}

In [None]:
def compare_scores(scores_id, scores_ood):
    """Compare in-distribution (ID) scores against out-of-distribution (OOD) scores.

    Both inputs are sanitised first: NaNs are replaced via ``np.nan_to_num``,
    values are clipped to [-1e18, 1e18] and cast to float64.  ID samples are
    labelled 1 and OOD samples 0 for the ROC-based metrics.

    Returns a dict with the keys:
        pvalue, statistic -- result of the two-sample KS test on the two score sets
        auc               -- ROC AUC of the scores against the ID/OOD labels
        mean_diff         -- mean ID score minus mean OOD score
        fpr95             -- smallest FPR among the ROC points with TPR >= 0.95
        fpr95_threshold   -- threshold of that ROC point
        detection_error   -- 0.5 * (1 - TPR) + 0.5 * FPR at that ROC point
    """
    def _sanitize(scores):
        return np.nan_to_num(scores).clip(-1e18, 1e18).astype('float64')

    id_scores = _sanitize(scores_id)
    ood_scores = _sanitize(scores_ood)

    labels = np.array([1] * len(id_scores) + [0] * len(ood_scores))
    all_scores = np.concatenate([id_scores, ood_scores])

    ks_result = stats.ks_2samp(id_scores, ood_scores)
    auc = roc_auc_score(labels, all_scores)

    fpr_curve, tpr_curve, threshold_curve = roc_curve(labels, all_scores)
    # Tuples are ordered (fpr, threshold, tpr) so that min() picks the lowest
    # FPR first and breaks ties on the lowest threshold.
    candidates = [
        (fpr_i, thr_i, tpr_i)
        for fpr_i, tpr_i, thr_i in zip(fpr_curve, tpr_curve, threshold_curve)
        if tpr_i >= 0.95
    ]
    best_fpr, best_threshold, best_tpr = min(candidates)

    return {
        'pvalue': ks_result.pvalue,
        'statistic': ks_result.statistic,
        'auc': auc,
        'mean_diff': id_scores.mean() - ood_scores.mean(),
        'fpr95': best_fpr,
        'fpr95_threshold': best_threshold,
        'detection_error': 0.5 * (1 - best_tpr) + 0.5 * best_fpr,
    }

In [None]:
# output[model_name][dataset_name][scores_name] => np.array (n, ) with the scores
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# this with a feature_vector.pkl that comes from a trusted source.
# The context manager closes the file handle once the data is loaded.
with open('../input/merger-feature-vector/feature_vector.pkl', 'rb') as feature_file:
    output = pickle.load(feature_file)

In [None]:
# for fixing ODIN(ε=0.0014)
# Disabled snippet: it is wrapped in a string literal, so nothing below runs.
# If enabled, it would negate, for every model and dataset, each score array
# whose name contains both '0.0014' and 'base'.
# NOTE(review): being the last expression of the cell, the string itself is
# echoed as the cell output.
'''
for model_name in output:
    dataset_id = 'imagenet|'
    for dataset_name in output[model_name]:
        for scores_name in output[model_name][dataset_name]:
            if '0.0014' in scores_name and 'base' in scores_name:
                output[model_name][dataset_name][scores_name] = -output[model_name][dataset_name][scores_name]
'''

In [None]:
# show all the comparison ID-OOD
# entries of a dataset dict that are not score arrays
non_score_entries = ['react|fv', 'base|fv', 'images']

for model_name in output:
    datasets = output[model_name]
    dataset_id = 'imagenet|'
    for dataset_name in datasets:
        # skip the ID set itself and every 'imagenet|iNaturalist...' dataset
        if dataset_name == dataset_id or 'imagenet|iNaturalist' in dataset_name:
            continue
        for scores_name in datasets[dataset_name]:
            if scores_name in non_score_entries:
                continue
            try:
                data = compare_scores(datasets[dataset_id][scores_name], datasets[dataset_name][scores_name])
            except ValueError:
                print(f'Error: {dataset_id} - {dataset_name} - {scores_name}')
            else:
                print(f'{model_name} - {dataset_name} - {scores_name}: {data}')

In [None]:
# create the df_softmax dataframe: one row per (dataset, scoring) pair with the
# base and react metrics side by side
datasets = output['ResNet101V2']
dataset_id = 'imagenet|'

# Loop-invariant configuration (previously rebuilt for every dataset).
scoring_names = ['softmax_t=1', 'softmax_t=1000', 'odin_t=1000_epsilon=0.0014', 'energy']
metric_names = ['auc', 'pvalue', 'fpr95', 'fpr95_threshold', 'detection_error']
# Column order of the resulting frame.
column_order = [
    'dataset', 'react_auc', 'base_auc', 'react_pvalue', 'base_pvalue', 'scoring',
    'base_x0', 'react_x0', 'base_fpr95', 'react_fpr95',
    'base_fpr95_threshold', 'react_fpr95_threshold',
    'base_detection_error', 'react_detection_error',
]

# Build complete rows instead of filling parallel column lists, so the base and
# react values of a row can never drift out of alignment.
softmax_rows = []
for dataset_name in datasets:
    for scoring in scoring_names:
        row = {'dataset': dataset_name, 'scoring': scoring}
        for prefix_scorer in ('base', 'react'):
            scores_name = f'{prefix_scorer}|{scoring}'
            data = compare_scores(datasets[dataset_id][scores_name], datasets[dataset_name][scores_name])
            # x0 = score of the first sample of this dataset
            row[f'{prefix_scorer}_x0'] = datasets[dataset_name][scores_name][0]
            for metric in metric_names:
                row[f'{prefix_scorer}_{metric}'] = data[metric]
        softmax_rows.append(row)

df_softmax = pd.DataFrame(softmax_rows, columns=column_order)
# react minus base: positive diff_auc means react has the higher AUC
df_softmax['diff_auc'] = df_softmax['react_auc'] - df_softmax['base_auc']
df_softmax['diff_pvalue'] = df_softmax['react_pvalue'] - df_softmax['base_pvalue']
df_softmax['diff_fpr95'] = df_softmax['react_fpr95'] - df_softmax['base_fpr95']

df_softmax

In [None]:
# Mean of the diff/base/react columns for AUC and FPR95 over all rows.
for metric in ('auc', 'fpr95'):
    for kind in ('diff', 'base', 'react'):
        print(f'mean {metric} {kind}: {df_softmax[f"{kind}_{metric}"].mean()}')

fig, ax = plt.subplots(figsize=(10,10))
# Correlate the numeric columns only: `dataset` and `scoring` hold strings and
# pandas >= 2.0 raises on them instead of silently dropping them.
corr = df_softmax.select_dtypes(include='number').corr()
sns.heatmap(corr, cmap="Blues", annot=True, ax=ax)

In [None]:
# Softmax (T=1) rows, lowest react AUC first (at most 60 rows).
softmax_t1_rows = df_softmax.loc[df_softmax['scoring'] == 'softmax_t=1']
softmax_t1_rows.sort_values(by=['react_auc']).head(60)

In [None]:
# Softmax (T=1) rows, lowest base AUC first (at most 60 rows).
softmax_t1_rows = df_softmax.loc[df_softmax['scoring'] == 'softmax_t=1']
softmax_t1_rows.sort_values(by=['base_auc']).head(60)

In [None]:
# Softmax (T=1) rows, lowest AUC difference (react - base) first (at most 60 rows).
softmax_t1_rows = df_softmax.loc[df_softmax['scoring'] == 'softmax_t=1']
softmax_t1_rows.sort_values(by=['diff_auc']).head(60)

# Imagenet - increase scale

In [None]:
# gaussian: base vs react AUC for each scoring as the gaussian level grows
fig, ax = plt.subplots(figsize=(24, 12))
scorings = df_softmax.scoring.unique()
colors = [f'C{i}' for i in range(10)]
for scoring, color in zip(scorings, colors):
    df = df_softmax[df_softmax.scoring==scoring]

    is_gaussian = df.dataset.str.contains("imagenet|", regex=False) & df.dataset.str.contains("gaussian_", regex=False)
    df = df[is_gaussian | (df.dataset == "imagenet|")]
    # level = trailing "_<number>" of the name; the unperturbed 'imagenet|' counts as 0
    x = [float(parts[-1]) if len(parts) >= 2 else 0 for parts in df.dataset.str.split('_')]
    xs, ys, ys2 = zip(*sorted(zip(x, df['base_auc'], df['react_auc'])))
    ax.plot(xs, ys, label='base '+ rename[scoring], linestyle='-', color=color)
    ax.plot(xs, ys2, label='react '+ rename[scoring], linestyle='--', color=color)
ax.legend()
ax.grid()
ax.set_xscale('log')

# Save when PLOT is set (pgf backend, which cannot show figures); show otherwise.
if PLOT:
    fig.savefig('gaussian_incremental.pgf')
else:
    plt.show()

In [None]:
# pixelization: base vs react AUC for each scoring per pixelization level
fig, ax = plt.subplots(figsize=(24, 12))
scorings = df_softmax.scoring.unique()
colors = [f'C{i}' for i in range(10)]
for scoring, color in zip(scorings, colors):
    df = df_softmax[df_softmax.scoring==scoring]
    is_pixelized = df.dataset.str.contains("imagenet|", regex=False) & df.dataset.str.contains("pixelization_", regex=False)
    df = df[is_pixelized | (df.dataset == "imagenet|")]
    # level = trailing "_<number>" of the name; the unperturbed 'imagenet|' counts as 1
    x = [float(parts[-1]) if len(parts) >= 2 else 1 for parts in df.dataset.str.split('_')]
    points = sorted(zip(x, df['base_auc'], df['react_auc']))
    xs, ys, ys2 = zip(*points)
    ax.plot(xs, ys, label=f'base {rename[scoring]}', linestyle='-', color=color)
    ax.plot(xs, ys2, label=f'react {rename[scoring]}', linestyle='--', color=color)

ax.legend()
ax.grid()
ax.set_xscale('log')

if PLOT:
    fig.savefig('pixelization_incremental.pgf')

In [None]:
# blur: base vs react AUC for each scoring per blur level
fig, ax = plt.subplots(figsize=(24, 12))
scorings = df_softmax.scoring.unique()
colors = [f'C{i}' for i in range(10)]
for scoring, color in zip(scorings, colors):
    df = df_softmax[df_softmax.scoring==scoring]
    is_blurred = df.dataset.str.contains("imagenet|", regex=False) & df.dataset.str.contains("blur_", regex=False)
    df = df[is_blurred | (df.dataset == "imagenet|")]
    # level = trailing "_<number>" of the name; the unperturbed 'imagenet|' counts as 0
    x = [float(parts[-1]) if len(parts) >= 2 else 0 for parts in df.dataset.str.split('_')]
    points = sorted(zip(x, df['base_auc'], df['react_auc']))
    xs, ys, ys2 = zip(*points)
    ax.plot(xs, ys, label=f'base {rename[scoring]}', linestyle='-', color=color)
    ax.plot(xs, ys2, label=f'react {rename[scoring]}', linestyle='--', color=color)

ax.legend()
ax.grid()

if PLOT:
    fig.savefig('blur_incremental.pgf')

In [None]:
# encodingquality: base vs react AUC for each scoring per JPG quality
fig, ax = plt.subplots(figsize=(16, 8))
scorings = df_softmax.scoring.unique()
colors = [f'C{i}' for i in range(10)]
for scoring, color in zip(scorings, colors):
    df = df_softmax[df_softmax.scoring==scoring]
    is_reencoded = df.dataset.str.contains("imagenet|", regex=False) & df.dataset.str.contains("encodingquality_", regex=False)
    df = df[is_reencoded | (df.dataset == "imagenet|")]
    # quality = trailing "_<number>" of the name; the unperturbed 'imagenet|' counts as 100
    x = [float(parts[-1]) if len(parts) >= 2 else 100 for parts in df.dataset.str.split('_')]

    points = sorted(zip(x, df['base_auc'], df['react_auc']))
    xs, ys, ys2 = zip(*points)
    ax.plot(xs, ys, label=f'base {rename[scoring]}', linestyle='-', color=color)
    ax.plot(xs, ys2, label=f'react {rename[scoring]}', linestyle='--', color=color)

ax.legend()
ax.set_xlabel('JPG Quality')
ax.grid()

if PLOT:
    fig.savefig('encodingquality_incremental.pgf')

In [None]:
# perspectivetransform: base vs react AUC for each scoring per transform level
fig, ax = plt.subplots(figsize=(16, 8))
scorings = df_softmax.scoring.unique()
colors = [f'C{i}' for i in range(10)]
for scoring, color in zip(scorings, colors):
    df = df_softmax[df_softmax.scoring==scoring]
    df = df[df.dataset.str.contains("imagenet|", regex=False)&df.dataset.str.contains("perspectivetransform_", regex=False)|(df.dataset=="imagenet|")]
    # level = trailing "_<number>" of the name; the unperturbed 'imagenet|' counts as 0
    x = [float(a.split('_')[-1]) if len(a.split('_')) >= 2 else 0 for a in df.dataset.to_numpy()]
    # Sort by level before plotting (as the other incremental plots do);
    # otherwise the line follows the arbitrary order of the dataset dict.
    xs, ys, ys2 = zip(*sorted(zip(x, df['base_auc'], df['react_auc'])))
    ax.plot(xs, ys, label='base '+ rename[scoring], linestyle='-', color=color)
    ax.plot(xs, ys2, label='react '+ rename[scoring], linestyle='--', color=color)
ax.legend()
ax.grid()

if PLOT:
    fig.savefig('perspectivetransform_incremental.pgf')

In [None]:
# adversarial_x_softmax_base (starting from OOD iNaturalist)
fig, ax = plt.subplots(figsize=(16, 8))
scorings = df_softmax.scoring.unique()
colors = [f'C{i}' for i in range(10)]
for scoring, color in zip(scorings, colors):
    df = df_softmax[df_softmax.scoring==scoring]
    is_adversarial = df.dataset.str.contains("^iNaturalist\\|adversarial\\_[0-9.]*\\_softmax\\_base", regex=True)
    df = df[is_adversarial | (df.dataset == "imagenet|")]
    # strength = second "_"-separated field of the name; 'imagenet|' (no "_") counts as 0
    x = [float(parts[1]) if len(parts) >= 2 else 0 for parts in df.dataset.str.split('_')]
    points = sorted(zip(x, df['base_auc'], df['react_auc']))
    xs, ys, ys2 = zip(*points)
    ax.plot(xs, ys, label=f'base {rename[scoring]}', linestyle='-', color=color)
    ax.plot(xs, ys2, label=f'react {rename[scoring]}', linestyle='--', color=color)

ax.set_xscale('log')
ax.legend()
ax.grid()

if PLOT:
    fig.savefig('adversarial_incremental.pgf')
else:
    plt.show()

# imagenet - sample image scoring

In [None]:
# gaussian: normalised score of the first sample (x0) per gaussian level
fig, ax = plt.subplots(figsize=(16, 8))
scorings = df_softmax.scoring.unique()
colors = [f'C{i}' for i in range(10)]
for scoring, color in zip(scorings, colors):
    df = df_softmax[df_softmax.scoring==scoring]
    df = df[(df.dataset.str.contains("imagenet|", regex=False)&df.dataset.str.contains("gaussian_", regex=False))|(df.dataset=="imagenet|")]
    # work on a copy so the normalisation does not touch df_softmax
    df = df.copy()
    # min-max normalise the sample scores to [0, 1] within this scoring
    df['base_x0'] = (df['base_x0']-df['base_x0'].min())/(df['base_x0'].max()-df['base_x0'].min())
    df['react_x0'] = (df['react_x0']-df['react_x0'].min())/(df['react_x0'].max()-df['react_x0'].min())

    # level = trailing "_<number>" of the name; the unperturbed 'imagenet|' counts as 0
    x = [float(a.split('_')[-1]) if len(a.split('_')) >= 2 else 0 for a in df.dataset.to_numpy()]

    # Plot the normalised sample scores.  The AUC columns were plotted here
    # before, which left the normalisation above unused.
    xs, ys, ys2 = zip(*sorted(zip(x, df['base_x0'], df['react_x0'])))
    ax.plot(xs, ys, label='base '+ rename[scoring], linestyle='-', color=color)
    ax.plot(xs, ys2, label='react '+ rename[scoring], linestyle='--', color=color)

ax.legend()
ax.grid()

# avg reduction - hist

In [None]:
# avg reduction: histograms of the react - base differences (AUC left, KS
# p-value right) over the datasets whose name does not contain 'imagenet|'
fig, axs = plt.subplots(figsize=(16, 8), ncols=2)
is_imagenet = df_softmax.dataset.str.contains('imagenet|', regex=False)
df = df_softmax[~is_imagenet]

for hist_ax, column in zip(axs, ('diff_auc', 'diff_pvalue')):
    _ = hist_ax.hist(df[column], bins=200)

# diff auc - react vs base

In [None]:
# better: non-'imagenet|' rows where react's AUC is at least the base AUC
is_imagenet = df_softmax.dataset.str.contains('imagenet|', regex=False)
df = df_softmax[~is_imagenet]
df.loc[df['diff_auc'] >= 0]

In [None]:
# worse: non-'imagenet|' rows where react's AUC is below the base AUC
is_imagenet = df_softmax.dataset.str.contains('imagenet|', regex=False)
df = df_softmax[~is_imagenet]
df.loc[df['diff_auc'] < 0]

In [None]:
# rows whose dataset name contains the literal 'imagenet|'
is_imagenet = df_softmax.dataset.str.contains('imagenet|', regex=False)
df = df_softmax[is_imagenet]
df

# Best & Worst

In [None]:
# ten rows with the lowest react - base AUC difference
df_softmax.sort_values(by='diff_auc').iloc[:10]

In [None]:
# ten rows with the highest react - base AUC difference
df_softmax.sort_values(by='diff_auc', ascending=False).iloc[:10]

# Adversarial - iNaturalist

In [None]:
# iNaturalist datasets scored with softmax (T=1), lowest base AUC first
is_scoring = df_softmax['scoring'] == 'softmax_t=1'
is_inaturalist = df_softmax['dataset'].str.contains('iNaturalist|', regex=False)
df_softmax[is_scoring & is_inaturalist].sort_values(by=['base_auc']).head(60)

In [None]:
# iNaturalist datasets scored with softmax (T=1), lowest react AUC first
is_scoring = df_softmax['scoring'] == 'softmax_t=1'
is_inaturalist = df_softmax['dataset'].str.contains('iNaturalist|', regex=False)
df_softmax[is_scoring & is_inaturalist].sort_values(by=['react_auc']).head(60)

In [None]:
# iNaturalist datasets scored with softmax (T=1000), lowest base AUC first
is_scoring = df_softmax['scoring'] == 'softmax_t=1000'
is_inaturalist = df_softmax['dataset'].str.contains('iNaturalist|', regex=False)
df_softmax[is_scoring & is_inaturalist].sort_values(by=['base_auc']).head(60)

In [None]:
# iNaturalist datasets scored with softmax (T=1000), lowest react AUC first
is_scoring = df_softmax['scoring'] == 'softmax_t=1000'
is_inaturalist = df_softmax['dataset'].str.contains('iNaturalist|', regex=False)
df_softmax[is_scoring & is_inaturalist].sort_values(by=['react_auc']).head(60)

# Failure cases of ReAct

In [None]:
# non-null entries per column, grouped by scoring
rows_by_scoring = df_softmax.groupby(by='scoring')
rows_by_scoring.count()

In [None]:
# worse: per scoring, count the rows where react's AUC is below the base AUC
react_worse = df_softmax.loc[df_softmax['diff_auc'] < 0]
react_worse.groupby(by='scoring').count()

In [None]:
# better: per scoring, count the rows where react's AUC is at least the base AUC
react_better = df_softmax.loc[df_softmax['diff_auc'] >= 0]
react_better.groupby(by='scoring').count()

In [None]:
# worse: per dataset, count the rows where react's AUC is below the base AUC
react_worse = df_softmax.loc[df_softmax['diff_auc'] < 0]
react_worse.groupby(by='dataset').count()

In [None]:
# better: per dataset, count the rows where react's AUC is at least the base AUC
react_better = df_softmax.loc[df_softmax['diff_auc'] >= 0]
react_better.groupby(by='dataset').count()

In [None]:
# AUC difference against base AUC, split by whether the dataset name
# contains the literal 'imagenet|'
fig, ax = plt.subplots(figsize=(16, 8))
is_imagenet = df_softmax.dataset.str.contains('imagenet|', regex=False)
for group_label, group_mask in (('not imagenet', ~is_imagenet), ('imagenet', is_imagenet)):
    df = df_softmax[group_mask]
    ax.scatter(df.base_auc, df.diff_auc, label=group_label)

ax.grid()
ax.legend()
ax.set_xlabel('base auc')
ax.set_ylabel('diff auc')

In [None]:
# AUC difference against base AUC, one colour per dataset
fig, ax = plt.subplots(figsize=(16, 8))
# sort=False keeps the datasets in order of first appearance
for dataset, df in df_softmax.groupby('dataset', sort=False):
    ax.scatter(df.base_auc, df.diff_auc, label=dataset)
ax.grid()
ax.legend()
ax.set_xlabel('base auc')
ax.set_ylabel('diff auc')

# Scatter - base/react

In [None]:
# react AUC against base AUC, one colour per scoring
fig, ax = plt.subplots(figsize=(24, 24))

# sort=False keeps the scorings in order of first appearance
for scoring, df in df_softmax.groupby('scoring', sort=False):
    ax.scatter(df.base_auc, df.react_auc, label=rename[scoring])

# diagonal from corner to corner of the axes (drawn in axes coordinates)
ax.plot([0,1],[0,1], transform=ax.transAxes)
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid()
ax.legend()
ax.set_xlabel('base auc')
ax.set_ylabel('react auc')
if PLOT:
    fig.savefig('base_react_scatter.pgf')

In [None]:
# react FPR95 against base FPR95, one colour per scoring
fig, ax = plt.subplots(figsize=(16, 16))

# sort=False keeps the scorings in order of first appearance
for scoring, df in df_softmax.groupby('scoring', sort=False):
    ax.scatter(df.base_fpr95, df.react_fpr95, label=rename[scoring])

# diagonal from corner to corner of the axes (drawn in axes coordinates)
ax.plot([0,1],[0,1], transform=ax.transAxes)
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid()
ax.legend()
ax.set_xlabel('base fpr95')
ax.set_ylabel('react fpr95')

In [None]:
# react AUC against base AUC, one colour per dataset
fig, ax = plt.subplots(figsize=(16, 16))

# sort=False keeps the datasets in order of first appearance
for dataset, df in df_softmax.groupby('dataset', sort=False):
    ax.scatter(df.base_auc, df.react_auc, label=dataset)

# diagonal from corner to corner of the axes (drawn in axes coordinates)
ax.plot([0,1],[0,1], transform=ax.transAxes)
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid()
ax.legend()
ax.set_xlabel('base auc')
ax.set_ylabel('react auc')

# Score - Distribution

In [None]:
# Histograms of the softmax (T=1) scores: ID vs each OOD dataset,
# base in the left column and react in the right column.
id_distribution = 'imagenet|'
# (plot title, dataset key) pairs; commented entries are currently disabled
l = [
#    ('imagenet_a', 'imagenet_a|'),
    ('iNaturalist', 'iNaturalist|'),
#    ('iNaturalist adversarial base', 'iNaturalist|adversarial_0.1_softmax_base'),
#    ('iNaturalist adversarial react', 'iNaturalist|adversarial_0.1_softmax_react'),
    ('gaussian', 'gauss|'),
]

fig, axss = plt.subplots(figsize=(32, 24), nrows=len(l), ncols=2)

model_scores = output['ResNet101V2']
for (name, dataset), axs in zip(l, axss):
    for ax, text in zip(axs, ('base', 'react')):
        scores_name = f'{text}|softmax_t=1'
        id_score = model_scores[id_distribution][scores_name].flatten()
        ood_score = model_scores[dataset][scores_name].flatten()
        for values, legend_label in ((id_score, 'id'), (ood_score, 'ood')):
            ax.hist(values, label=legend_label, alpha=0.5, bins=100)
        ax.set_title(name)
        ax.legend()

if PLOT:
    fig.savefig('score_distribution_softmax.pgf')

In [None]:
# Histograms of the softmax (T=1000) scores: ID vs each OOD dataset,
# base in the left column and react in the right column.
id_distribution = 'imagenet|'
# (plot title, dataset key) pairs; commented entries are currently disabled
l = [
#    ('imagenet_a', 'imagenet_a|'),
    ('iNaturalist', 'iNaturalist|'),
#    ('iNaturalist adversarial base', 'iNaturalist|adversarial_0.1_softmax_base'),
#    ('iNaturalist adversarial react', 'iNaturalist|adversarial_0.1_softmax_react'),
    ('gaussian', 'gauss|'),
]

fig, axss = plt.subplots(figsize=(32, 24), nrows=len(l), ncols=2)

model_scores = output['ResNet101V2']
for (name, dataset), axs in zip(l, axss):
    for ax, text in zip(axs, ('base', 'react')):
        scores_name = f'{text}|softmax_t=1000'
        id_score = model_scores[id_distribution][scores_name].flatten()
        ood_score = model_scores[dataset][scores_name].flatten()
        for values, legend_label in ((id_score, 'id'), (ood_score, 'ood')):
            ax.hist(values, label=legend_label, alpha=0.5, bins=100)
        ax.set_title(name)
        ax.legend()

if PLOT:
    fig.savefig('score_distribution_odin.pgf')

## Plot Images

In [None]:
# one (first stored image, dataset name) pair per dataset of every model
l_imgs = [
    (output[model_name][dataset_name]['images'][0], dataset_name)
    for model_name in output
    for dataset_name in output[model_name]
]

# three images per row; the row count is rounded up
nrows = (len(l_imgs) + 2) // 3
fig, axs = plt.subplots(ncols=3, nrows=nrows, figsize=(48, 6 * nrows))

for ax, (img, name) in zip(axs.ravel(), l_imgs):
    ax.imshow(img.astype('float32'))
    ax.set_title(name)

if PLOT:
    fig.savefig('figure.pgf')

## Ranking

In [None]:
# model -> (base, react) value pair
scorer = dict(
    ResNet50V2=(0.546875, 0.41015625),
    DenseNet121=(0.591796875, 0.5390625),
    DenseNet169=(0.625, 0.595703125),
    DenseNet201=(0.66015625, 0.607421875),
    EfficientNetB0=(0.6250, 0.556640625),
    VGG16=(0.53515625, 0.517578125),
    VGG19=(0.53515625, 0.533203125),
    ResNet101V2=(0.587890625, 0.484375),
    ResNet152V2=(0.611328125, 0.49609375),
)

# dataset keys used by the ranking cells: four named datasets followed by
# ImageNet with five gaussian levels
common_datasets = ['iNaturalist|', 'SUN|', 'Places|', 'imagenet_v2|'] + [
    f'imagenet|gaussian_{level}' for level in ('0.002', '0.01', '0.05', '0.25', '1.25')
]

In [None]:
# Per model: rank the eight scorers (four scorings x base/react) by AUC on every
# common dataset, then print (mean rank, std of rank, scorer), best mean first.
# (alias used in the ranking, key suffix of the score arrays)
ranked_scorings = [
    ('softmax', 'softmax_t=1'),
    ('odin1', 'softmax_t=1000'),
    ('odin2', 'odin_t=1000_epsilon=0.0014'),
    ('energy', 'energy'),
]

for model_name in output:
    datasets = output[model_name]
    dataset_id = 'imagenet|'
    pos = {}
    for dataset_name in common_datasets:
        if dataset_name == dataset_id:
            continue

        ans = []
        for alias, key in ranked_scorings:
            for variant in ('base', 'react'):
                scores_name = f'{variant}|{key}'
                auc = compare_scores(datasets[dataset_id][scores_name], datasets[dataset_name][scores_name])['auc']
                ans.append((auc, f'{alias}_{variant}'))

        # highest AUC first, so rank 1 is the best scorer on this dataset
        ans.sort(reverse=True)
        for rank, (val, name) in enumerate(ans, start=1):
            pos.setdefault(name, []).append(rank)

    ranking = sorted(
        (np.around(np.mean(ranks), 3), np.around(np.std(ranks), 3), name)
        for name, ranks in pos.items()
    )
    print(model_name, ranking)

## Model

In [None]:
# Across all models: rank the eight scorers by AUC on every common dataset,
# keep one rank list per (scorer, model) and show the 20 best mean ranks.
pos = {}

# (alias used in the ranking, key suffix of the score arrays)
ranked_scorings = [
    ('softmax', 'softmax_t=1'),
    ('odin1', 'softmax_t=1000'),
    ('odin2', 'odin_t=1000_epsilon=0.0014'),
    ('energy', 'energy'),
]

for model_name in output:
    datasets = output[model_name]
    dataset_id = 'imagenet|'
    for dataset_name in common_datasets:
        if dataset_name == dataset_id:
            continue

        ans = []
        for alias, key in ranked_scorings:
            for variant in ('base', 'react'):
                scores_name = f'{variant}|{key}'
                auc = compare_scores(datasets[dataset_id][scores_name], datasets[dataset_name][scores_name])['auc']
                ans.append((auc, f'{alias}_{variant}_{model_name}'))

        # highest AUC first, so rank 1 is the best scorer on this dataset
        ans.sort(reverse=True)
        for rank, (val, name) in enumerate(ans, start=1):
            pos.setdefault(name, []).append(rank)

sorted(
    (np.around(np.mean(ranks), 3), np.around(np.std(ranks), 3), name)
    for name, ranks in pos.items()
)[:20]

In [None]:
from PIL import Image

# Create the output folder from Python: unlike the shell `mkdir`, this does not
# complain when ./images already exists, so the cell can be re-run.
os.makedirs('images', exist_ok=True)

# Save the first stored image of every dataset as ./images/<dataset>.png, with
# '|' replaced by '_' in the file name.  A dataset name shared by several
# models is written more than once; the last write wins.
for model_name in output:
    datasets = output[model_name]
    for dataset_name in datasets:
        # scaled by 255 and cast to uint8 -- presumably the stored images are
        # floats in [0, 1]; TODO confirm
        im = Image.fromarray(np.uint8(255.0*datasets[dataset_name]['images'][0]))
        im.save(f'./images/{dataset_name.replace("|", "_")}.png')