In [1]:
%load_ext autoreload

%autoreload 2
from utils import parse_table, read_csv
import pandas as pd
import numpy as np

from pretty import plot_kde, ColorTheme


def get_scores(in_distr, ood, type, all_tasks):
    """Fetch the score arrays for one (in-distribution, OOD) dataset pair.

    The table is looked up as ``all_tasks[type][in_distr][ood]`` and split by
    its ``'name'`` column into the rows labelled ``'generated'``, ``'test'``
    and ``'ood'``; for each group the ``'negative z_scores'`` column is taken.

    Args:
        in_distr: key of the in-distribution dataset.
        ood: key of the out-of-distribution dataset.
        type: top-level key of ``all_tasks`` (the notebook uses
            ``'grayscale'`` and ``'rgb'``). The name shadows the builtin but
            is kept so existing keyword callers keep working.
        all_tasks: nested mapping ``type -> in_distr -> ood -> table``, where
            the table has ``'name'`` and ``'negative z_scores'`` columns
            (presumably a pandas DataFrame -- confirm against ``parse_table``).

    Returns:
        ``(score_generated, score_in, score_ood)`` -- the ``.values`` of the
        score column for each group.

    Side effects:
        The same three arrays are also stored in the module-level globals
        ``score_generated``, ``score_in`` and ``score_ood`` so that cells
        relying on the older global-based protocol keep working. New code
        should prefer the return value.
    """
    global score_generated, score_in, score_ood
    in_vs_out = all_tasks[type][in_distr][ood]

    # Select each column once instead of re-filtering the whole table per group.
    names = in_vs_out['name']
    scores = in_vs_out['negative z_scores']

    score_generated = scores[names == 'generated'].values
    score_in = scores[names == 'test'].values
    score_ood = scores[names == 'ood'].values
    return score_generated, score_in, score_ood
    
# Dataset key -> short label used when printing results.
print_mapping = dict([
    ('fashion-mnist', 'FMNIST'),
    ('mnist', 'MNIST'),
    ('emnist', 'EMNIST'),
    ('omniglot', 'Omniglot'),
    ('celeba-small', 'CelebA'),
    ('svhn', 'SVHN'),
    ('cifar10', 'CIFAR10'),
    ('cifar100', 'CIFAR100'),
    ('tiny-imagenet', 'Tiny'),
])

# Read both baseline result files first, then parse them (same order as before:
# grayscale before rgb for each step).
df_grayscale, df_rgb = (
    read_csv('reconstruction_dm_baseline_grayscale.csv'),
    read_csv('reconstruction_dm_baseline_rgb.csv'),
)
all_grayscale_tasks, all_rgb_tasks = (
    parse_table(df_grayscale),
    parse_table(df_rgb),
)

# Parsed tables grouped by image type; indexed as all_tasks[type][in_distr][ood].
all_tasks = dict(grayscale=all_grayscale_tasks, rgb=all_rgb_tasks)

3/5: 100%|██████████| 4/4 [00:01<00:00,  2.82it/s]
5/7: 100%|██████████| 5/5 [00:15<00:00,  3.01s/it]


In [2]:
from roc_analysis import get_auc, get_convex_hull, get_roc_graph

# Every ordered (in-distribution, OOD, type) triple where the two datasets
# differ, in the iteration order of all_tasks and of each per-type table.
all_pairs = [
    (in_distr, ood, tp)
    for tp in all_tasks.keys()
    for in_distr in all_tasks[tp].keys()
    for ood in all_tasks[tp].keys()
    if in_distr != ood
]

# For each dataset pair, print two AUC values computed from the convex hull of
# the ROC graph: test-vs-OOD scores and generated-vs-OOD scores.
for in_distr, ood, tp in all_pairs:
    # Fall back to the raw dataset key if no display label is registered,
    # rather than failing with KeyError on a purely cosmetic lookup.
    in_label = print_mapping.get(in_distr, in_distr)
    ood_label = print_mapping.get(ood, ood)
    print(f'{in_label} vs {ood_label}')

    # Populates the globals score_generated / score_in / score_ood. One call is
    # enough: the arguments are identical for both AUC computations below.
    get_scores(in_distr, ood, tp, all_tasks)

    # In-distribution test scores against OOD scores.
    x_naive, y_naive = get_roc_graph(
        pos_x=score_in,
        neg_x=score_ood,
        verbose=0,
    )
    x_curve, y_curve = get_convex_hull(x_naive, y_naive)
    test_vs_ood_auc = get_auc(x_curve, y_curve)
    print("AUC of in-vs-out", "{:.3f}".format(test_vs_ood_auc))

    # Generated-sample scores against the same OOD scores.
    x_naive, y_naive = get_roc_graph(
        pos_x=score_generated,
        neg_x=score_ood,
        verbose=0,
    )
    x_curve, y_curve = get_convex_hull(x_naive, y_naive)
    generated_vs_ood_auc = get_auc(x_curve, y_curve)
    print("AUC of generated-vs-out", "{:.3f}".format(generated_vs_ood_auc))
    print("----")

EMNIST vs MNIST
AUC of in-vs-out 0.900
AUC of generated-vs-out 0.958
----
EMNIST vs Omniglot
AUC of in-vs-out 1.000
AUC of generated-vs-out 1.000
----
EMNIST vs FMNIST
AUC of in-vs-out 1.000
AUC of generated-vs-out 1.000
----
MNIST vs EMNIST
AUC of in-vs-out 0.970
AUC of generated-vs-out 0.998
----
MNIST vs Omniglot
AUC of in-vs-out 0.999
AUC of generated-vs-out 1.000
----
MNIST vs FMNIST
AUC of in-vs-out 1.000
AUC of generated-vs-out 1.000
----
Omniglot vs EMNIST
AUC of in-vs-out 0.998
AUC of generated-vs-out 0.999
----
Omniglot vs MNIST
AUC of in-vs-out 0.980
AUC of generated-vs-out 0.998
----
Omniglot vs FMNIST
AUC of in-vs-out 1.000
AUC of generated-vs-out 1.000
----
FMNIST vs EMNIST
AUC of in-vs-out 0.990
AUC of generated-vs-out 0.996
----
FMNIST vs MNIST
AUC of in-vs-out 0.981
AUC of generated-vs-out 0.994
----
FMNIST vs Omniglot
AUC of in-vs-out 0.996
AUC of generated-vs-out 0.998
----
Tiny vs CIFAR100
AUC of in-vs-out 0.462
AUC of generated-vs-out 0.447
----
Tiny vs CelebA
AUC 