In [36]:
import json
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
import pandas as pd

In [37]:
# Load the evaluation settings from the 'test_params' section of config.json.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding (JSON text is UTF-8).
with open("config.json", "r", encoding="utf-8") as read_file:
    config_test = json.load(read_file)['test_params']

# Number of repeated ROC AUC experiments per (method, dataset) pair.
n_experiments = config_test['n_experiments']
# Names passed to the helper scripts and used to build the feature file paths.
methods = ['OneClassSVM', 'PCA', 'norms']
datasets = ['calfw', 'flickr']

In [42]:
# Run the helper scripts through the shell, passing the method names (and, for
# the second script, the dataset names) as space-separated arguments.
# NOTE(review): presumably these scripts produce the features/*.npy files that
# are loaded later in this notebook -- confirm against the scripts themselves.
! python3 create_models.py --methods {' '.join(methods)}
! python3 calculate_features.py --methods {' '.join(methods)} --datasets {' '.join(datasets)} 


In [39]:
def get_rocauc(distances_id, distances_ood):
    """Compute the ROC AUC of out-of-distribution vs. in-distribution scores.

    The two score arrays are concatenated (in-distribution first) and handed
    to ``roc_auc_score`` with in-distribution samples labelled 0 and
    out-of-distribution samples labelled 1.
    """
    labels = np.concatenate((
        np.zeros(len(distances_id), dtype=int),
        np.ones(len(distances_ood), dtype=int),
    ))
    scores = np.concatenate((distances_id, distances_ood), axis=0)
    return roc_auc_score(labels, scores)


def roc_auc_experiments(in_distr_features, ood_features, in_distr_fraction=config_test['proportion_of_outliers'],
                       n_experiments=n_experiments, shuffle=False, extra_params=None):
    """Repeat the ROC AUC evaluation over several in-distribution subsets.

    Every experiment scores one subset of ``in_distr_features`` against the
    complete ``ood_features`` array using ``get_rocauc``.

    Args:
        in_distr_features: indexable array of in-distribution scores.
        ood_features: out-of-distribution scores, reused unchanged in every
            experiment.
        in_distr_fraction: divisor applied to ``len(ood_features)`` to obtain
            the number of in-distribution samples per experiment. The default
            is ``config_test['proportion_of_outliers']`` as read when this
            function is defined.
        n_experiments: number of experiments. The default is the module-level
            ``n_experiments`` value captured when this function is defined.
        shuffle: if True, each subset is drawn with ``np.random.choice``
            (sampling with replacement from the global NumPy RNG); if False,
            consecutive non-overlapping slices are used.
        extra_params: currently unused; kept so existing callers keep working.

    Returns:
        Tuple ``(mean, std)`` of the per-experiment ROC AUC scores, computed
        with ``np.mean`` and ``np.std``.

    Raises:
        AssertionError: if ``shuffle`` is False and ``in_distr_features`` does
            not contain exactly ``emb_length * n_experiments`` samples.
    """
    # Number of in-distribution samples used in each experiment.
    emb_length = int(len(ood_features) / in_distr_fraction)

    if not shuffle:
        # The slicing below needs exactly n_experiments chunks of emb_length
        # samples; this does not depend on the loop index, so check it once.
        expected_total = emb_length * n_experiments
        assert expected_total == len(in_distr_features), (
            "in_distr_features must contain emb_length * n_experiments = "
            "{} * {} = {} samples when shuffle=False, got {}".format(
                emb_length, n_experiments, expected_total, len(in_distr_features))
        )

    result = []
    for i in range(n_experiments):
        if shuffle:
            sample_idx = np.random.choice(len(in_distr_features), size=emb_length)
            test_emb = in_distr_features[sample_idx]
        else:
            test_emb = in_distr_features[i*emb_length:(i+1)*emb_length]

        result.append(get_rocauc(test_emb, ood_features))

    return np.mean(result), np.std(result)


In [40]:
# For every (method, dataset) pair, load the saved score arrays, run the
# repeated ROC AUC experiments and store (mean, confidence half-width).
results = {}

for method in methods:
    ood_features = np.load('features/outliers/{}_dist.npy'.format(method))
    results[method] = {}
    for dataset in datasets:
        in_distr_features = np.load('features/{}/{}_dist.npy'.format(dataset, method))
        # Pass n_experiments explicitly: the function's default was captured
        # when it was defined, so relying on it could silently disagree with
        # the value used in the interval below if the config cell is re-run.
        mean, std = roc_auc_experiments(in_distr_features, ood_features,
                                        n_experiments=n_experiments)
        # Normal-approximation 95% half-width of the mean (z ~= 1.96).
        conf_intv = 1.96 * std / np.sqrt(n_experiments)

        results[method][dataset] = (np.round(mean, 2), np.round(conf_intv, 2))

# Outer keys (methods) become columns, inner keys (datasets) become the index.
results_df = pd.DataFrame.from_dict(results)


In [41]:
results_df

Unnamed: 0,OneClassSVM,PCA,norms
calfw,"(22.55, 0.06)","(5.95, 0.01)","(29.68, 0.05)"
flickr,"(34.74, 0.06)","(17.24, 0.04)","(28.04, 0.05)"
