In [1]:
import json
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
import pandas as pd

In [2]:
# Load only the evaluation settings from the project config. JSON text is
# UTF-8 by specification, so the encoding is pinned rather than left to the
# platform's locale default.
with open("config.json", "r", encoding="utf-8") as read_file:
    config_test = json.load(read_file)['test_params']

# Scoring methods and in-distribution datasets iterated over by the cells below.
methods = ['OneClassSVM', 'PCA', 'norms']
datasets = ['calfw', 'flickr', 'ms1m', 'cplfw', 'megaface']

In [None]:
# Run the two project scripts through the shell: create_models.py for every
# method, then calculate_features.py (with --shuffle) for every method and dataset.
# `{...}` is IPython expression interpolation, so each Python list is passed
# to the script as space-separated command-line arguments.
! python3 create_models.py --methods {' '.join(methods)}
! python3 calculate_features.py --shuffle --methods {' '.join(methods)} --datasets {' '.join(datasets)} 


^C
Traceback (most recent call last):
  File "create_models.py", line 47, in <module>
    clf = PCA(**config_dict['PCA']).fit(embeddings[embs_indices])
  File "/usr/local/lib/python3.6/dist-packages/sklearn/decomposition/_pca.py", line 359, in fit
    self._fit(X)
  File "/usr/local/lib/python3.6/dist-packages/sklearn/decomposition/_pca.py", line 432, in _fit
    return self._fit_truncated(X, n_components, self._fit_svd_solver)
  File "/usr/local/lib/python3.6/dist-packages/sklearn/decomposition/_pca.py", line 551, in _fit_truncated
    random_state=random_state)
  File "/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/sklearn/utils/extmath.py", line 351, in randomized_svd
    random_state=random_state)
  File "/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/usr/local/lib/pyth

In [4]:
def get_rocauc(distances_id, distances_ood):
    """Compute the ROC AUC for separating outliers from in-distribution samples.

    Out-of-distribution samples are the positive class (label 1) and
    in-distribution samples the negative class (label 0); the supplied
    values are used directly as the ranking score.

    Args:
        distances_id: scores of the in-distribution samples.
        distances_ood: scores of the out-of-distribution samples.

    Returns:
        The value returned by ``roc_auc_score`` for these labels and scores.
    """
    n_id = len(distances_id)
    n_ood = len(distances_ood)
    # In-distribution scores come first, so their labels (0) come first too.
    is_outlier = np.array([0] * n_id + [1] * n_ood)
    scores = np.concatenate((distances_id, distances_ood), axis=0)
    return roc_auc_score(is_outlier, scores)


def roc_auc_experiments(in_distr_features, ood_features, in_distr_fraction=config_test['proportion_of_outliers'],
                       n_experiments=config_test['n_experiments'], shuffle=False, extra_params=None):
    """Repeat the ROC AUC evaluation on several in-distribution samples.

    Every experiment scores one subset of ``in_distr_features`` against the
    full ``ood_features`` array using ``get_rocauc``.

    Args:
        in_distr_features: indexable array of in-distribution scores.
        ood_features: array of out-of-distribution scores, reused unchanged
            in every experiment.
        in_distr_fraction: divisor applied to ``len(ood_features)`` to get
            the number of in-distribution samples per experiment
            (truncated to an int).
        n_experiments: number of evaluations to run.
        shuffle: if true, each experiment draws its indices with
            ``np.random.choice`` (which samples with replacement by
            default); otherwise consecutive, non-overlapping slices are used.
        extra_params: accepted but not used by this function.

    Returns:
        Tuple ``(mean, std)`` of the ROC AUC values over all experiments.

    Raises:
        AssertionError: when ``shuffle`` is false and ``in_distr_features``
            does not contain exactly ``n_experiments`` slices of the
            computed sample size.
    """
    sample_size = int(len(ood_features) / in_distr_fraction)

    auc_values = []
    for run in range(n_experiments):
        if not shuffle:
            # Sequential mode needs the data to split evenly into the runs.
            assert sample_size * n_experiments == len(in_distr_features)
            start = run * sample_size
            sample = in_distr_features[start:start + sample_size]
        else:
            picked = np.random.choice(len(in_distr_features), size=sample_size)
            sample = in_distr_features[picked]

        auc_values.append(get_rocauc(sample, ood_features))

    return np.mean(auc_values), np.std(auc_values)


In [6]:
results = {}

# The number of experiments is the same for every method/dataset pair,
# so its square root is computed once.
sqrt_n_experiments = np.sqrt(config_test['n_experiments'])

for method in methods:
    # One file of outlier scores per method, reused across all datasets.
    ood_features = np.load('features/outliers/{}_dist.npy'.format(method))
    per_dataset = results[method] = {}
    for dataset in datasets:
        in_distr_features = np.load('features/{}/{}_dist.npy'.format(dataset, method))
        mean, std = roc_auc_experiments(in_distr_features, ood_features)
        # Interval half-width around the mean: 1.96 times the standard error.
        conf_intv = 1.96 * std / sqrt_n_experiments
        per_dataset[dataset] = (np.round(mean, 2), np.round(conf_intv, 2))

# Outer keys (methods) become columns; inner keys (datasets) become the index.
results_df = pd.DataFrame.from_dict(results)


In [7]:
# Rows are datasets and columns are methods; each cell holds
# (mean ROC AUC, confidence-interval half-width), both rounded to 2 decimals.
results_df

Unnamed: 0,OneClassSVM,PCA,norms
calfw,"(0.29, 0.0)","(0.12, 0.0)","(0.3, 0.0)"
flickr,"(0.38, 0.0)","(0.1, 0.0)","(0.28, 0.0)"
ms1m,"(0.32, 0.0)","(0.11, 0.0)","(0.25, 0.0)"
cplfw,"(0.31, 0.0)","(0.18, 0.0)","(0.26, 0.0)"
megaface,"(0.39, 0.0)","(0.18, 0.0)","(0.26, 0.0)"


In [None]:
        print("id rate only distractors =",
              (len(self.pairs_true_) - len(
                  self.pairs_false_distractors_)) / len(self.pairs_true_))

