# Identification rate calculation

This jupyter notebook provide identification rate calculation for cplfw with distractors from megaface dataset.

In [30]:
import numpy as np
from sklearn.preprocessing import normalize
from ir_class import IrBigData
import os
from tqdm.notebook import tqdm
import pandas as pd

IrBigData._print_info = False

Define models for which we calculate features. Then run bash scripts that build models for given methods, get features for cplfw

In [31]:
methods = ['PCA', 'norms']

! python3 create_models.py --methods {' '.join(methods)}
! python3 calculate_features.py --methods {' '.join(methods)} --datasets cplfw megaface 

outliers: 100%|###################################| 3/3 [00:06<00:00,  2.28s/it]


In [32]:
cplfw_emb = normalize(np.load('image_embeddings/cplfw.npy'))
megaface_emb = normalize(np.load('image_embeddings/megaface.npy'))

with open('image_embeddings/labels/cplfw_labels.txt', encoding='utf-8') as txt_labels_file:
    lines = txt_labels_file.readlines()
cplfw_labels = np.array([i.rstrip('\n') for i in lines])


Define parameters we want to get

In [33]:
parameters_ir = {
    "similarity_type": "features",
    "fpr_threshold": 1e-5,
    "dist_type": "max_threshold",
    "protocol": "data_distractors_no_false_pairs",
    "N_distractors": 10000
}

In [42]:
# choose random distractors
indices_random = np.random.choice(len(megaface_emb), 
                                  size=parameters_ir['N_distractors'], 
                                  replace=False)
megaface_emb = megaface_emb[indices_random]


results_dict = {}
results_vr_dict = {}

pbar = tqdm(methods)
for method in pbar:
    pbar.set_description(method)        
    results_dict[method] = {}
    results_vr_dict[method] = {}
    results_arr = []
    results_vr_arr = []

    cplfw_features = np.load('features/cplfw/{}_dist.npy'.format(method))
    megaface_features = np.load('features/megaface/{}_dist.npy'.format(method))[indices_random]
    
    IR = IrBigData(cplfw_emb, cplfw_features, 
               cplfw_labels, parameters_ir, distractors=megaface_emb, 
               distractor_features=megaface_features)
    IR.params['similarity_type'] = 'features'
    
    quantiles_arr = [np.quantile(cplfw_features, i) for i in [0.985, 0.988, 0.992, 1.]]
    for alpha in tqdm(quantiles_arr, leave=False):
        IR.params['alpha'] = alpha
        IR.main()
        results_arr.append(IR.CMT_)
        results_vr_arr.append(IR.VR_)
    
    results_dict[method]['features'] = max(results_arr)
    results_vr_dict[method]['features'] = max(results_vr_arr)
    
    IR = IrBigData(cplfw_emb, None,
               cplfw_labels, parameters_ir, distractors=megaface_emb, 
               distractor_features=None)
    IR.params['similarity_type'] = 'cosine'
    IR.main()
    results_dict[method]['cosine'] = IR.CMT_
    results_vr_dict[method]['cosine'] = IR.VR_
    

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

In [43]:
results_arr

[0.7417846530326866, 0.7416098584163607, 0.7343121831847579, 0.668720503408495]

In [44]:
results_dict

{'PCA': {'features': 0.7192798461807376, 'cosine': 0.664875021849327},
 'norms': {'features': 0.7417846530326866, 'cosine': 0.664875021849327}}

In [45]:
results_vr_dict

{'PCA': {'features': 0.7212899842684846, 'cosine': 0.6664918720503409},
 'norms': {'features': 0.7443628736234924, 'cosine': 0.6664918720503409}}

In [20]:
a = pd.DataFrame.from_dict(results_vr_dict)

a['type'] = 'VR'

In [21]:
b = pd.DataFrame.from_dict(results_dict)

b['type'] = 'IR'

In [26]:
a

Unnamed: 0,PCA,norms
features,0.666317,0.666317
cosine,0.666492,0.666492


In [27]:
b

Unnamed: 0,PCA,norms
features,0.663564,0.663564
cosine,0.663739,0.663739


In [29]:
a.merge(b, how='outer')

Unnamed: 0,PCA,norms
0,0.666317,0.666317
1,0.666492,0.666492
2,0.663564,0.663564
3,0.663739,0.663739


array([20.789286, 18.429598, 23.404694, ..., 22.691399, 20.809158,
       21.739761], dtype=float32)