# Imports

In [1]:
import numpy as np

In [2]:
import os.path as osp
import numpy as np
from tqdm import tqdm
from utils import pickle_save, pickle_load
from utils.data.delf import datum_io
from copy import deepcopy

In [3]:
import sacred
from sacred import SETTINGS
from sacred.utils import apply_backspaces_and_linefeeds
from numpy import linalg as LA
from utils import pickle_load, pickle_save
#from utils.revisited import compute_metrics
from utils.data.delf import datum_io

In [4]:
ex = sacred.Experiment('Prepare Top-K (VIQUAE FOR RTT)', interactive=True)
# Filter backspaces and linefeeds
SETTINGS.CAPTURE_MODE = 'sys'
ex.captured_out_filter = apply_backspaces_and_linefeeds

In [5]:
feature_name = 'r50_gldv1'
set_name = 'dev'
gnd_name = 'gnd_'+ set_name+'.pkl'

In [6]:
dataset_name = 'viquae_for_rrt'
data_dir = osp.join('/mnt/beegfs/home/smessoud/RerankingTransformer/models/research/delf/delf/python/delg/data', dataset_name)

In [7]:
use_aqe = False
aqe_params = {'k': 2, 'alpha': 0.3}

save_nn_inds = True

In [8]:
with open(osp.join(data_dir,  set_name+'_query.txt')) as fid:
    query_lines   = fid.read().splitlines()

In [9]:
len(query_lines)

1250

In [10]:
with open(osp.join(data_dir, set_name+'_gallery.txt')) as fid:
    gallery_lines = fid.read().splitlines()

In [11]:
query_feats = []
for i in tqdm(range(len(query_lines))):
    name = osp.splitext(osp.basename(query_lines[i].split(';;')[0]))[0]
    path = osp.join(data_dir, 'delg_' + feature_name, name + '.delg_global')
    query_feats.append(datum_io.ReadFromFile(path))

100%|██████████| 1250/1250 [00:01<00:00, 638.58it/s]


In [12]:
query_feats = np.stack(query_feats, axis=0)
query_feats = query_feats / LA.norm(query_feats, axis=-1)[:, None]

In [13]:
query_feats.shape

(1250, 2048)

In [14]:
selection_lines = np.genfromtxt('/mnt/beegfs/home/smessoud/RerankingTransformer/models/research/delf/delf/python/delg/data/viquae_for_rrt/'+
                                set_name+'_selection_imgs.txt', dtype='str')
selection_lines.shape

(1250, 100)

In [15]:
selection_lines[0][:10]

array(['512px-Lucas_Papademos.jpg', '512px-Zyp_Pfund1.jpg',
       '512px-Zyp_Pfund1.jpg', '512px-Iakovidis_-_Kontostavlos.jpg',
       '512px-Phoenix_Greek_coin_1828-1833.jpg',
       '512px-Phoenix_Greek_coin_1828-1833.jpg',
       '512px-Phoenix_Greek_coin_1828-1833.jpg',
       '512px-1_obol,_Ionian_Islands,_1819.jpg', '512px-Drachmen.jpg',
       '512px-Drachmen.jpg'], dtype='<U233')

In [16]:
# wiki_img   = '.'.join((wiki_item['image'].split('.')[:-1]))

In [17]:
selection_index_feats = []
for i in tqdm(range(len(selection_lines))):
    index_feats = []
    for image_file in selection_lines[i]:
        name = '.'.join((image_file.split('.')[:-1]))
        path = osp.join(data_dir, 'delg_' + feature_name, name + '.delg_global')
        index_feats.append(datum_io.ReadFromFile(path))
    selection_index_feats.append(index_feats)

100%|██████████| 1250/1250 [03:35<00:00,  5.79it/s]


In [18]:
min([len(selection_index_feats[i]) for i in range(len(selection_index_feats))])

100

In [19]:
selection_index_feats = np.array(selection_index_feats)
selection_index_feats.shape

(1250, 100, 2048)

In [20]:
query_feats[0].shape

(2048,)

In [21]:
selection_sims = []
for i in range(len(selection_index_feats)):
    index_feats = np.stack(selection_index_feats[i], axis=0)
    index_feats = index_feats / LA.norm(index_feats, axis=-1)[:, None]
    selection_sims.append(np.matmul(query_feats[i], index_feats.T))

In [22]:
sims = np.array(selection_sims)
sims.shape

(1250, 100)

In [23]:
sims

array([[0.15160514, 0.25095916, 0.25095916, ..., 0.3012856 , 0.1246489 ,
        0.26756623],
       [0.08161078, 0.18024106, 0.09156677, ..., 0.72684175, 0.72684157,
        0.72684157],
       [0.07149097, 0.17137687, 0.11596002, ..., 0.25148824, 0.10830951,
        0.22257602],
       ...,
       [0.27738544, 0.60081625, 0.12173056, ..., 0.4982748 , 0.38213605,
        0.33787534],
       [0.11040592, 0.49454427, 0.08045548, ..., 0.1148309 , 0.5187907 ,
        0.32204676],
       [0.60081625, 0.60081625, 0.15237401, ..., 0.35443673, 0.11360757,
        0.55823565]], dtype=float32)

In [24]:
if use_aqe:
    ## WARNING: I WAS TOO LAZY TO CORRECT IT
    ## IF YOU WANNA USE AQE PARAMATER - ADAPT THE CODE FOR VIQUAE-RRT
    alpha = aqe_params['alpha']
    nn_inds = np.argsort(-sims, -1)
    query_aug = deepcopy(query_feats)
    for i in range(len(query_feats)):
        new_q = [query_feats[i]]
        for j in range(aqe_params['k']):
            nn_id = nn_inds[i, j]
            weight = sims[i, nn_id] ** aqe_params['alpha']
            new_q.append(weight * index_feats[nn_id])
        new_q = np.stack(new_q, 0)
        new_q = np.mean(new_q, axis=0)
        query_aug[i] = new_q/LA.norm(new_q, axis=-1)
    sims = np.matmul(query_aug, index_feats.T)

In [25]:
selection_index_feats[0].shape

(100, 2048)

In [26]:
nn_inds = np.argsort(-sims, -1)
nn_dists = deepcopy(sims)
for i in range(query_feats.shape[0]):
    index_feats = selection_index_feats[i]
    for j in range(index_feats.shape[0]):
        nn_dists[i, j] = sims[i, nn_inds[i, j]]

In [27]:
nn_inds.shape

(1250, 100)

In [35]:
if save_nn_inds:
    output_path = osp.join(data_dir, set_name + '_nn_inds_%s.pkl' % feature_name)
    pickle_save(output_path, nn_inds)

In [29]:
def compute_ap(ranks, nres):
    """
    Computes average precision for given ranked indexes.
    
    Arguments
    ---------
    ranks : zerro-based ranks of positive images
    nres  : number of positive images
    
    Returns
    -------
    ap    : average precision
    """

    # number of images ranked by the system
    nimgranks = len(ranks)

    # accumulate trapezoids in PR-plot
    ap = 0

    recall_step = 1. / nres

    for j in np.arange(nimgranks):
        rank = ranks[j]

        if rank == 0:
            precision_0 = 1.
        else:
            precision_0 = float(j) / rank

        precision_1 = float(j + 1) / (rank + 1)

        ap += (precision_0 + precision_1) * recall_step / 2.

    return ap

In [30]:
def compute_map(ranks, gnd, kappas=[]):
    """
    Computes the mAP for a given set of returned results.

         Usage: 
           map = compute_map (ranks, gnd) 
                 computes mean average precsion (map) only
        
           map, aps, pr, prs = compute_map (ranks, gnd, kappas) 
                 computes mean average precision (map), average precision (aps) for each query
                 computes mean precision at kappas (pr), precision at kappas (prs) for each query
        
         Notes:
         1) ranks starts from 0, ranks.shape = db_size X #queries
         2) The junk results (e.g., the query itself) should be declared in the gnd stuct array
         3) If there are no positive images for some query, that query is excluded from the evaluation
    """

    map = 0.
    nq = len(gnd) # number of queries
    aps = np.zeros(nq)
    pr = np.zeros(len(kappas))
    prs = np.zeros((nq, len(kappas)))
    nempty = 0

    for i in np.arange(nq):
        qgnd = np.array(gnd[i]['ok'])
        qgndj = np.array(gnd[i]['junk'])

        # no positive images, skip from the average
        if qgnd.shape[0] == 0:
            aps[i] = float('nan')
            prs[i, :] = float('nan')
            nempty += 1
            continue

        # sorted positions of positive and junk images (0 based)
        pos  = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgnd)]
        if len(pos) == 0:
            pos = np.array(gnd[i]['ok']).max() * np.ones_like(gnd[i]['ok'])
        
        junk = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgndj)]

        k = 0
        ij = 0
        if len(junk):
            # decrease positions of positives based on the number of
            # junk images appearing before them
            ip = 0
            while (ip < len(pos)):
                while (ij < len(junk) and pos[ip] > junk[ij]):
                    k += 1
                    ij += 1
                pos[ip] = pos[ip] - k
                ip += 1

        # compute ap
        ap = compute_ap(pos, len(qgnd))
        map = map + ap
        aps[i] = ap

        # compute precision @ k
        pos += 1 # get it to 1-based
        for j in np.arange(len(kappas)):
            """
            if len(pos) == 0:
                max_pos = kappas[j]
            else: max_pos = max(pos)
            """
            kq = min(max(pos), kappas[j]); 
            prs[i, j] = (pos <= kq).sum() / kq
        pr = pr + prs[i, :]

    map = map / (nq - nempty)
    pr = pr / (nq - nempty)

    return map, aps, pr, prs

In [31]:
gnd_data = pickle_load(osp.join(data_dir, gnd_name))

In [32]:
def compute_metrics(dataset, ranks, gnd, kappas=[1, 5, 10]):
    print(ranks.shape)
    
    # old evaluation protocol
    if dataset.startswith('classic'):
        map, aps, _, _ = compute_map(ranks, gnd)
        out = {'map': np.around(map*100, decimals=3)}
        print('>> {}: mAP {:.2f}'.format(dataset, out['map']))

    # new evaluation protocol
    elif dataset.startswith('viquae'):
        
        gnd_t = []
        for i in range(len(gnd)):
            g = {}
            g['ok'] = np.concatenate([gnd[i]['hard']])
            g['junk'] = np.concatenate([gnd[i]['junk']])
            gnd_t.append(g)
        mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, kappas)


        out = {
            'H_map': np.around(mapH*100, decimals=2),
            'H_mp':  np.around(mprH*100, decimals=2),
        }

        print('>> {}: mAP H: {}'.format(dataset, out['H_map']))
        print('>> {}: mP@k{} H: {}'.format(dataset, kappas, out['H_mp']))

    return out, mapH, apsH, mprH, prsH

In [33]:
np.arange(nn_inds.T.shape[0])[np.in1d(nn_inds.T[:,i], gnd_data['gnd'][0]['junk'])]

array([ 1,  2,  3,  4,  5,  7, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21,
       22, 26, 28, 29, 30, 31, 36, 37, 38, 39, 41, 43, 44, 45, 46, 47, 48,
       49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 66,
       68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [34]:
compute_metrics('viquae', nn_inds.T, gnd_data['gnd'], kappas=[1,5,6,10])

(100, 1250)
>> viquae: mAP H: 93.72
>> viquae: mP@k[1, 5, 6, 10] H: [86.05 92.31 92.9  94.67]


({'H_map': 93.72, 'H_mp': array([86.05, 92.31, 92.9 , 94.67])},
 0.9371797055535545,
 array([1.        , 0.55      , 0.51388889, ..., 1.        , 1.        ,
        1.        ]),
 array([0.86052409, 0.92307692, 0.92899408, 0.94674556]),
 array([[1., 1., 1., 1.],
        [0., 1., 1., 1.],
        [0., 0., 0., 0.],
        ...,
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]))