In [1]:
%env CUDA_VISIBLE_DEVICES=0
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, '..')

import faiss
import lib
import numpy as np
from functools import partial
import torch
import os

# Indices 0..N-1 of every CUDA device torch reports as available; the
# %env CUDA_VISIBLE_DEVICES setting above determines what torch can see.
device_ids = [*range(torch.cuda.device_count())]

env: CUDA_VISIBLE_DEVICES=0


### Description

This notebook downloads and evaluates four checkpoints of the UNQ model, trained on the BIGANN1M and DEEP1M datasets with 8- and 16-byte code sizes. You can also use this code to verify the correctness of your setup: if all library versions and hardware are set up properly, the code below should produce exactly the outputs shown below. These are also the numbers we report in Table 2 of our paper. 

In [2]:
# Evaluate every released checkpoint in turn.
# Each entry is (dataset name, local checkpoint path, download link for that checkpoint).
for dataset_name, checkpoint_path, link in [
    ('BIGANN1M', 'checkpoints/sift_8b/checkpoint_best.pth', 'https://www.dropbox.com/s/ycf12yqu5cw4opr/checkpoint_best.pth?dl=1'),
    ('BIGANN1M', 'checkpoints/sift_16b/checkpoint_best.pth', 'https://www.dropbox.com/s/y7aucbm5gwyow9r/checkpoint_best.pth?dl=1'),
    ('DEEP1M', 'checkpoints/deep_8b/checkpoint_best.pth', 'https://www.dropbox.com/s/yvtm7y3f3412n9n/checkpoint_best.pth?dl=1'),
    ('DEEP1M', 'checkpoints/deep_16b/checkpoint_best.pth', 'https://www.dropbox.com/s/a0v988tb6i00qir/checkpoint_best.pth?dl=1')
]:
    print("Evaluating checkpoint {} on dataset {}".format(checkpoint_path, dataset_name))

    # Download the checkpoint only if it is not already on disk, so re-running
    # the cell reuses the local copy.
    if not os.path.exists(checkpoint_path):
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        lib.utils.download(link, checkpoint_path,
                 chunk_size=4 * 1024 ** 2)

    # The number of codebooks is inferred from the "8b" / "16b" marker in the
    # checkpoint path; any other path is rejected rather than guessed.
    if '8b' in checkpoint_path:
        num_codebooks = 8
    elif '16b' in checkpoint_path:
        num_codebooks = 16
    else:
        raise ValueError("Unexpected number of bytes. Make sure you know what you're doing")

    dataset = lib.Dataset(dataset_name, normalize=True)
    model = lib.UNQModel(input_dim=dataset.vector_dim, num_codebooks=num_codebooks).cuda()

    trainer = lib.Trainer(
        model=model, experiment_name='debug', device_ids=device_ids, loss_opts={},
        LearnedSimilaritySearch=partial(lib.UNQSearch, model=model, rerank_k=500, batch_size=1000,
                                        reorder_batch_size=250, device_ids=device_ids),
        NegativeSimilaritySearch=partial(lib.UNQSearch, model=model, rerank_k=1, batch_size=1000,
                                        reorder_batch_size=250, device_ids=device_ids),
    )
    trainer.load_checkpoint(path=checkpoint_path)

    # Move the base and query vectors to the GPU once per checkpoint instead of
    # once per recall measurement: the tensors are the same for all three calls.
    # NOTE(review): assumes evaluate_recall does not modify its inputs in place - confirm.
    base_vectors = dataset.test_vectors.cuda()
    query_vectors = dataset.query_vectors.cuda()

    print("Recall@1  :", trainer.evaluate_recall(base_vectors, query_vectors, k=1))
    print("Recall@10 :", trainer.evaluate_recall(base_vectors, query_vectors, k=10))
    print("Recall@100:", trainer.evaluate_recall(base_vectors, query_vectors, k=100))

    # Drop the GPU references before the next checkpoint is loaded so they do
    # not stay alive alongside the next dataset's tensors.
    del base_vectors, query_vectors

Evaluating checkpoint checkpoints/sift_8b/checkpoint_best.pth on dataset BIGANN1M
Downloading https://www.dropbox.com/s/ycf12yqu5cw4opr/checkpoint_best.pth?dl=1 > checkpoints/sift_8b/checkpoint_best.pth


100%|██████████| 63152599/63152599 [00:01<00:00, 46457573.21it/s]


Downloading https://www.dropbox.com/s/zcnvsy7mlogj4g0/bigann_base1M.fvecs?dl=1 > ./data/BIGANN1M/bigann_base1M.fvecs


100%|██████████| 516000000/516000000 [00:09<00:00, 53227896.69it/s]


Downloading https://www.dropbox.com/s/dviygi2zhk57p9m/bigann_learn500k.fvecs?dl=1 > ./data/BIGANN1M/bigann_learn500k.fvecs


100%|██████████| 258000000/258000000 [00:04<00:00, 60326830.10it/s]


Downloading https://www.dropbox.com/s/is6anxwon6g5bpe/bigann_query10k.fvecs?dl=1 > ./data/BIGANN1M/bigann_query10k.fvecs


100%|██████████| 5160000/5160000 [00:00<00:00, 27784585.83it/s]


Recall@1  : 0.3458
Recall@10 : 0.8282
Recall@100: 0.9899
Evaluating checkpoint checkpoints/sift_16b/checkpoint_best.pth on dataset BIGANN1M
Downloading https://www.dropbox.com/s/y7aucbm5gwyow9r/checkpoint_best.pth?dl=1 > checkpoints/sift_16b/checkpoint_best.pth


100%|██████████| 94634547/94634547 [00:02<00:00, 42195598.15it/s]


Recall@1  : 0.5931
Recall@10 : 0.9798
Recall@100: 1.0
Evaluating checkpoint checkpoints/deep_8b/checkpoint_best.pth on dataset DEEP1M
Downloading https://www.dropbox.com/s/yvtm7y3f3412n9n/checkpoint_best.pth?dl=1 > checkpoints/deep_8b/checkpoint_best.pth


100%|██████████| 62365780/62365780 [00:03<00:00, 33500166.33it/s]


Downloading https://www.dropbox.com/s/e23sdc3twwn9syk/deep_base1M.fvecs?dl=1 > ./data/DEEP1M/deep_base1M.fvecs


100%|██████████| 388000000/388000000 [00:07<00:00, 53521585.16it/s]


Downloading https://www.dropbox.com/s/4i0c5o8jzvuloxy/deep_learn500k.fvecs?dl=1 > ./data/DEEP1M/deep_learn500k.fvecs


100%|██████████| 194000000/194000000 [00:04<00:00, 41919113.69it/s]


Downloading https://www.dropbox.com/s/5z087cxqh61n144/deep_query10k.fvecs?dl=1 > ./data/DEEP1M/deep_query10k.fvecs


100%|██████████| 3880000/3880000 [00:00<00:00, 5595664.66it/s]


Recall@1  : 0.2669
Recall@10 : 0.7259
Recall@100: 0.9734
Evaluating checkpoint checkpoints/deep_16b/checkpoint_best.pth on dataset DEEP1M
Downloading https://www.dropbox.com/s/a0v988tb6i00qir/checkpoint_best.pth?dl=1 > checkpoints/deep_16b/checkpoint_best.pth


100%|██████████| 93847732/93847732 [00:02<00:00, 44430298.26it/s]


Recall@1  : 0.479
Recall@10 : 0.9305
Recall@100: 0.998
