# PQ Recall

## product quantization

In [None]:
import nanopq
import numpy as np

# Problem size: n1 database vectors and n2 query vectors, all of dimension D.
n1, n2, D = 10000, 2000, 128

# Seed NumPy's global generator, then draw the database first and the queries
# second (the generator expression is consumed in that order).
np.random.seed(15)
X, queries = (
    np.random.randn(rows, D).astype(np.float32) for rows in (n1, n2)
)

# PQ configuration: M=8 sub-spaces, Ks=256 codewords in each sub-space.
M, Ks = 8, 256
pq = nanopq.PQ(M=M, Ks=Ks)

# Train the codewords on X, then encode X to PQ-codes of shape (10000, 8).
pq.fit(X)
X_code = pq.encode(X)

### compute recall

In [2]:
from evaluationRecall import Recall_PQ
# Recall evaluator built from the trained PQ codewords and the encoded database.
rpq = Recall_PQ(
    M=M,
    Ks=Ks,
    D=D,
    pq_codebook=pq.codewords,
    pq_codes=X_code,
    metric="l2_distance",
)

# Ground truth comes from brute_force_search on the raw (unencoded) vectors X.
ground_truth = rpq.brute_force_search(X, queries, metric="l2_distance")

In [3]:
# Report PQ recall for the queries against the brute-force ground truth.
# NOTE(review): the 100 presumably is the candidate-list length, since the
# printed metric reads "recall 1@100" — confirm against evaluationRecall.
rpq.pq_recall(queries, 100, ground_truth)

recall 1@100 = 0.528


# AQ Recall

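The following is not true additive quantization: the codebooks are learned by running k-means repeatedly on the residuals left by the previous stage, so only the resulting codebooks and codes have the same structure as in additive quantization.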

In [7]:
import numpy as np
from scipy.cluster.vq import kmeans2

# Synthetic data: n database vectors and nq queries of dimension D. The global
# NumPy generator is seeded first; the database is drawn before the queries.
n, nq, D = 10000, 2000, 128
np.random.seed(15)
X, queries = (
    np.random.randn(rows, D).astype(np.float32) for rows in (n, nq)
)

# M quantization stages with K centroids each.
M, K = 8, 256

# Stage 1: k-means on the raw vectors, initialised from data points.
centroid, code = kmeans2(X, K, minit='points')  # centroid.shape == (256, 128)
stage_centroids = [centroid]
stage_codes = [code]

# Remaining stages: subtract the centroid assigned in the previous stage and
# run k-means (default initialisation) on what is left.
RX = X
for _ in range(M - 1):
    RX = RX - centroid[code]
    centroid, code = kmeans2(RX, K)
    stage_centroids.append(centroid)
    stage_codes.append(code)

# Stack once at the end: centroids row-wise, per-stage codes as columns.
codebooks = np.vstack(stage_centroids)
codes = np.column_stack(stage_codes)
print(codebooks.shape)
print(codes.shape)

(2048, 128)
(10000, 8)


## compute recall

In [8]:
from evaluationRecall import Recall_AQ
# Recall evaluator for the stacked codebooks and per-stage codes.
raq = Recall_AQ(
    M,
    K,
    D,
    codebooks,
    codes,
    metric="l2_distance",
)

# Ground truth comes from brute_force_search on the raw vectors X.
ground_truth = raq.brute_force_search(X, queries, metric="l2_distance")

In [9]:
# Report recall for the stacked-codebook codes against the brute-force ground truth.
# NOTE(review): the 100 presumably is the candidate-list length, since the
# printed metric reads "recall 1@100" — confirm against evaluationRecall.
raq.aq_recall(queries, 100, ground_truth)

recall 1@100 = 0.6285
