Merge pull request #30 from matsui528/small_fix
Small fix for maintenance
matsui528 committed Sep 8, 2023
2 parents ce1a835 + dd104de commit 8d3c7b2
Showing 9 changed files with 64 additions and 43 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
@@ -13,7 +13,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v3

# Install the latest miniconda. The "test" environment is activated
- name: Setup miniconda
@@ -26,7 +26,7 @@ jobs:
shell: bash -l {0} # to activate conda
run: |
pip install pytest
pip install . # In stall this library
pip install . # Install this library
conda install -c pytorch "faiss-cpu<1.7.4" # 1.7.4 doesn't work as of May 2023. Should be updated some day.
- name: Test with pytest
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -60,7 +60,7 @@
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
1 change: 1 addition & 0 deletions docs/requirements.txt
@@ -1,2 +1,3 @@
numpy
scipy
faiss-cpu # To render docs easily, use un-official pypi version of faiss
9 changes: 5 additions & 4 deletions nanopq/convert_faiss.py
@@ -13,11 +13,12 @@
from .pq import PQ

faiss_metric_map = {
'l2': faiss.METRIC_L2,
'dot': faiss.METRIC_INNER_PRODUCT,
'angular': faiss.METRIC_INNER_PRODUCT
"l2": faiss.METRIC_L2,
"dot": faiss.METRIC_INNER_PRODUCT,
"angular": faiss.METRIC_INNER_PRODUCT,
}


def nanopq_to_faiss(pq_nanopq):
"""Convert a :class:`nanopq.PQ` instance to `faiss.IndexPQ <https://github.com/facebookresearch/faiss/blob/master/IndexPQ.h>`_.
To use this function, `faiss module needs to be installed <https://github.com/facebookresearch/faiss/blob/master/INSTALL.md>`_.
@@ -54,7 +55,7 @@ def nanopq_to_faiss(pq_nanopq):


def faiss_to_nanopq(pq_faiss):
"""Convert a `faiss.IndexPQ <https://github.com/facebookresearch/faiss/blob/master/IndexPQ.h>`_
"""Convert a `faiss.IndexPQ <https://github.com/facebookresearch/faiss/blob/master/IndexPQ.h>`_
or a `faiss.IndexPreTransform <https://github.com/facebookresearch/faiss/blob/master/IndexPreTransform.h>`_ instance to :class:`nanopq.OPQ`.
To use this function, `faiss module needs to be installed <https://github.com/facebookresearch/faiss/blob/master/INSTALL.md>`_.
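For orientation, a minimal usage sketch of the two converters touched above (not part of this commit; the data shapes are made up, and the tuple return of faiss_to_nanopq plus calling add() on the converted index right away are assumptions taken from the docstrings and the tests further down):

import numpy as np
import nanopq

X = np.random.random((1000, 32)).astype(np.float32)
pq = nanopq.PQ(M=4, Ks=256).fit(X)          # train a plain nanopq PQ

index = nanopq.nanopq_to_faiss(pq)          # faiss.IndexPQ sharing the same codewords (assumed ready to use)
index.add(X)                                # encode and store the database vectors in faiss

pq2, codes = nanopq.faiss_to_nanopq(index)  # back to nanopq, plus the codes stored in the index (assumed return)
X_approx = pq2.decode(codes=codes)          # approximate reconstruction, as exercised in the tests below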
17 changes: 13 additions & 4 deletions nanopq/opq.py
@@ -1,7 +1,8 @@
from collections import defaultdict

import numpy as np

from collections import defaultdict
from .pq import PQ, DistanceTable
from .pq import PQ


class OPQ(object):
@@ -26,7 +27,7 @@ class OPQ(object):
"""

def __init__(self, M, Ks=256, metric='l2', verbose=True):
def __init__(self, M, Ks=256, metric="l2", verbose=True):
self.pq = PQ(M, Ks, metric=metric, verbose=verbose)
self.R = None

@@ -117,7 +118,15 @@ def eigenvalue_allocation(self, vecs):
R = R.astype(dtype=np.float32)
return R

def fit(self, vecs, parametric_init=False, pq_iter=20, rotation_iter=10, seed=123, minit='points'):
def fit(
self,
vecs,
parametric_init=False,
pq_iter=20,
rotation_iter=10,
seed=123,
minit="points",
):
"""Given training vectors, this function alternatively trains
(a) codewords and (b) a rotation matrix.
The procedure of training codewords is same as :func:`PQ.fit`.
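As a reading aid for the reformatted signature above, a small sketch of calling OPQ.fit with the same keyword arguments (shapes and values here are illustrative only):

import numpy as np
import nanopq

X = np.random.random((2000, 64)).astype(np.float32)   # N=2000 training vectors, D=64
opq = nanopq.OPQ(M=8, Ks=256, metric="l2")
opq.fit(
    X,
    parametric_init=False,   # skip the eigenvalue-allocation initialization of R
    pq_iter=20,              # k-means iterations when training the codewords
    rotation_iter=10,        # alternations between codeword training and rotation updates
    seed=123,
    minit="points",          # passed through to scipy's kmeans2
)
codes = opq.encode(X)        # rotate by the learned R, then PQ-encode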
57 changes: 34 additions & 23 deletions nanopq/pq.py
@@ -10,10 +10,7 @@ def dist_ip(q, x):
return q @ x.T


metric_function_map = {
'l2': dist_l2,
'dot': dist_ip
}
metric_function_map = {"l2": dist_l2, "dot": dist_ip}


class PQ(object):
@@ -50,34 +47,46 @@ class PQ(object):
"""

def __init__(self, M, Ks=256, metric='l2', verbose=True):
assert 0 < Ks <= 2 ** 32
assert metric in ['l2', 'dot']
def __init__(self, M, Ks=256, metric="l2", verbose=True):
assert 0 < Ks <= 2**32
assert metric in ["l2", "dot"]
self.M, self.Ks, self.metric, self.verbose = M, Ks, metric, verbose
self.code_dtype = (
np.uint8 if Ks <= 2 ** 8 else (np.uint16 if Ks <= 2 ** 16 else np.uint32)
np.uint8 if Ks <= 2**8 else (np.uint16 if Ks <= 2**16 else np.uint32)
)
self.codewords = None
self.Ds = None

if verbose:
print("M: {}, Ks: {}, metric : {}, code_dtype: {}".format(
M, Ks, self.code_dtype, metric))
print(
"M: {}, Ks: {}, metric : {}, code_dtype: {}".format(
M, Ks, self.code_dtype, metric
)
)

def __eq__(self, other):
if isinstance(other, PQ):
return (self.M, self.Ks, self.metric, self.verbose, self.code_dtype, self.Ds) == (
return (
self.M,
self.Ks,
self.metric,
self.verbose,
self.code_dtype,
self.Ds,
) == (
other.M,
other.Ks,
other.metric,
other.verbose,
other.code_dtype,
other.Ds,
) and np.array_equal(self.codewords, other.codewords)
) and np.array_equal(
self.codewords, other.codewords
)
else:
return False

def fit(self, vecs, iter=20, seed=123, minit='points'):
def fit(self, vecs, iter=20, seed=123, minit="points"):
"""Given training vectors, run k-means for each sub-space and create
codewords for each sub-space.
@@ -98,7 +107,7 @@ def fit(self, vecs, iter=20, seed=123, minit='points'):
N, D = vecs.shape
assert self.Ks < N, "the number of training vector should be more than Ks"
assert D % self.M == 0, "input dimension must be dividable by M"
assert minit in ['random', '++', 'points', 'matrix']
assert minit in ["random", "++", "points", "matrix"]
self.Ds = int(D / self.M)

np.random.seed(seed)
@@ -110,9 +119,8 @@ def fit(self, vecs, iter=20, seed=123, minit='points'):
for m in range(self.M):
if self.verbose:
print("Training the subspace: {} / {}".format(m, self.M))
vecs_sub = vecs[:, m * self.Ds: (m + 1) * self.Ds]
self.codewords[m], _ = kmeans2(
vecs_sub, self.Ks, iter=iter, minit=minit)
vecs_sub = vecs[:, m * self.Ds : (m + 1) * self.Ds]
self.codewords[m], _ = kmeans2(vecs_sub, self.Ks, iter=iter, minit=minit)
return self

def encode(self, vecs):
@@ -189,9 +197,12 @@ def dtable(self, query):
# dtable[m][ks] : distance between m-th subvec and ks-th codeword of m-th codewords
dtable = np.empty((self.M, self.Ks), dtype=np.float32)
for m in range(self.M):
query_sub = query[m * self.Ds: (m + 1) * self.Ds]
dtable[m, :] = metric_function_map[self.metric](query_sub, self.codewords[m])
# In case of L2, the above line would be:
query_sub = query[m * self.Ds : (m + 1) * self.Ds]
dtable[m, :] = metric_function_map[self.metric](
query_sub, self.codewords[m]
)

# In case of L2, the above line would be:
# dtable[m, :] = np.linalg.norm(self.codewords[m] - query_sub, axis=1) ** 2

return DistanceTable(dtable, metric=self.metric)
@@ -216,10 +227,10 @@ class DistanceTable(object):
"""

def __init__(self, dtable, metric='l2'):
def __init__(self, dtable, metric="l2"):
assert dtable.ndim == 2
assert dtable.dtype == np.float32
assert metric in ['l2', 'dot']
assert metric in ["l2", "dot"]
self.dtable = dtable
self.metric = metric

@@ -240,7 +251,7 @@ def adist(self, codes):
N, M = codes.shape
assert M == self.dtable.shape[0]

# Fetch distance values using codes. The following codes are
# Fetch distance values using codes.
dists = np.sum(self.dtable[range(M), codes], axis=1)

# The above line is equivalent to the followings:
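To ground the query path reworked above, a short sketch that strings together the fit / encode / dtable / adist calls visible in this diff (sizes are illustrative; the manual lookup line mirrors the np.sum expression in DistanceTable.adist):

import numpy as np
import nanopq

N, D, M, Ks = 1000, 32, 4, 256
X = np.random.random((N, D)).astype(np.float32)
query = X[0]

pq = nanopq.PQ(M=M, Ks=Ks, metric="l2").fit(X)
codes = pq.encode(X)          # (N, M) codes, uint8 here because Ks <= 2**8
dtable = pq.dtable(query)     # DistanceTable wrapping an (M, Ks) array of per-subspace distances
dists = dtable.adist(codes)   # asymmetric distances from the query to all N encoded vectors

# Hand-written equivalent of the lookup inside adist:
dists_manual = np.sum(dtable.dtable[range(M), codes], axis=1)
assert np.allclose(dists, dists_manual)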
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,5 +1,5 @@
[tool.pysen]
version = "0.9"
version = "0.10"

[tool.pysen.lint]
enable_black = true
6 changes: 3 additions & 3 deletions tests/test_convert_faiss.py
@@ -142,12 +142,12 @@ def test_faiss_nanopq_compare_accuracy(self):
Xb_nanopq_ = pq_nanopq.decode(codes=Cb_nanopq)

# Reconstruction error should be almost identical
avg_relative_error_faiss = ((Xb - Xb_faiss_) ** 2).sum() / (Xb ** 2).sum()
avg_relative_error_nanopq = ((Xb - Xb_nanopq_) ** 2).sum() / (Xb ** 2).sum()
avg_relative_error_faiss = ((Xb - Xb_faiss_) ** 2).sum() / (Xb**2).sum()
avg_relative_error_nanopq = ((Xb - Xb_nanopq_) ** 2).sum() / (Xb**2).sum()
diff_rel = (
avg_relative_error_faiss - avg_relative_error_nanopq
) / avg_relative_error_faiss
diff_rel = np.sqrt(diff_rel ** 2)
diff_rel = np.sqrt(diff_rel**2)
print("avg_rel_error_faiss:", avg_relative_error_faiss)
print("avg_rel_error_nanopq:", avg_relative_error_nanopq)
print("diff rel:", diff_rel)
9 changes: 4 additions & 5 deletions tests/test_pq.py
@@ -15,7 +15,7 @@ def setUp(self):
def test_instantiate(self):
pq1 = nanopq.PQ(M=4, Ks=256)
pq2 = nanopq.PQ(M=4, Ks=500)
pq3 = nanopq.PQ(M=4, Ks=2 ** 16 + 10)
pq3 = nanopq.PQ(M=4, Ks=2**16 + 10)
self.assertEqual(pq1.code_dtype, np.uint8)
self.assertEqual(pq2.code_dtype, np.uint16)
self.assertEqual(pq3.code_dtype, np.uint32)
@@ -31,7 +31,6 @@ def test_fit(self):
pq2 = nanopq.PQ(M=M, Ks=Ks).fit(X) # Can be called as a chain
self.assertTrue(np.allclose(pq.codewords, pq2.codewords))


def test_eq(self):
import copy

@@ -98,11 +97,11 @@ def test_pickle(self):
)
self.assertTrue(np.allclose(pq.codewords, pq2.codewords))
self.assertTrue(pq == pq2)

def test_ip(self):
N, D, M, Ks = 100, 12, 4, 10
X = np.random.random((N, D)).astype(np.float32)
pq = nanopq.PQ(M=M, Ks=Ks, metric='dot')
pq = nanopq.PQ(M=M, Ks=Ks, metric="dot")
pq.fit(X)
X_ = pq.encode(X)
q = X[13]
@@ -114,7 +113,7 @@ def test_ip(self):
dist2 = np.sum(dtable[range(M), X_], axis=1)
self.assertTrue((dist1 == dist2).all())
self.assertTrue(abs(np.mean(np.matmul(X, q[:, None]).squeeze() - dist1)) < 1e-7)


if __name__ == "__main__":
unittest.main()
