Merge pull request #30 from matsui528/small_fix
Small fix for maintenance
matsui528 committed Sep 8, 2023
2 parents ce1a835 + dd104de commit 8d3c7b2
Showing 9 changed files with 64 additions and 43 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
@@ -13,7 +13,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v3

# Install the latest miniconda. The "test" environment is activated
- name: Setup miniconda
@@ -26,7 +26,7 @@ jobs:
shell: bash -l {0} # to activate conda
run: |
pip install pytest
pip install . # In stall this library
pip install . # Install this library
conda install -c pytorch "faiss-cpu<1.7.4" # 1.7.4 doesn't work as of May 2023. Should be updated some day.
- name: Test with pytest
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -60,7 +60,7 @@
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
1 change: 1 addition & 0 deletions docs/requirements.txt
@@ -1,2 +1,3 @@
numpy
scipy
faiss-cpu # To render docs easily, use un-official pypi version of faiss
9 changes: 5 additions & 4 deletions nanopq/convert_faiss.py
@@ -13,11 +13,12 @@
from .pq import PQ

faiss_metric_map = {
'l2': faiss.METRIC_L2,
'dot': faiss.METRIC_INNER_PRODUCT,
'angular': faiss.METRIC_INNER_PRODUCT
"l2": faiss.METRIC_L2,
"dot": faiss.METRIC_INNER_PRODUCT,
"angular": faiss.METRIC_INNER_PRODUCT,
}


def nanopq_to_faiss(pq_nanopq):
"""Convert a :class:`nanopq.PQ` instance to `faiss.IndexPQ <https://github.com/facebookresearch/faiss/blob/master/IndexPQ.h>`_.
To use this function, `faiss module needs to be installed <https://github.com/facebookresearch/faiss/blob/master/INSTALL.md>`_.
@@ -54,7 +55,7 @@ def nanopq_to_faiss(pq_nanopq):


def faiss_to_nanopq(pq_faiss):
"""Convert a `faiss.IndexPQ <https://github.com/facebookresearch/faiss/blob/master/IndexPQ.h>`_
"""Convert a `faiss.IndexPQ <https://github.com/facebookresearch/faiss/blob/master/IndexPQ.h>`_
or a `faiss.IndexPreTransform <https://github.com/facebookresearch/faiss/blob/master/IndexPreTransform.h>`_ instance to :class:`nanopq.OPQ`.
To use this function, `faiss module needs to be installed <https://github.com/facebookresearch/faiss/blob/master/INSTALL.md>`_.
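For orientation, a minimal usage sketch of the two converters touched above (not part of this commit; the data shapes are made up, and the tuple return of faiss_to_nanopq plus calling add() on the converted index right away are assumptions taken from the docstrings and the tests further down):

import numpy as np
import nanopq

X = np.random.random((1000, 32)).astype(np.float32)
pq = nanopq.PQ(M=4, Ks=256).fit(X)          # train a plain nanopq PQ

index = nanopq.nanopq_to_faiss(pq)          # faiss.IndexPQ sharing the same codewords (assumed ready to use)
index.add(X)                                # encode and store the database vectors in faiss

pq2, codes = nanopq.faiss_to_nanopq(index)  # back to nanopq, plus the codes stored in the index (assumed return)
X_approx = pq2.decode(codes=codes)          # approximate reconstruction, as exercised in the tests below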
17 changes: 13 additions & 4 deletions nanopq/opq.py
@@ -1,7 +1,8 @@
from collections import defaultdict

import numpy as np

from collections import defaultdict
from .pq import PQ, DistanceTable
from .pq import PQ


class OPQ(object):
@@ -26,7 +27,7 @@ class OPQ(object):
"""

def __init__(self, M, Ks=256, metric='l2', verbose=True):
def __init__(self, M, Ks=256, metric="l2", verbose=True):
self.pq = PQ(M, Ks, metric=metric, verbose=verbose)
self.R = None

@@ -117,7 +118,15 @@ def eigenvalue_allocation(self, vecs):
R = R.astype(dtype=np.float32)
return R

def fit(self, vecs, parametric_init=False, pq_iter=20, rotation_iter=10, seed=123, minit='points'):
def fit(
self,
vecs,
parametric_init=False,
pq_iter=20,
rotation_iter=10,
seed=123,
minit="points",
):
"""Given training vectors, this function alternatively trains
(a) codewords and (b) a rotation matrix.
The procedure of training codewords is same as :func:`PQ.fit`.
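As a reading aid for the reformatted signature above, a small sketch of calling OPQ.fit with the same keyword arguments (shapes and values here are illustrative only):

import numpy as np
import nanopq

X = np.random.random((2000, 64)).astype(np.float32)   # N=2000 training vectors, D=64
opq = nanopq.OPQ(M=8, Ks=256, metric="l2")
opq.fit(
    X,
    parametric_init=False,   # skip the eigenvalue-allocation initialization of R
    pq_iter=20,              # k-means iterations when training the codewords
    rotation_iter=10,        # alternations between codeword training and rotation updates
    seed=123,
    minit="points",          # passed through to scipy's kmeans2
)
codes = opq.encode(X)        # rotate by the learned R, then PQ-encode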
57 changes: 34 additions & 23 deletions nanopq/pq.py
@@ -10,10 +10,7 @@ def dist_ip(q, x):
return q @ x.T


metric_function_map = {
'l2': dist_l2,
'dot': dist_ip
}
metric_function_map = {"l2": dist_l2, "dot": dist_ip}


class PQ(object):
@@ -50,34 +47,46 @@ class PQ(object):
"""

def __init__(self, M, Ks=256, metric='l2', verbose=True):
assert 0 < Ks <= 2 ** 32
assert metric in ['l2', 'dot']
def __init__(self, M, Ks=256, metric="l2", verbose=True):
assert 0 < Ks <= 2**32
assert metric in ["l2", "dot"]
self.M, self.Ks, self.metric, self.verbose = M, Ks, metric, verbose
self.code_dtype = (
np.uint8 if Ks <= 2 ** 8 else (np.uint16 if Ks <= 2 ** 16 else np.uint32)
np.uint8 if Ks <= 2**8 else (np.uint16 if Ks <= 2**16 else np.uint32)
)
self.codewords = None
self.Ds = None

if verbose:
print("M: {}, Ks: {}, metric : {}, code_dtype: {}".format(
M, Ks, self.code_dtype, metric))
print(
"M: {}, Ks: {}, metric : {}, code_dtype: {}".format(
M, Ks, self.code_dtype, metric
)
)

def __eq__(self, other):
if isinstance(other, PQ):
return (self.M, self.Ks, self.metric, self.verbose, self.code_dtype, self.Ds) == (
return (
self.M,
self.Ks,
self.metric,
self.verbose,
self.code_dtype,
self.Ds,
) == (
other.M,
other.Ks,
other.metric,
other.verbose,
other.code_dtype,
other.Ds,
) and np.array_equal(self.codewords, other.codewords)
) and np.array_equal(
self.codewords, other.codewords
)
else:
return False

def fit(self, vecs, iter=20, seed=123, minit='points'):
def fit(self, vecs, iter=20, seed=123, minit="points"):
"""Given training vectors, run k-means for each sub-space and create
codewords for each sub-space.
@@ -98,7 +107,7 @@ def fit(self, vecs, iter=20, seed=123, minit='points'):
N, D = vecs.shape
assert self.Ks < N, "the number of training vector should be more than Ks"
assert D % self.M == 0, "input dimension must be dividable by M"
assert minit in ['random', '++', 'points', 'matrix']
assert minit in ["random", "++", "points", "matrix"]
self.Ds = int(D / self.M)

np.random.seed(seed)
@@ -110,9 +119,8 @@ def fit(self, vecs, iter=20, seed=123, minit='points'):
for m in range(self.M):
if self.verbose:
print("Training the subspace: {} / {}".format(m, self.M))
vecs_sub = vecs[:, m * self.Ds: (m + 1) * self.Ds]
self.codewords[m], _ = kmeans2(
vecs_sub, self.Ks, iter=iter, minit=minit)
vecs_sub = vecs[:, m * self.Ds : (m + 1) * self.Ds]
self.codewords[m], _ = kmeans2(vecs_sub, self.Ks, iter=iter, minit=minit)
return self

def encode(self, vecs):
@@ -189,9 +197,12 @@ def dtable(self, query):
# dtable[m][ks] : distance between m-th subvec and ks-th codeword of m-th codewords
dtable = np.empty((self.M, self.Ks), dtype=np.float32)
for m in range(self.M):
query_sub = query[m * self.Ds: (m + 1) * self.Ds]
dtable[m, :] = metric_function_map[self.metric](query_sub, self.codewords[m])
# In case of L2, the above line would be:
query_sub = query[m * self.Ds : (m + 1) * self.Ds]
dtable[m, :] = metric_function_map[self.metric](
query_sub, self.codewords[m]
)

# In case of L2, the above line would be:
# dtable[m, :] = np.linalg.norm(self.codewords[m] - query_sub, axis=1) ** 2

return DistanceTable(dtable, metric=self.metric)
@@ -216,10 +227,10 @@ class DistanceTable(object):
"""

def __init__(self, dtable, metric='l2'):
def __init__(self, dtable, metric="l2"):
assert dtable.ndim == 2
assert dtable.dtype == np.float32
assert metric in ['l2', 'dot']
assert metric in ["l2", "dot"]
self.dtable = dtable
self.metric = metric

@@ -240,7 +251,7 @@ def adist(self, codes):
N, M = codes.shape
assert M == self.dtable.shape[0]

# Fetch distance values using codes. The following codes are
# Fetch distance values using codes.
dists = np.sum(self.dtable[range(M), codes], axis=1)

# The above line is equivalent to the followings:
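To ground the query path reworked above, a short sketch that strings together the fit / encode / dtable / adist calls visible in this diff (sizes are illustrative; the manual lookup line mirrors the np.sum expression in DistanceTable.adist):

import numpy as np
import nanopq

N, D, M, Ks = 1000, 32, 4, 256
X = np.random.random((N, D)).astype(np.float32)
query = X[0]

pq = nanopq.PQ(M=M, Ks=Ks, metric="l2").fit(X)
codes = pq.encode(X)          # (N, M) codes, uint8 here because Ks <= 2**8
dtable = pq.dtable(query)     # DistanceTable wrapping an (M, Ks) array of per-subspace distances
dists = dtable.adist(codes)   # asymmetric distances from the query to all N encoded vectors

# Hand-written equivalent of the lookup inside adist:
dists_manual = np.sum(dtable.dtable[range(M), codes], axis=1)
assert np.allclose(dists, dists_manual)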
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,5 +1,5 @@
[tool.pysen]
version = "0.9"
version = "0.10"

[tool.pysen.lint]
enable_black = true
6 changes: 3 additions & 3 deletions tests/test_convert_faiss.py
@@ -142,12 +142,12 @@ def test_faiss_nanopq_compare_accuracy(self):
Xb_nanopq_ = pq_nanopq.decode(codes=Cb_nanopq)

# Reconstruction error should be almost identical
avg_relative_error_faiss = ((Xb - Xb_faiss_) ** 2).sum() / (Xb ** 2).sum()
avg_relative_error_nanopq = ((Xb - Xb_nanopq_) ** 2).sum() / (Xb ** 2).sum()
avg_relative_error_faiss = ((Xb - Xb_faiss_) ** 2).sum() / (Xb**2).sum()
avg_relative_error_nanopq = ((Xb - Xb_nanopq_) ** 2).sum() / (Xb**2).sum()
diff_rel = (
avg_relative_error_faiss - avg_relative_error_nanopq
) / avg_relative_error_faiss
diff_rel = np.sqrt(diff_rel ** 2)
diff_rel = np.sqrt(diff_rel**2)
print("avg_rel_error_faiss:", avg_relative_error_faiss)
print("avg_rel_error_nanopq:", avg_relative_error_nanopq)
print("diff rel:", diff_rel)
9 changes: 4 additions & 5 deletions tests/test_pq.py
@@ -15,7 +15,7 @@ def setUp(self):
def test_instantiate(self):
pq1 = nanopq.PQ(M=4, Ks=256)
pq2 = nanopq.PQ(M=4, Ks=500)
pq3 = nanopq.PQ(M=4, Ks=2 ** 16 + 10)
pq3 = nanopq.PQ(M=4, Ks=2**16 + 10)
self.assertEqual(pq1.code_dtype, np.uint8)
self.assertEqual(pq2.code_dtype, np.uint16)
self.assertEqual(pq3.code_dtype, np.uint32)
@@ -31,7 +31,6 @@ def test_fit(self):
pq2 = nanopq.PQ(M=M, Ks=Ks).fit(X) # Can be called as a chain
self.assertTrue(np.allclose(pq.codewords, pq2.codewords))


def test_eq(self):
import copy

@@ -98,11 +97,11 @@ def test_pickle(self):
)
self.assertTrue(np.allclose(pq.codewords, pq2.codewords))
self.assertTrue(pq == pq2)

def test_ip(self):
N, D, M, Ks = 100, 12, 4, 10
X = np.random.random((N, D)).astype(np.float32)
pq = nanopq.PQ(M=M, Ks=Ks, metric='dot')
pq = nanopq.PQ(M=M, Ks=Ks, metric="dot")
pq.fit(X)
X_ = pq.encode(X)
q = X[13]
@@ -114,7 +113,7 @@ def test_ip(self):
dist2 = np.sum(dtable[range(M), X_], axis=1)
self.assertTrue((dist1 == dist2).all())
self.assertTrue(abs(np.mean(np.matmul(X, q[:, None]).squeeze() - dist1)) < 1e-7)


if __name__ == "__main__":
unittest.main()
