From 10f90444832ff0ae1d57f73a2074c884124c50c6 Mon Sep 17 00:00:00 2001
From: Yusuke Matsui
Date: Fri, 31 Aug 2018 11:11:08 +0900
Subject: [PATCH 1/4] bug of print functions. some docs update

---
 README.md                | 21 ++++++++++---------
 docs/source/tips.rst     | 44 +++++++++++++++++++++++++++++++---------
 docs/source/tutorial.rst |  7 +++++--
 rii/rii.py               |  5 ++++-
 4 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 8a59084..8c0101b 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@


-Reconfigurable Inverted Index (Rii): fast and memory efficient approximate nearest neighbor search method
+Reconfigurable Inverted Index (Rii): IVFPQ-based fast and memory efficient approximate nearest neighbor search method with a subset-search functionality.

 Reference:

@@ -19,9 +19,9 @@ Reference:
 ![](http://yusukematsui.me/project/rii/img/teaser1.png)  |  ![](http://yusukematsui.me/project/rii/img/teaser2.png)
 :---:|:---:
 The search can be operated for a subset of a database.  |  Rii remains fast even after many new items are added.

-- Fast and memory efficient ANN. Can handle billion-scale data on memory at once. The search is less than 10 ms.
-- Can run the search over a **subset** of the whole database
-- Remain fast even after a large number of vectors are newly added (i.e., the data structure can be **reconfigured**)
+- Fast and memory efficient ANN. Rii enables you to run billion-scale search in less than 10 ms.
+- You can run the search over a **subset** of the whole database
+- Rii remains fast even after many vectors are newly added (i.e., the data structure can be **reconfigured**)

 ## Installing

@@ -62,12 +62,12 @@ e.add_configure(vecs=X)
 ids, dists = e.query(q=q, topk=3)
 print(ids, dists) # e.g., [7484 8173 1556] [15.06257439 15.38533878 16.16935158]
 ```
-Note that, if you want, you can construct a codec at the same time as the instantiation of the Rii class
+Note that you can construct a PQ codec and instantiate the Rii class at the same time if you want.
 ```python
 e = rii.Rii(fine_quantizer=nanopq.PQ(M=32).fit(vecs=Xt))
 e.add_configure(vecs=X)
 ```
-Furthermore, you can even construct the class and add the vectors in one line
+Furthermore, you can even write them in one line by chaining the function calls.
 ```python
 e = rii.Rii(fine_quantizer=nanopq.PQ(M=32).fit(vecs=Xt)).add_configure(vecs=X)
 ```
@@ -109,7 +109,7 @@ with open('rii.pkl', 'rb') as f:
     e_dumped = pickle.load(f) # e_dumped is identical to e
 ```

-### Utils
+### Util functions
 ```python
 # Print the current parameters
 e.print_params()
@@ -117,15 +117,16 @@ e.print_params()
 # Delete all PQ-codes and posting lists. fine_quantizer is kept.
 e.clear()

+# You can switch the verbose flag
+e.verbose = False
+
 # You can merge two Rii instances if they have the same fine_quantizer
 e1 = rii.Rii(fine_quantizer=codec)
 e2 = rii.Rii(fine_quantizer=codec)
 e1.add_reconfigure(vecs=X1)
 e2.add_reconfigure(vecs=X2)
-e1.merge(e2)  # e1 will have (PQ-codes of) both X1 and X2
+e1.merge(e2)  # Now e1 contains both X1 and X2

-# You can switch the verbose flag
-e.verbose = False
 ```

 ## [Examples](./examples)

diff --git a/docs/source/tips.rst b/docs/source/tips.rst
index b80635c..effa271 100644
--- a/docs/source/tips.rst
+++ b/docs/source/tips.rst
@@ -39,28 +39,32 @@ Some useful tips for tuning of search parameters:

 .. _sequential_add:

-Initializing a Rii class by adding vectors sequentially
+Adding vectors sequentially
 --------------------------------------------------------

-For the first data addition, one might want to add vectors one by one.
+You might want to add vectors one by one. There are two ways to achieve that. The first option is simply calling :func:`rii.Rii.add_configure` every time.

 .. code-block:: python

+    # Suppose X is a set of vectors (np.ndarray with the shape (N, D))
     e = rii.Rii(fine_quantizer=codec)
     for x in X:
         e.add_configure(vecs=x.reshape(1, -1))  # Don't forget reshaping (D, ) to (1, D)

-This works perfectly. But this would take time if you would like to add many vectors
-by this way.
-It is because the reconfigure function is called (i.e., posting lists are computed from
-scrath) whenever each vector ``x`` is added.
+This works perfectly.
+But this would take time if you would like to add many vectors in this way.
+It is because the :func:`rii.Rii.reconfigure` function is called
+(inside :func:`rii.Rii.add_configure`) whenever a new vector ``x`` is added.
+The reconfiguration step creates posting lists from scratch,
+which does not need to be run for every addition.

-Alternatively, you can call :func:`add` for each ``x`` without updating
+
+Alternatively, you can call :func:`rii.Rii.add` for each ``x`` without updating
 the posting lists, and run
-:func:`reconfigure` finally.
+:func:`rii.Rii.reconfigure` finally.

 .. code-block:: python

@@ -69,8 +73,28 @@ the posting lists, and run
         e.add(vecs=x.reshape(1, -1))  # Don't forget reshaping (D, ) to (1, D)
     e.reconfigure()

-This is much faster. The final result of both ways are same.
-But you must call :func:`rii.Rii.reconfigure` in the final step to create posting lists.
+This is much faster. The final results from both ways are identical.
+Please remember that you must call :func:`rii.Rii.reconfigure` in the final step to create posting lists.
+
+Note that, if you receive your data in batches, they can be handled in the same manner:
+
+.. code-block:: python
+
+    # X1 is a set of vectors (batch). Xs is a set of batches.
+    # You might receive Xs as a generator/iterator
+    # because the whole Xs is too large to read on memory at once
+    Xs = [X1, X2, X3]
+
+    # Running "add_configure" every time
+    e1 = rii.Rii(fine_quantizer=codec)
+    for X in Xs:
+        e1.add_configure(vecs=X)
+
+    # Or, you can run "add" for each batch, and finally run "reconfigure"
+    e2 = rii.Rii(fine_quantizer=codec)
+    for X in Xs:
+        e2.add(vecs=X)
+    e2.reconfigure()


diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 0e0efd5..58d2fee 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -72,6 +72,10 @@ Compared to PQ, OPQ is little bit slower for encoding/searching but slightly mor
     # Prepare a PQ/OPQ codec with M=32 sub spaces
     codec = nanopq.PQ(M=32, Ks=256, verbose=True).fit(vecs=Xt)  # Trained using Xt

+Here, ``M`` is a parameter to control the runtime, accuracy, and memory consumption.
+Each input vector is later divided into ``M`` parts (hence ``D`` must be divisible by ``M``).
+With a larger ``M`` value, the search becomes more accurate but slower, with a larger memory footprint.
+Another parameter, ``Ks``, can be 256 for usual cases.
 See `the tutorial of nanopq `_ for more details about the parameter selection of the codec.

 Note that you can use ``X`` or the part of ``X`` for training if you
@@ -122,8 +126,7 @@ Inside this function, :func:`rii.Rii.add` and :func:`rii.Rii.reconfigure` are ca
 Make sure that you must call :func:`rii.Rii.add_configure` (not :func:`rii.Rii.add`) for the first data addition.
 It is because you need to create coarse centers (posting lists).
-Note that, if you would like to add vectors sequentially
-when constructing the class, please refer this; :ref:`sequential_add`
+Note that, if you would like to add vectors sequentially, please refer to :ref:`sequential_add`.

 .. hint::

diff --git a/rii/rii.py b/rii/rii.py
index 875c433..7e802e0 100644
--- a/rii/rii.py
+++ b/rii/rii.py
@@ -338,7 +338,10 @@ def print_params(self):
         print("nlist:", self.nlist)
         print("L0:", self.L0)
         print("cordwords.shape:", self.codewords.shape)
-        print("coarse_centers.shape:", self.coarse_centers.shape)
+        if self.nlist == 0:
+            print("coarse_centers.shape:", None)
+        else:
+            print("coarse_centers.shape:", self.coarse_centers.shape)
         if self.codes is None:
             print("codes.shape:", None)

From beca908ea0c43815cdf29b894f6d5e7b627a41ff Mon Sep 17 00:00:00 2001
From: Yusuke Matsui
Date: Fri, 31 Aug 2018 11:13:33 +0900
Subject: [PATCH 2/4] updated changelog

---
 docs/source/changelog.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index b5ca2e5..b19a87a 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -6,6 +6,7 @@ v0.2.2 (August XX, 2018)
 - `#14 `_ Build on Mac with clang (without OpenMP)
 - `#16 `_ SIMD implementation for squared L2 distance (SSE, AVX, and AVX512)
 - `#18 `_ Implemented a merge function
+- `#20 `_ Bug fix

 v0.2.1 (August 24, 2018)
 ----------------------------

From 347a310df169a2415f6c76c08d8d27777c7c0903 Mon Sep 17 00:00:00 2001
From: Yusuke Matsui
Date: Fri, 31 Aug 2018 11:27:27 +0900
Subject: [PATCH 3/4] v0.2.2

---
 docs/source/changelog.rst | 2 +-
 setup.py                  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index b19a87a..53229ea 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -1,7 +1,7 @@
 Changelog
 =============

-v0.2.2 (August XX, 2018)
+v0.2.2 (August 31, 2018)
 ----------------------------
 - `#14 `_ Build on Mac with clang (without OpenMP)
 - `#16 `_ SIMD implementation for squared L2 distance (SSE, AVX, and AVX512)

diff --git a/setup.py b/setup.py
index 5f33112..f77c3b4 100644
--- a/setup.py
+++ b/setup.py
@@ -105,7 +105,7 @@ def build_extensions(self):

 setup(
     name='rii',
-    version='0.2.1',
+    version='0.2.2',
     description='Fast and memory-efficient ANN with a subset-search functionality',
     long_description=readme,
     long_description_content_type='text/markdown',

From 00b1e8ae600c1bda8e6dd394066de0c8d3aff729 Mon Sep 17 00:00:00 2001
From: Yusuke Matsui
Date: Fri, 31 Aug 2018 11:35:04 +0900
Subject: [PATCH 4/4] pypi version badge

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 8c0101b..fc31f8c 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@

 [![Build Status](https://travis-ci.org/matsui528/rii.svg?branch=master)](https://travis-ci.org/matsui528/rii)
 [![Documentation Status](https://readthedocs.org/projects/rii/badge/?version=latest)](https://rii.readthedocs.io/en/latest/?badge=latest)
+[![PyPI version](https://badge.fury.io/py/rii.svg)](https://badge.fury.io/py/rii)