diff --git a/NEWS.rst b/NEWS.rst index ba445dd..0f6e7d8 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -18,11 +18,6 @@ Release date: *not yet released* Python 2.6 a separate ```ordereddict``` pacakge has to be installed from PyPI. (`issue #39 `_) -* Allow `None` as a valid value for the `batch_size` argument to - :py:meth:`Table.scan`, since HBase does not support specifying a batch size - when some scanner filters are used. (`issue #54 - `_). - HappyBase 0.7 ------------- diff --git a/happybase/table.py b/happybase/table.py index abcceb0..c8f2a75 100644 --- a/happybase/table.py +++ b/happybase/table.py @@ -254,15 +254,6 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None, this to a low value (or even 1) if your data is large, since a low batch size results in added round-trips to the server. - .. warning:: - - Not all HBase filters can be used in combination with a batch - size. Explicitly specify `None` for the `batch_size` argument - in those cases to override the default value. Failure to do - so can result in hard to debug errors (not HappyBase's - fault), such as a non-responsive connection. The HBase logs - may contain more useful information in these situations. - **Compatibility notes:** * The `filter` argument is only available when using HBase 0.92 @@ -289,11 +280,11 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None, :return: generator yielding the rows matching the scan :rtype: iterable of `(row_key, row_data)` tuples """ - if batch_size is not None and batch_size < 1: - raise ValueError("'batch_size' must be >= 1 (or None)") + if batch_size < 1: + raise ValueError("'batch_size' must be >= 1") if limit is not None and limit < 1: - raise ValueError("'limit' must be >= 1 (or None)") + raise ValueError("'limit' must be >= 1") if sorted_columns and self.connection.compat < '0.96': raise NotImplementedError( @@ -358,16 +349,16 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None, n_returned = n_fetched = 0 try: while True: - if batch_size is None: - how_many = 1 - else: + if limit is None: how_many = batch_size + else: + how_many = min(batch_size, limit - n_returned) - if limit is not None: - how_many = min(how_many, limit - n_returned) - - items = self.connection.client.scannerGetList( - scan_id, how_many) + if how_many == 1: + items = self.connection.client.scannerGet(scan_id) + else: + items = self.connection.client.scannerGetList( + scan_id, how_many) n_fetched += len(items) diff --git a/tests/test_api.py b/tests/test_api.py index ed78f81..2d22717 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -357,7 +357,7 @@ def test_scan(): list(table.scan(row_prefix='foobar', row_start='xyz')) with assert_raises(ValueError): - list(table.scan(batch_size=0)) + list(table.scan(batch_size=None)) if connection.compat == '0.90': with assert_raises(NotImplementedError): @@ -446,13 +446,6 @@ def test_scan_sorting(): row.items()) -def test_scan_filter_and_batch_size(): - # See issue #54 - filter = "SingleColumnValueFilter ('cf1', 'qual1', =, 'binary:val1')" - for k, v in table.scan(filter=filter, batch_size=None): - print v - - def test_delete(): row_key = 'row-test-delete' data = {'cf1:col1': 'v1',