From a33b57808f06d86d270713898ad07bf2739c30ed Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 5 Jun 2018 14:20:15 -0500 Subject: [PATCH 1/7] Avoid using .index() --- toolz/itertoolz.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index a25eea3c..2ac2387e 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -720,7 +720,12 @@ def partition_all(n, seq): yield prev prev = item if prev[-1] is no_pad: - yield prev[:prev.index(no_pad)] + # Get first index of no_pad without using .index() + # https://github.com/pytoolz/toolz/issues/387 + for ind, item in enumerate(reversed(prev)): + if item is not no_pad: + yield prev[:len(prev)-ind] + break else: yield prev From 90ea648f9a42e9b84409df6895f1fb7507480ba1 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 5 Jun 2018 16:13:02 -0500 Subject: [PATCH 2/7] Try to find first index of no_pad more intelligently. --- toolz/itertoolz.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 2ac2387e..d742dd68 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -720,12 +720,22 @@ def partition_all(n, seq): yield prev prev = item if prev[-1] is no_pad: - # Get first index of no_pad without using .index() - # https://github.com/pytoolz/toolz/issues/387 - for ind, item in enumerate(reversed(prev)): - if item is not no_pad: - yield prev[:len(prev)-ind] - break + try: + # If seq defines __len__, then we can quickly calculate where no_pad starts + yield prev[:len(seq) % n] + except TypeError: + # Get first index of no_pad without using .index() + # https://github.com/pytoolz/toolz/issues/387 + # We can employ modified binary search here to speed things up from O(n) to O(log n) + # Binary search from CPython's bisect module, modified for identity testing. + lo, hi = 0, len(prev) + while lo < hi: + mid = (lo + hi) // 2 + if prev[mid] is no_pad: + hi = mid + else: + lo = mid + 1 + yield prev[:lo] else: yield prev From c323698462133a56ac66af5c58cfb3fe65bcc53d Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 5 Jun 2018 16:36:58 -0500 Subject: [PATCH 3/7] len(prev) == n --- toolz/itertoolz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index d742dd68..6a3d4e22 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -728,7 +728,7 @@ def partition_all(n, seq): # https://github.com/pytoolz/toolz/issues/387 # We can employ modified binary search here to speed things up from O(n) to O(log n) # Binary search from CPython's bisect module, modified for identity testing. - lo, hi = 0, len(prev) + lo, hi = 0, n while lo < hi: mid = (lo + hi) // 2 if prev[mid] is no_pad: From 0b24bb1d044d6cc230c43210d91abf69e07a50a0 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 5 Jun 2018 22:40:32 -0500 Subject: [PATCH 4/7] Add regression test. --- toolz/tests/test_itertoolz.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index 93aa856d..02154e25 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -318,6 +318,16 @@ def test_partition_all(): assert list(partition_all(3, range(5))) == [(0, 1, 2), (3, 4)] assert list(partition_all(2, [])) == [] + # Regression test: https://github.com/pytoolz/toolz/issues/387 + class NoCompare(object): + def __eq__(self, other): + if self.__class__ == other.__class__: + return True + raise ValueError() + obj = NoCompare() + assert list(partition_all(2, [obj]*3)) == [(obj, obj), (obj,)] + + def test_count(): assert count((1, 2, 3)) == 3 From 03731458b385b2f445ee5e12ef6b7d5c46ae0eb9 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Tue, 5 Jun 2018 22:43:22 -0500 Subject: [PATCH 5/7] Fix comments to make pep8 happy. --- toolz/itertoolz.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/toolz/itertoolz.py b/toolz/itertoolz.py index 6a3d4e22..71c7ddc0 100644 --- a/toolz/itertoolz.py +++ b/toolz/itertoolz.py @@ -721,13 +721,14 @@ def partition_all(n, seq): prev = item if prev[-1] is no_pad: try: - # If seq defines __len__, then we can quickly calculate where no_pad starts + # If seq defines __len__, then + # we can quickly calculate where no_pad starts yield prev[:len(seq) % n] except TypeError: # Get first index of no_pad without using .index() # https://github.com/pytoolz/toolz/issues/387 - # We can employ modified binary search here to speed things up from O(n) to O(log n) - # Binary search from CPython's bisect module, modified for identity testing. + # Binary search from CPython's bisect module, + # modified for identity testing. lo, hi = 0, n while lo < hi: mid = (lo + hi) // 2 From 7534eef2d6da0a55962feb156789a0d2d5ecf208 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Wed, 6 Jun 2018 09:00:50 -0500 Subject: [PATCH 6/7] Remove added whitespace from tests. --- toolz/tests/test_itertoolz.py | 1 - 1 file changed, 1 deletion(-) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index 02154e25..957e2143 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -328,7 +328,6 @@ def __eq__(self, other): assert list(partition_all(2, [obj]*3)) == [(obj, obj), (obj,)] - def test_count(): assert count((1, 2, 3)) == 3 assert count([]) == 0 From 86953ab1dfa252459182e81a9e0c6eaed9d1c686 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Wed, 6 Jun 2018 09:13:29 -0500 Subject: [PATCH 7/7] Test both fast and slow paths and make sure they give same result. --- toolz/tests/test_itertoolz.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/toolz/tests/test_itertoolz.py b/toolz/tests/test_itertoolz.py index 957e2143..e0a8d17f 100644 --- a/toolz/tests/test_itertoolz.py +++ b/toolz/tests/test_itertoolz.py @@ -325,7 +325,9 @@ def __eq__(self, other): return True raise ValueError() obj = NoCompare() - assert list(partition_all(2, [obj]*3)) == [(obj, obj), (obj,)] + result = [(obj, obj, obj, obj), (obj, obj, obj)] + assert list(partition_all(4, [obj]*7)) == result + assert list(partition_all(4, iter([obj]*7))) == result def test_count():