From 1ca39438aafbda16960be0d3a4ddf735b4a19fed Mon Sep 17 00:00:00 2001 From: Raphael Krupinski <10319569-mattesilver@users.noreply.gitlab.com> Date: Sun, 9 Jul 2023 16:28:51 +0200 Subject: [PATCH 1/2] Add chunked_filter (#344) --- boltons/iterutils.py | 24 ++++++++++++++++++++++++ docs/iterutils.rst | 1 + 2 files changed, 25 insertions(+) diff --git a/boltons/iterutils.py b/boltons/iterutils.py index eddbeff2..f1cd98d2 100644 --- a/boltons/iterutils.py +++ b/boltons/iterutils.py @@ -374,6 +374,30 @@ def chunked_iter(src, size, **kw): return +def chunked_filter(iterable, predicate, chunk_size): + """A version of :func:`filter` which will call predicate with a chunk of the iterable. + + >>> list(chunked_filter(range(10), lambda chunk: (x % 2==0 for x in chunk), 5)) + [0, 2, 4, 6, 8] + + In the above example the lambda function is called twice: once with values + 0-4 and then for 5-9. + + Args: + iterable (Iterable): Items to filter + predicate (Callable): Predicate function + chunk_size (int): The maximum size of chunks that will be passed the + predicate function. + """ + + return ( + item + for chunk in chunked_iter(iterable, chunk_size) + for item, allow in zip(chunk, predicate(chunk)) + if allow + ) + + def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False): """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*. Optionally, a start of the input can be set via *input_offset*, and diff --git a/docs/iterutils.rst b/docs/iterutils.rst index 23165a0b..0218a6b7 100644 --- a/docs/iterutils.rst +++ b/docs/iterutils.rst @@ -18,6 +18,7 @@ present in the standard library. .. autofunction:: chunked .. autofunction:: chunked_iter +.. autofunction:: chunked_filter .. autofunction:: chunk_ranges .. autofunction:: pairwise .. 
autofunction:: pairwise_iter From 08dccec38c14a42df697d0b0eba96dc925c58422 Mon Sep 17 00:00:00 2001 From: Raphael Krupinski <10319569-mattesilver@users.noreply.gitlab.com> Date: Tue, 25 Jul 2023 07:55:36 +0200 Subject: [PATCH 2/2] [chunked_filter] Validate inputs, improve docstrings, add tests. --- boltons/iterutils.py | 38 +++++++++++++++++++++++++++++++------- tests/test_iterutils.py | 20 ++++++++++++++++++++ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/boltons/iterutils.py b/boltons/iterutils.py index f1cd98d2..70ce322d 100644 --- a/boltons/iterutils.py +++ b/boltons/iterutils.py @@ -374,10 +374,10 @@ def chunked_iter(src, size, **kw): return -def chunked_filter(iterable, predicate, chunk_size): - """A version of :func:`filter` which will call predicate with a chunk of the iterable. +def chunked_filter(iterable, predicate, size): + """A version of :func:`filter` which will call *predicate* with a chunk of the *iterable*. - >>> list(chunked_filter(range(10), lambda chunk: (x % 2==0 for x in chunk), 5)) + >>> list(chunked_filter(range(10), lambda chunk: (x % 2==0 for x in chunk),5)) [0, 2, 4, 6, 8] In the above example the lambda function is called twice: once with values @@ -385,15 +385,39 @@ def chunked_filter(iterable, predicate, chunk_size): Args: iterable (Iterable): Items to filter - predicate (Callable): Predicate function - chunk_size (int): The maximum size of chunks that will be passed the + predicate (Callable): Bulk predicate function that accepts a list of items + and returns an iterable of bools + size (int): The maximum size of chunks that will be passed the predicate function. + + The intended use case for this function is with external APIs, + for all kinds of validations. Since APIs always have limitations, + either explicitly for number of passed items, or at least for the request size, + it's required to pass large collections in chunks. 
""" + if not is_iterable(iterable): + raise TypeError('expected an iterable') + size = _validate_positive_int(size, 'chunk size') + + if not callable(predicate): + raise TypeError('expected callable key') + + def predicate_(src_): + allow_iter = predicate(src_) + if not is_iterable(allow_iter): + raise TypeError('expected an iterable from key(src)') + + allow_list = list(allow_iter) + if len(allow_list) != len(src_): + raise ValueError('expected the iterable from key(src) has the same length as the passed chunk of items') + + return allow_list + return ( item - for chunk in chunked_iter(iterable, chunk_size) - for item, allow in zip(chunk, predicate(chunk)) + for chunk in chunked_iter(iterable, size) + for item, allow in zip(chunk, predicate_(chunk)) if allow ) diff --git a/tests/test_iterutils.py b/tests/test_iterutils.py index 2738d4b7..bac7dee8 100644 --- a/tests/test_iterutils.py +++ b/tests/test_iterutils.py @@ -511,6 +511,26 @@ def test_chunked_bytes(): assert chunked(b'123', 2) in (['12', '3'], [b'12', b'3']) +class TestChunkedFilter(object): + def test_not_iterable(self): + from boltons.iterutils import chunked_filter + + with pytest.raises(TypeError): + chunked_filter(7, lambda chunk: (True for x in chunk), 10) + + def test_size_zero(self): + from boltons.iterutils import chunked_filter + + with pytest.raises(ValueError): + chunked_filter((1, 2, 3), lambda chunk: (True for x in chunk), 0) + + def test_not_callable(self): + from boltons.iterutils import chunked_filter + + with pytest.raises(TypeError): + chunked_filter((1, 2, 3), 'allow odd numbers', 10) + + def test_chunk_ranges(): from boltons.iterutils import chunk_ranges