Skip to content

Commit

Permalink
GH-98363: Add itertools.batched() (GH-98364)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhettinger committed Oct 17, 2022
1 parent 70732d8 commit de3ece7
Show file tree
Hide file tree
Showing 5 changed files with 370 additions and 39 deletions.
76 changes: 39 additions & 37 deletions Doc/library/itertools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Iterator Arguments Results
Iterator Arguments Results Example
============================ ============================ ================================================= =============================================================
:func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15``
:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG', n=3) --> ABC DEF G``
:func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F``
:func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F``
:func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``
Expand Down Expand Up @@ -170,6 +171,44 @@ loops that truncate the stream.
.. versionchanged:: 3.8
Added the optional *initial* parameter.


.. function:: batched(iterable, n)

Batch data from the *iterable* into lists of length *n*. The last
batch may be shorter than *n*.

Loops over the input iterable and accumulates data into lists up to
size *n*. The input is consumed lazily, just enough to fill a list.
The result is yielded as soon as the batch is full or when the input
iterable is exhausted:

.. doctest::

>>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet']
>>> unflattened = list(batched(flattened_data, 2))
>>> unflattened
[['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']]

>>> for batch in batched('ABCDEFG', 3):
... print(batch)
...
['A', 'B', 'C']
['D', 'E', 'F']
['G']

Roughly equivalent to::

def batched(iterable, n):
    "Batch data into lists of length n. The last batch may be shorter."
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError('n must be at least one')
    it = iter(iterable)
    # islice() pulls at most n items per pass, so the input is consumed
    # lazily; an empty list means the input is exhausted and ends the loop.
    while (batch := list(islice(it, n))):
        yield batch

.. versionadded:: 3.12


.. function:: chain(*iterables)

Make an iterator that returns elements from the first iterable until it is
Expand Down Expand Up @@ -858,13 +897,6 @@ which incur interpreter overhead.
else:
raise ValueError('Expected fill, strict, or ignore')
def batched(iterable, n):
    "Batch data into lists of length n.  The last batch may be shorter."
    # batched('ABCDEFG', 3) --> ABC DEF G
    it = iter(iterable)
    # n is not validated here: islice() itself rejects a negative n, and
    # n == 0 produces an empty first batch, which ends the loop immediately.
    while (batch := list(islice(it, n))):
        yield batch

def triplewise(iterable):
"Return overlapping triplets from an iterable"
# triplewise('ABCDEFG') --> ABC BCD CDE DEF EFG
Expand Down Expand Up @@ -1211,36 +1243,6 @@ which incur interpreter overhead.
>>> list(grouper('abcdefg', n=3, incomplete='ignore'))
[('a', 'b', 'c'), ('d', 'e', 'f')]

>>> list(batched('ABCDEFG', 3))
[['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]
>>> list(batched('ABCDEF', 3))
[['A', 'B', 'C'], ['D', 'E', 'F']]
>>> list(batched('ABCDE', 3))
[['A', 'B', 'C'], ['D', 'E']]
>>> list(batched('ABCD', 3))
[['A', 'B', 'C'], ['D']]
>>> list(batched('ABC', 3))
[['A', 'B', 'C']]
>>> list(batched('AB', 3))
[['A', 'B']]
>>> list(batched('A', 3))
[['A']]
>>> list(batched('', 3))
[]
>>> list(batched('ABCDEFG', 2))
[['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']]
>>> list(batched('ABCDEFG', 1))
[['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']]
>>> list(batched('ABCDEFG', 0))
[]
>>> list(batched('ABCDEFG', -1))
Traceback (most recent call last):
...
ValueError: Stop argument for islice() must be None or an integer: 0 <= x <= sys.maxsize.
>>> s = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
>>> all(list(flatten(batched(s[:n], 5))) == list(s[:n]) for n in range(len(s)))
True

>>> list(triplewise('ABCDEFG'))
[('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E'), ('D', 'E', 'F'), ('E', 'F', 'G')]

Expand Down
79 changes: 79 additions & 0 deletions Lib/test/test_itertools.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,44 @@ def test_accumulate(self):
with self.assertRaises(TypeError):
list(accumulate([10, 20], 100))

def test_batched(self):
    # Comments rather than a docstring: unittest prints a test's docstring
    # in place of its name in verbose output.
    #
    # Basic batching: full batches followed by a possibly shorter last one.
    self.assertEqual(list(batched('ABCDEFG', 3)),
                     [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']])
    self.assertEqual(list(batched('ABCDEFG', 2)),
                     [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']])
    self.assertEqual(list(batched('ABCDEFG', 1)),
                     [['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']])

    # Argument validation.
    with self.assertRaises(TypeError):
        list(batched('ABCDEFG'))                # Too few arguments
    with self.assertRaises(TypeError):
        list(batched('ABCDEFG', 3, None))       # Too many arguments
    with self.assertRaises(TypeError):
        list(batched(None, 3))                  # Non-iterable input
    with self.assertRaises(TypeError):
        list(batched('ABCDEFG', 'hello'))       # n is a string
    with self.assertRaises(ValueError):
        list(batched('ABCDEFG', 0))             # n is zero
    with self.assertRaises(ValueError):
        list(batched('ABCDEFG', -1))            # n is negative

    # Invariants checked over every proper prefix of the alphabet
    # (including the empty string) and batch sizes 1 through 5.
    data = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    for n in range(1, 6):
        for i in range(len(data)):
            s = data[:i]
            batches = list(batched(s, n))
            with self.subTest(s=s, n=n, batches=batches):
                # Order is preserved and no data is lost
                self.assertEqual(''.join(chain(*batches)), s)
                # Each batch is an exact list
                self.assertTrue(all(type(batch) is list for batch in batches))
                # All but the last batch is of size n
                if batches:
                    # Unpack instead of pop()/append() so that `batches`
                    # is never mutated while it is a subTest parameter.
                    *full_batches, last_batch = batches
                    self.assertTrue(all(len(batch) == n for batch in full_batches))
                    self.assertLessEqual(len(last_batch), n)

def test_chain(self):

def chain2(*iterables):
Expand Down Expand Up @@ -1737,6 +1775,31 @@ def test_takewhile(self):

class TestPurePythonRoughEquivalents(unittest.TestCase):

def test_batched_recipe(self):
    # Check that the pure-Python recipe from the documentation agrees with
    # the real batched() on both results and exception types.
    def batched_recipe(iterable, n):
        "Batch data into lists of length n. The last batch may be shorter."
        # batched('ABCDEFG', 3) --> ABC DEF G
        if n < 1:
            raise ValueError('n must be at least one')
        it = iter(iterable)
        while (batch := list(islice(it, n))):
            yield batch

    # The grid deliberately includes invalid inputs (a None iterable, and
    # n values of -1, 0 and None) so that the error paths are compared too.
    for iterable, n in product(
            ['', 'a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef', 'abcdefg', None],
            [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, None]):
        with self.subTest(iterable=iterable, n=n):
            # The broad `except Exception` is intentional: whatever is
            # raised, only its type is recorded and then compared between
            # the two implementations.
            try:
                e1, r1 = None, list(batched(iterable, n))
            except Exception as e:
                e1, r1 = type(e), None
            try:
                e2, r2 = None, list(batched_recipe(iterable, n))
            except Exception as e:
                e2, r2 = type(e), None
            self.assertEqual(r1, r2)
            self.assertEqual(e1, e2)

@staticmethod
def islice(iterable, *args):
s = slice(*args)
Expand Down Expand Up @@ -1788,6 +1851,10 @@ def test_accumulate(self):
a = []
self.makecycle(accumulate([1,2,a,3]), a)

def test_batched(self):
    # `a` is both an element of the input and the container handed to
    # makecycle(), mirroring the neighbouring tests for other iterators.
    a = []
    self.makecycle(batched([1,2,a,3], 2), a)

def test_chain(self):
a = []
self.makecycle(chain(a), a)
Expand Down Expand Up @@ -1972,6 +2039,18 @@ def test_accumulate(self):
self.assertRaises(TypeError, accumulate, N(s))
self.assertRaises(ZeroDivisionError, list, accumulate(E(s)))

def test_batched(self):
    # Run batched() over the file's iterable wrapper classes (G, I, Ig, L,
    # R, S, X, N, E are defined elsewhere in this test module).
    s = 'abcde'
    r = [['a', 'b'], ['c', 'd'], ['e']]
    n = 2
    # Wrappers expected to yield every element of `s` in order.
    for g in (G, I, Ig, L, R):
        with self.subTest(g=g):
            self.assertEqual(list(batched(g(s), n)), r)
    # S(s) is expected to yield no batches at all.
    self.assertEqual(list(batched(S(s), 2)), [])
    # X and N must be rejected when batched() is constructed ...
    self.assertRaises(TypeError, batched, X(s), 2)
    self.assertRaises(TypeError, batched, N(s), 2)
    # ... while E's error must propagate during iteration.
    self.assertRaises(ZeroDivisionError, list, batched(E(s), 2))

def test_chain(self):
for s in ("123", "", range(1000), ('do', 1.2), range(2000,2200,5)):
for g in (G, I, Ig, S, L, R):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added itertools.batched() to batch data into lists of a given length with
the last list possibly being shorter than the others.
81 changes: 80 additions & 1 deletion Modules/clinic/itertoolsmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit de3ece7

Please sign in to comment.