Skip to content

Commit

Permalink
Merge pull request #241 from davidshepherd7/unzip-2
Browse files Browse the repository at this point in the history
Add an implementation of unzip
  • Loading branch information
eriknw committed Jun 26, 2015
2 parents 7994485 + ba4204c commit 8cdc7fe
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Sandbox
.. autosummary::
parallel.fold
core.EqualityHashKey
core.unzip


Definitions
Expand Down
2 changes: 1 addition & 1 deletion toolz/sandbox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .core import EqualityHashKey
from .core import EqualityHashKey, unzip
from .parallel import fold
41 changes: 40 additions & 1 deletion toolz/sandbox/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from toolz.itertoolz import getter
from toolz.itertoolz import getter, cons, pluck
from itertools import tee, starmap


# See #166: https://github.com/pytoolz/toolz/issues/166
Expand Down Expand Up @@ -91,3 +92,41 @@ def __str__(self):

def __repr__(self):
    """Return the wrapped item's ``repr`` enclosed in ``=`` markers."""
    inner = repr(self.item)
    return '=%s=' % inner


# See issue #239: https://github.com/pytoolz/toolz/issues/239
def unzip(seq):
    """Inverse of ``zip``

    >>> a, b = unzip([('a', 1), ('b', 2)])
    >>> list(a)
    ['a', 'b']
    >>> list(b)
    [1, 2]

    Unlike the naive implementation ``def unzip(seq): return zip(*seq)``
    this implementation only reads the first element of ``seq`` up front
    and is lazy afterwards, so ``seq`` itself may be infinite.

    ``seq`` is any iterable whose elements are finite sequences.  The
    result is a tuple of iterators, one per position of the first element
    of ``seq``.  An empty ``seq`` gives an empty tuple.

    Caveats:

    * The implementation uses ``tee``, and so can use a significant amount
      of auxiliary storage if the resulting iterators are consumed at
      different times.

    * The inner sequences cannot be infinite: the first element is turned
      into a tuple in order to count how many iterators to create.

    * The number of iterators returned is fixed by the length of the first
      element; later elements are expected to be at least that long.
    """

    seq = iter(seq)

    # Peek at the first element to learn how many output iterators we need.
    try:
        first = tuple(next(seq))
    except StopIteration:
        # Nothing to unzip.
        return ()

    # Put the peeked element back in front of the remaining items, then give
    # every output position its own independent copy of the stream.
    niters = len(first)
    seqs = tee(cons(first, seq), niters)

    # The i-th output lazily plucks item ``i`` from each element of its copy.
    return tuple(starmap(pluck, enumerate(seqs)))
33 changes: 31 additions & 2 deletions toolz/sandbox/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from toolz import curry, unique, first
from toolz.sandbox.core import EqualityHashKey
from toolz import curry, unique, first, take
from toolz.sandbox.core import EqualityHashKey, unzip
from itertools import count, repeat
from toolz.compatibility import map, zip


def test_EqualityHashKey_default_key():
Expand Down Expand Up @@ -72,3 +74,30 @@ def test_EqualityHashKey_index_key():
EqualityHash0 = curry(EqualityHashKey, 0)
assert list(unique(3*[list1, list2, list3a, list3b],
key=EqualityHash0)) == [list1, list2, list3a]


def test_unzip():
    def _head(iterable, limit=10):
        """Return at most ``limit`` leading items of ``iterable`` as a list."""
        return list(take(limit, iterable))

    def _to_lists(seq, n=10):
        """iter of iters -> finite list of finite lists

        Both the outer iterable and each inner one are cut off after ``n``
        items.
        """
        return _head((_head(inner, n) for inner in seq), n)

    def _assert_initial_matches(a, b, n=10):
        # Only compare a finite prefix so infinite iterators can be checked.
        actual = list(take(n, a))
        expected = list(take(n, b))
        assert actual == expected

    # Unzips a simple list correctly
    pairs = [('a', 1), ('b', 2), ('c', 3)]
    assert _to_lists(unzip(pairs)) == [['a', 'b', 'c'], [1, 2, 3]]

    # Can handle a finite number of infinite iterators (the naive unzip
    # implementation `zip(*args)` fails on this example).
    a, b, c = unzip(zip(count(1), repeat(0), repeat(1)))
    for got, want in ((a, count(1)), (b, repeat(0)), (c, repeat(1))):
        _assert_initial_matches(got, want)

    # Sensibly handles empty input
    assert list(unzip(zip([]))) == []

0 comments on commit 8cdc7fe

Please sign in to comment.