Skip to content

Commit

Permalink
Merge pull request #201 from eriknw/diff
Browse files Browse the repository at this point in the history
Add `itertoolz.diff` to yield elements that differ between sequences.
  • Loading branch information
eriknw committed Jan 5, 2015
2 parents c30261e + bc538a0 commit 0ca38c8
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Itertoolz
concatv
cons
count
diff
drop
first
frequencies
Expand Down
22 changes: 22 additions & 0 deletions doc/source/tips-and-tricks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,25 @@ a few of these recipes.
'location': 'Oakland',
'name': 'Matthew',
'person_id': 2}]

* .. function:: areidentical(\*seqs)

Determine if sequences are identical element-wise.
This lazily evaluates the sequences and stops as soon as the result
is determined.

::

from toolz import diff

def areidentical(*seqs):
    """ Return True when all sequences match element-wise, else False

    Sequences are consumed lazily; iteration stops at the first mismatch.
    """
    # A brand-new object never compares equal to real data, so padding
    # shorter sequences with it makes a length mismatch show up as a
    # difference.
    padding = object()
    for _ in diff(*seqs, default=padding):
        return False
    return True


Example:

>>> areidentical([1, 2, 3], (1, 2, 3))
True

>>> areidentical([1, 2, 3], [1, 2])
False
42 changes: 41 additions & 1 deletion toolz/itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
'join', 'tail')
'join', 'tail', 'diff')


def remove(predicate, seq):
Expand Down Expand Up @@ -815,3 +815,43 @@ def join(leftkey, leftseq, rightkey, rightseq,
if key not in seen_keys:
for match in matches:
yield (match, right_default)


def diff(*seqs, **kwargs):
    """ Yield the tuples of items that are not all equal across sequences

    The sequences are walked in lockstep; each position whose items are
    not all equal is yielded as a tuple holding one item per sequence.

    >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
    [(3, 10)]

    Without ``default``, comparison ends with the shortest sequence.  When
    ``default`` is given, shorter sequences are padded with it instead:

    >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
    [(3, 10), (None, 100)]

    An optional ``key`` function is applied to every item before comparing;
    the original (untransformed) items are what get yielded:

    >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
    [('bananas', 'Oranges')]

    The sequences may also be passed as a single list of sequences.

    Raises ``TypeError`` (upon iteration) if fewer than two sequences are
    supplied.
    """
    # Non-variadic call style: one list argument holding the sequences.
    if len(seqs) == 1 and isinstance(seqs[0], list):
        seqs = seqs[0]
    nseqs = len(seqs)
    if nseqs < 2:
        raise TypeError('Too few sequences given (min 2 required)')

    fill = kwargs.get('default', no_default)
    if fill is not no_default:
        # Pad the shorter sequences so trailing items are compared too.
        rows = zip_longest(*seqs, fillvalue=fill)
    else:
        # Stop as soon as the shortest sequence is exhausted.
        rows = zip(*seqs)

    key = kwargs.get('key', None)
    for row in rows:
        compared = row if key is None else tuple(map(key, row))
        # Every entry equals the first one iff the whole row is uniform.
        if compared.count(compared[0]) != nseqs:
            yield row
34 changes: 33 additions & 1 deletion toolz/tests/test_itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
rest, last, cons, frequencies,
reduceby, iterate, accumulate,
sliding_window, count, partition,
partition_all, take_nth, pluck, join)
partition_all, take_nth, pluck, join,
diff)
from toolz.compatibility import range, filter
from operator import add, mul

Expand Down Expand Up @@ -403,3 +404,34 @@ def test_outer_join():
expected = set([(2, 2), (1, None), (None, 3)])

assert result == expected


def test_diff():
    """ Exercise ``diff``: argument validation, both call styles,
    ``default`` padding and the ``key`` function.
    """
    # Too few sequences, or an argument that cannot be iterated.
    assert raises(TypeError, lambda: list(diff()))
    assert raises(TypeError, lambda: list(diff([1, 2])))
    assert raises(TypeError, lambda: list(diff([1, 2], 3)))

    # Variadic usage with mixed iterable types.
    assert list(diff([1, 2], (1, 2), iter([1, 2]))) == []
    assert list(diff([1, 2, 3], (1, 10, 3), iter([1, 2, 10]))) == [
        (2, 10, 2), (3, 3, 10)]

    # Without a default the comparison stops at the shortest sequence;
    # with one, the shorter sequence is padded.
    assert list(diff([1, 2], [10])) == [(1, 10)]
    assert list(diff([1, 2], [10], default=None)) == [(1, 10), (2, None)]

    # non-variadic usage
    assert raises(TypeError, lambda: list(diff([])))
    assert raises(TypeError, lambda: list(diff([[]])))
    assert raises(TypeError, lambda: list(diff([[1, 2]])))
    assert raises(TypeError, lambda: list(diff([[1, 2], 3])))
    assert list(diff([(1, 2), (1, 3)])) == [(2, 3)]

    data1 = [{'cost': 1, 'currency': 'dollar'},
             {'cost': 2, 'currency': 'dollar'}]

    data2 = [{'cost': 100, 'currency': 'yen'},
             {'cost': 300, 'currency': 'yen'}]

    conversions = {'dollar': 1, 'yen': 0.01}

    def indollars(item):
        # Normalise a price record to its value in dollars.
        return conversions[item['currency']] * item['cost']

    # The first pair is equal once converted (1 dollar == 100 yen), so only
    # the second pair differs.  This comparison was previously missing its
    # ``assert`` and therefore never checked anything.
    assert list(diff(data1, data2, key=indollars)) == [
        ({'cost': 2, 'currency': 'dollar'}, {'cost': 300, 'currency': 'yen'})]

0 comments on commit 0ca38c8

Please sign in to comment.