Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 44 additions & 14 deletions toolz/itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,8 @@ def join(leftkey, leftseq, rightkey, rightseq,
This is a semi-streaming operation. The LEFT sequence is fully evaluated
and placed into memory. The RIGHT sequence is evaluated lazily and so can
be arbitrarily large.
(Note: If right_default is defined, then unique keys of rightseq
will also be stored in memory.)

>>> friends = [('Alice', 'Edith'),
... ('Alice', 'Zhao'),
Expand Down Expand Up @@ -858,7 +860,10 @@ def join(leftkey, leftseq, rightkey, rightseq,

Usually the key arguments are callables to be applied to the sequences. If
the keys are not obviously callable then it is assumed that indexing was
intended, e.g. the following is a legal change
intended, e.g. the following is a legal change.
The join is implemented as a hash join and the keys of leftseq must be
hashable. Additionally, if right_default is defined, then keys of rightseq
must also be hashable.

>>> # result = join(second, friends, first, cities)
>>> result = join(1, friends, 0, cities) # doctest: +SKIP
Expand All @@ -869,22 +874,47 @@ def join(leftkey, leftseq, rightkey, rightseq,
rightkey = getter(rightkey)

d = groupby(leftkey, leftseq)
seen_keys = set()

left_default_is_no_default = (left_default == no_default)
for item in rightseq:
key = rightkey(item)
seen_keys.add(key)
try:
left_matches = d[key]
for match in left_matches:
yield (match, item)
except KeyError:
if not left_default_is_no_default:
if left_default == no_default and right_default == no_default:
# Inner Join
for item in rightseq:
key = rightkey(item)
if key in d:
for left_match in d[key]:
yield (left_match, item)
elif left_default != no_default and right_default == no_default:
# Right Join
for item in rightseq:
key = rightkey(item)
if key in d:
for left_match in d[key]:
yield (left_match, item)
else:
yield (left_default, item)
elif right_default != no_default:
seen_keys = set()
seen = seen_keys.add

if left_default == no_default:
# Left Join
for item in rightseq:
key = rightkey(item)
seen(key)
if key in d:
for left_match in d[key]:
yield (left_match, item)
else:
# Full Join
for item in rightseq:
key = rightkey(item)
seen(key)
if key in d:
for left_match in d[key]:
yield (left_match, item)
else:
yield (left_default, item)

if right_default != no_default:
for key, matches in d.items():
for key, matches in iteritems(d):
if key not in seen_keys:
for match in matches:
yield (match, right_default)
Expand Down