Skip to content

Commit

Permalink
Merge pull request #227 from mrocklin/topk
Browse files Browse the repository at this point in the history
alias heapq.nlargest to topk
  • Loading branch information
eriknw committed Feb 6, 2015
2 parents 785edcf + 431df04 commit 7a77607
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Itertoolz
take
tail
take_nth
topk
unique

.. currentmodule:: toolz.recipes
Expand Down
32 changes: 27 additions & 5 deletions toolz/itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
'join', 'tail', 'diff')
'join', 'tail', 'diff', 'topk')


def remove(predicate, seq):
Expand Down Expand Up @@ -57,11 +57,11 @@ def groupby(key, seq):
""" Group a collection by a key function
>>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
>>> groupby(len, names)
>>> groupby(len, names) # doctest: +SKIP
{3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
>>> iseven = lambda x: x % 2 == 0
>>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])
>>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP
{False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
Non-callable keys imply grouping on a member.
Expand Down Expand Up @@ -518,10 +518,10 @@ def reduceby(key, binop, seq, init=no_default):
>>> data = [1, 2, 3, 4, 5]
>>> reduceby(iseven, add, data)
>>> reduceby(iseven, add, data) # doctest: +SKIP
{False: 9, True: 6}
>>> reduceby(iseven, mul, data)
>>> reduceby(iseven, mul, data) # doctest: +SKIP
{False: 15, True: 8}
Complex Example
Expand Down Expand Up @@ -855,3 +855,25 @@ def diff(*seqs, **kwargs):
vals = tuple(map(key, items))
if vals.count(vals[0]) != N:
yield items


def topk(k, seq, key=None):
    """
    Find the k largest elements of a sequence

    Operates lazily in ``n*log(k)`` time

    >>> topk(2, [1, 100, 10, 1000])
    (1000, 100)

    Use a key function to change sorted order

    >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
    ('Charlie', 'Alice')

    A key that is not callable is converted with ``getter`` before use,
    so elements can be ranked by a member (for example ``key='a'`` ranks
    dicts by their ``'a'`` item).  This includes falsy keys such as the
    index ``0``.

    The result is always returned as a tuple, largest element first.

    See also:
        heapq.nlargest
    """
    # Compare against None explicitly.  A plain truthiness test would treat
    # falsy-but-valid keys (e.g. the index 0) as "no key conversion needed"
    # and hand the raw value to heapq, which would then try to call it.
    if key is not None and not callable(key):
        key = getter(key)
    return tuple(heapq.nlargest(k, seq, key=key))
20 changes: 19 additions & 1 deletion toolz/tests/test_itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
reduceby, iterate, accumulate,
sliding_window, count, partition,
partition_all, take_nth, pluck, join,
diff)
diff, topk)
from toolz.compatibility import range, filter
from operator import add, mul

Expand Down Expand Up @@ -435,3 +435,21 @@ def indollars(item):

list(diff(data1, data2, key=indollars)) == [
({'cost': 2, 'currency': 'dollar'}, {'cost': 300, 'currency': 'yen'})]


def test_topk():
    """topk returns the k largest items as a tuple, honouring ``key``."""
    def negate(x):
        return -x

    records = [{'a': 1, 'b': 10}, {'a': 2, 'b': 9},
               {'a': 10, 'b': 1}, {'a': 9, 'b': 2}]

    # Natural ordering.
    assert topk(2, [4, 1, 5, 2]) == (5, 4)

    # Callable key, fed from both a list and a one-shot iterator.
    assert topk(2, [4, 1, 5, 2], key=negate) == (1, 2)
    assert topk(2, iter([5, 1, 4, 2]), key=negate) == (1, 2)

    # Non-callable keys rank the dicts by the named item.
    largest_by_a = topk(2, records, key='a')
    assert largest_by_a == ({'a': 10, 'b': 1}, {'a': 9, 'b': 2})

    largest_by_b = topk(2, records, key='b')
    assert largest_by_b == ({'a': 1, 'b': 10}, {'a': 2, 'b': 9})


def test_topk_is_stable():
    """Elements whose keys tie come back in their original input order."""
    def constant(x):
        return 1

    values = [5, 9, 2, 1, 5, 3]
    result = topk(4, values, key=constant)
    assert result == (5, 9, 2, 1)

0 comments on commit 7a77607

Please sign in to comment.