Skip to content

Commit

Permalink
Revert "Add function random_sample to itertoolz."
Browse files Browse the repository at this point in the history
  • Loading branch information
mrocklin committed Apr 17, 2016
1 parent 3b7c54a commit 53d52bc
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 71 deletions.
2 changes: 0 additions & 2 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,3 @@ Nikolaos-Digenis Karagiannis [@digenis](https://github.com/di
Joe Jevnik [@llllllllll](https://github.com/llllllllll)

Rory Kirchner [@roryk](https://github.com/roryk)

[Steven Cutting](http://steven-cutting.github.io) [@steven_cutting](https://github.com/steven-cutting)
1 change: 0 additions & 1 deletion doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ Itertoolz
partition_all
peek
pluck
random_sample
reduceby
remove
second
Expand Down
44 changes: 2 additions & 42 deletions toolz/itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
import collections
import operator
from functools import partial
from random import Random
from toolz.compatibility import (map, filterfalse, zip, zip_longest, iteritems,
filter)
from toolz.compatibility import (map, filterfalse, zip, zip_longest, iteritems)
from toolz.utils import no_default


Expand All @@ -14,7 +12,7 @@
'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
'join', 'tail', 'diff', 'topk', 'peek', 'random_sample')
'join', 'tail', 'diff', 'topk', 'peek')


def remove(predicate, seq):
Expand Down Expand Up @@ -903,41 +901,3 @@ def peek(seq):
iterator = iter(seq)
item = next(iterator)
return item, itertools.chain([item], iterator)


def random_sample(prob, seq, random_state=None):
    """ Lazily keep each element of seq with probability prob

    The result is a lazy iterator over the surviving items of ``seq``.
    Every item is judged on its own, without replacement, so the number of
    items that come back varies between runs.  In the example below the
    first call produced 13 items and the second produced 6.

    >>> seq = list(range(100))
    >>> list(random_sample(0.1, seq)) # doctest: +SKIP
    [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
    >>> list(random_sample(0.1, seq)) # doctest: +SKIP
    [6, 44, 54, 61, 69, 94]

    Passing an integer seed as ``random_state`` makes the sampling
    deterministic: the same seed yields the same sample every time.

    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]

    ``random_state`` may also be any object exposing a ``random`` method
    that returns floats between 0.0 and 1.0 (exclusive).

    >>> from random import Random
    >>> randobj = Random(2016)
    >>> list(random_sample(0.1, seq, random_state=randobj))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    """
    # Anything that already has a ``random`` attribute is used as the
    # generator itself; every other value is treated as a seed.
    if hasattr(random_state, 'random'):
        rng = random_state
    else:
        rng = Random(random_state)

    def keep(_):
        # The item is ignored: only a fresh random draw decides its fate.
        return rng.random() < prob

    return filter(keep, seq)
27 changes: 1 addition & 26 deletions toolz/tests/test_itertoolz.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from itertools import starmap
from toolz.utils import raises
from functools import partial
from random import Random
from toolz.itertoolz import (remove, groupby, merge_sorted,
concat, concatv, interleave, unique,
isiterable, getter,
Expand All @@ -12,7 +11,7 @@
reduceby, iterate, accumulate,
sliding_window, count, partition,
partition_all, take_nth, pluck, join,
diff, topk, peek, random_sample)
diff, topk, peek)
from toolz.compatibility import range, filter
from operator import add, mul

Expand Down Expand Up @@ -477,27 +476,3 @@ def test_peek():
assert list(blist) == alist

assert raises(StopIteration, lambda: peek([]))


def test_random_sample():
    # Exercises random_sample's determinism and its handling of the
    # different kinds of ``random_state`` values.
    alist = list(range(100))

    # With prob=1 every draw passes, so the whole input comes back.
    assert list(random_sample(prob=1, seq=alist, random_state=2016)) == alist

    mk_rsample = lambda rs=1: list(random_sample(prob=0.1,
                                                 seq=alist,
                                                 random_state=rs))
    # Same seed, same sample.
    rsample1 = mk_rsample()
    assert rsample1 == mk_rsample()

    # An integer seed and a Random instance built from it are equivalent.
    rsample2 = mk_rsample(1984)
    randobj = Random(1984)
    assert rsample2 == mk_rsample(randobj)

    # Different seeds give different samples.
    assert rsample1 != rsample2

    # Seed with the *hash* of arbitrary objects rather than the objects
    # themselves: passing a non-int/float/str/bytes seed to Random was
    # deprecated in Python 3.9 and raises TypeError from 3.11 on.
    assert mk_rsample(hash(object)) == mk_rsample(hash(object))
    assert mk_rsample(hash(object)) != mk_rsample(hash(object()))
    # Byte and text seeds with the same content are expected to agree.
    assert mk_rsample(b"a") == mk_rsample(u"a")

    # A list is never an acceptable seed.
    assert raises(TypeError, lambda: mk_rsample([]))

0 comments on commit 53d52bc

Please sign in to comment.