# 18.0 Randomized Algorithms

## 18.1 Pick a random element from an infinite stream

### Problem Statement
Given a stream of elements too large to store in memory, pick a random element from the stream with uniform probability.

In [1]:
from collections import defaultdict, namedtuple
import random
import unittest


def reservoir_sample(iterable, k=1):
    """Use reservoir sampling to pick k random elements from stream.

    The stream is read in a single pass and at most k elements are held
    in memory, so every element is equally likely to be in the result
    without the stream's length being known in advance.

    Args:
        iterable: Any iterable or iterator; it is consumed in one pass.
        k: Number of elements to sample (the reservoir size).

    Returns:
        A list of the sampled elements. It holds k elements, or every
        element of the stream when the stream has fewer than k, and is
        empty when k <= 0.
    """
    # Accept plain iterables (lists, ranges, ...) as well as iterators.
    stream = iter(iterable)

    # Fill the reservoir with the first k elements. A stream that runs out
    # early is returned whole instead of leaking StopIteration to the caller.
    randx = []
    while len(randx) < k:
        try:
            randx.append(next(stream))
        except StopIteration:
            return randx

    # The n-th element replaces a uniformly chosen reservoir slot with
    # probability k/n: rind is uniform on 1..n and only 1..k map to a slot.
    n = len(randx) + 1
    for x in stream:
        rind = random.randint(1, n)
        if rind <= k:
            randx[rind-1] = x
        n += 1

    return randx


class ReservoirSampleTest(unittest.TestCase):
    """Compare empirical selection frequencies of reservoir_sample."""

    def setUp(self):
        # A fixed seed makes every run draw the same random sequence, which
        # is what allows the exact frequency tables below to be asserted.
        random.seed(13)

    def test_reservoir_sample(self):
        """Sample repeatedly and check each value's observed frequency."""
        Scenario = namedtuple(
            'case', ['nsamples', 'xmin', 'xmax', 'k', 'expected'])

        # Frequencies with which each value appears for a reservoir of 1.
        single_slot = {
            1: 0.1009, 2: 0.1023, 3: 0.1008, 4: 0.0955, 5: 0.1059,
            6: 0.0978, 7: 0.0986, 8: 0.1018, 9: 0.096, 10: 0.1004,
        }
        # Frequencies with which each value appears for a reservoir of 2.
        double_slot = {
            1: 0.1968, 2: 0.2016, 3: 0.2055, 4: 0.2088, 5: 0.1989,
            6: 0.1957, 7: 0.2033, 8: 0.1992, 9: 0.1865, 10: 0.2037,
        }
        scenarios = (
            Scenario(10000, 1, 10, 1, single_slot),
            Scenario(10000, 1, 10, 2, double_slot),
        )

        for scenario in scenarios:
            hits = defaultdict(int)
            for _ in range(scenario.nsamples):
                # Each trial samples from a fresh iterator over xmin..xmax.
                stream = iter(range(scenario.xmin, scenario.xmax + 1))
                for picked in reservoir_sample(stream, k=scenario.k):
                    hits[picked] += 1
            # Turn raw counts into per-value frequencies over all trials.
            observed = {
                value: count / scenario.nsamples
                for value, count in hits.items()
            }
            self.assertEqual(observed, scenario.expected)


# Run the test in-process: argv=[''] supplies only a dummy program name so
# no real command-line arguments are parsed, verbosity=2 prints one line per
# test, and exit=False stops unittest.main from calling sys.exit() afterwards.
# NOTE(review): the first positional parameter of unittest.main is `module`;
# a TestCase instance is passed here instead, presumably to restrict the run
# to this one class -- confirm this is intentional.
unittest.main(ReservoirSampleTest(), argv=[''], verbosity=2, exit=False)

test_reservoir_sample (__main__.ReservoirSampleTest) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.342s

OK


<unittest.main.TestProgram at 0x7fbbfc249710>