In [26]:
import concurrent
import concurrent.futures
import multiprocessing
from functools import partial

import numpy as np
import scipy
import scipy.stats

In [65]:
def execute(exectutor, random_function, n_samples):
    """Run ``random_function`` ``n_samples`` times on an executor pool.

    Parameters
    ----------
    exectutor : callable
        Executor class (or factory) that accepts ``max_workers`` and works as
        a context manager, e.g. ``concurrent.futures.ThreadPoolExecutor`` or
        ``concurrent.futures.ProcessPoolExecutor``.
    random_function : callable
        Zero-argument callable; submitted once per requested sample.
    n_samples : int
        Number of tasks to submit.

    Returns
    -------
    list
        The task results in *completion* order (not submission order). A task
        that raises is reported with ``print`` and contributes no entry, so
        the list can be shorter than ``n_samples``.
    """
    random_numbers = []
    with exectutor(max_workers=multiprocessing.cpu_count()) as executor:
        # A plain list is enough: nothing downstream needs to map a future
        # back to the index of the task that produced it.
        futures = [executor.submit(random_function) for _ in range(n_samples)]
        for future in concurrent.futures.as_completed(futures):
            try:
                random_numbers.append(future.result())
            except Exception as exc:
                # Best effort: report the failure and keep collecting the rest.
                print(f"generated an exception: {exc}")
    return random_numbers

## Randomness

In [None]:
# np.random.random() samples from a continuous uniform distribution over [0, 1)
# https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.random.random.html#numpy.random.random

def generate_random():
    """Return a single float drawn from NumPy's global RNG, uniform on [0, 1)."""
    sample = np.random.random()
    return sample

def generate_random_seed():
    """Reseed NumPy's global RNG, then return one uniform sample from [0, 1).

    Calling ``np.random.seed()`` without an argument reseeds the generator
    from fresh entropy, so the value returned does not depend on whatever RNG
    state the calling process started with.
    """
    # ``scipy.random`` was only an alias of ``numpy.random`` and is not
    # available in current SciPy releases; seed NumPy's generator directly.
    np.random.seed()
    return np.random.random()

In [5]:
# The Kolmogorov-Smirnov (KS) test checks goodness of fit to the uniform distribution.
# https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.kstest.html
# The null hypothesis is that the empirical distribution of x and 'uniform' are equal.
def test_uniform(x):
    return scipy.stats.kstest(x, 'uniform')

### ProcessPoolExecutor without seed


In [7]:
execute(concurrent.futures.ProcessPoolExecutor, generate_random, 10)

[0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094,
 0.8147129803842094]

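That doesn't look random to me: every value is identical. Presumably each worker process starts with a copy of the parent's NumPy RNG state (as happens when workers are forked), so they all draw the same "random" number. Let's use the KS test to check.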

In [20]:
x = execute(concurrent.futures.ProcessPoolExecutor, generate_random, 1000)
test_uniform(x).pvalue

5.688366223765867e-10

The p-value is tiny, so we can reject the null hypothesis: samples this far from uniform would be extremely unlikely if they really came from a uniform distribution.

### ThreadPoolExecutor without seed

In [30]:
execute(concurrent.futures.ThreadPoolExecutor, generate_random, 10)

[0.4022700186390239,
 0.03626181650585103,
 0.8421808068397407,
 0.8373856895356356,
 0.5731373725691851,
 0.631598702196628,
 0.6504276922773542,
 0.5015282007961973,
 0.21039857328688172,
 0.581512328242947]

Looks better, let's make sure that it is.

In [63]:
x = execute(concurrent.futures.ThreadPoolExecutor, generate_random, 1000)
test_uniform(x).pvalue

0.7443606847918853

Looks good. With a p-value this large, we can't reject the null hypothesis that the samples are uniform.

There is also a second possible solution: keep using processes, but reseed the random number generator inside each task.

### ProcessPoolExecutor with seed

In [31]:
execute(concurrent.futures.ProcessPoolExecutor, generate_random_seed, 10)

[0.15024702541952184,
 0.663013373426398,
 0.9071019315049326,
 0.5581711681531546,
 0.760395688185846,
 0.12881868459866508,
 0.6504844220709478,
 0.025152605613995127,
 0.5770563186769598,
 0.09618697683998012]

In [64]:
x = execute(concurrent.futures.ProcessPoolExecutor, generate_random_seed, 1000)
test_uniform(x).pvalue

0.6387485683349132

## Speedup

One thing to note is that multithreading in Python isn't truly parallel for CPU-bound code: the global interpreter lock (GIL) lets only one thread execute Python bytecode at a time. Depending on what you are running, there might be little to no speedup. Also, be careful that each task isn't so fast that the overhead of multiprocessing is no longer worth it.

In [48]:
%%timeit
[generate_random() for _ in range(100000)]

47.4 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [49]:
%%timeit
x = execute(concurrent.futures.ProcessPoolExecutor, generate_random, 100000)

21.1 s ± 205 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [51]:
%%timeit
x = execute(concurrent.futures.ThreadPoolExecutor, generate_random, 100000)

1.49 s ± 35.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
def generate_many_random(n):
    """Draw ``n`` values from NumPy's global RNG and discard them.

    Serves as a CPU-bound workload for the timing cells; returns ``None``.
    """
    for _ in range(n):
        np.random.random()

In [53]:
%%timeit
[generate_many_random(1000000) for _ in range(24)]

9.57 s ± 145 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [54]:
%%timeit
x = execute(concurrent.futures.ProcessPoolExecutor, partial(generate_many_random, 1000000), 24)

2.57 s ± 300 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [55]:
%%timeit
x = execute(concurrent.futures.ThreadPoolExecutor, partial(generate_many_random, 1000000), 24)

9.54 s ± 75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
