In [None]:
%matplotlib inline
import random
import time
import matplotlib.pyplot as plt
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool

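This notebook was tested on a Mac. Changes may be required to run this code on a Windows machine: where `multiprocessing` uses the *spawn* start method (always on Windows, and by default on macOS since Python 3.8), worker functions defined in a notebook cell may not be importable by the child processes, and moving them into a separate `.py` module is the usual workaround. 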

In these examples I am hardcoding the number of cores to use (`NUM_PROCESSES = 4`). By default the `multiprocessing` module uses all available cores, so you shouldn't need to hardcode the number of processes unless you want to manage your resources. In my experience, at least on Windows machines, it is better to set the number of processes to the number of available cores minus one; otherwise, the operating system starts to feel laggy and unresponsive. 

## Estimate the value of pi using the Monte Carlo method

We generate many random points $(x, y)$ in the unit square. The proportion of points that fall inside the quarter unit circle ($x^2 + y^2 \le 1$), with respect to the total amount of generated points, approximates $\pi / 4$; multiplying that proportion by 4 gives our approximation of $\pi$. 

This is an ideal first problem because the workload can be evenly split across a number of processes. 

In [None]:
def estimate_nbr_points_in_quarter_circle(nbr_estimates):
    """Count random points of the unit square that land in the quarter unit circle.

    Each trial draws ``x`` and then ``y`` uniformly from [0, 1] with the
    standard ``random`` module and checks whether ``x*x + y*y <= 1.0``.

    Args:
        nbr_estimates: Number of trials to run. It is converted with ``int()``,
            so a float such as ``2.9`` is truncated to ``2``.

    Returns:
        int: How many of the trials fell inside the quarter unit circle.
    """
    draw = random.uniform
    hits = 0

    for _ in range(int(nbr_estimates)):
        # Draw x before y so the sequence of RNG calls is x, y, x, y, ...
        px = draw(0, 1)
        py = draw(0, 1)
        if px * px + py * py <= 1.0:
            hits += 1
    return hits

This version uses a pool of processes. The time is measured after creating the pool, because spawning processes (as opposed to spawning threads) has some overhead that we do not want to include in the measurement. 

In [None]:
# Target number of random points per run. Stored as an int so that it can be
# split into whole per-worker counts with floor division.
nbr_samples_in_total = int(1e8)

# Elapsed wall-clock time (seconds) of each run, in order of increasing
# number of processes.
times_proc = []

for num_processes in range(1, 9):
    print('Number of processes: ' + str(num_processes))
    # Every worker gets the same whole number of samples.
    nbr_samples_per_worker = nbr_samples_in_total // num_processes
    print('Making {} samples per worker'.format(nbr_samples_per_worker))
    nbr_trials_per_process = [nbr_samples_per_worker] * num_processes
    # Number of points really drawn. It is slightly below the target when the
    # target is not divisible by the number of workers, so the estimate must be
    # normalised by this value rather than by nbr_samples_in_total.
    nbr_samples_drawn = nbr_samples_per_worker * num_processes

    # The pool is created before the clock starts, so the cost of starting the
    # worker processes is not part of the measurement.
    pool = Pool(processes=num_processes)
    try:
        t1 = time.time()
        nbr_in_unit_circles = pool.map(estimate_nbr_points_in_quarter_circle, nbr_trials_per_process)
        # We multiply by 4 because we are producing samples only on one quarter of the unit circle
        pi_estimate = sum(nbr_in_unit_circles) * 4.0 / nbr_samples_drawn
        delta = time.time() - t1
    finally:
        # Shut the workers down on every iteration; otherwise each pass of the
        # loop leaves its processes alive next to the ones of the next pass.
        # map() has already returned, so no task is pending at this point.
        pool.terminate()
        pool.join()

    print('Estimated pi ' + str(pi_estimate))
    print('Delta: ' + str(delta))
    print('-----------------')

    times_proc.append(delta)

This version is based on threads. The problem with threads is that, due to Python's GIL (Global Interpreter Lock) constraint, only one thread can execute Python bytecode at a time. As a consequence, adding more threads does not speed up this CPU-bound computation and actually slows it down (due to the overhead of switching between threads).

In [None]:
# Target number of random points per run. Stored as an int so that it can be
# split into whole per-worker counts with floor division.
nbr_samples_in_total = int(1e8)

# Elapsed wall-clock time (seconds) of each run, in order of increasing
# number of threads.
times_thread = []

for num_processes in range(1, 9):
    # The workers of this cell are threads, so the message says so.
    print('Number of threads: ' + str(num_processes))
    # Every worker gets the same whole number of samples.
    nbr_samples_per_worker = nbr_samples_in_total // num_processes
    print('Making {} samples per worker'.format(nbr_samples_per_worker))
    nbr_trials_per_process = [nbr_samples_per_worker] * num_processes
    # Number of points really drawn. It is slightly below the target when the
    # target is not divisible by the number of workers, so the estimate must be
    # normalised by this value rather than by nbr_samples_in_total.
    nbr_samples_drawn = nbr_samples_per_worker * num_processes

    # The pool is created before the clock starts, mirroring the process-based
    # benchmark so both measurements cover the same work.
    pool = ThreadPool(processes=num_processes)
    try:
        t1 = time.time()
        nbr_in_unit_circles = pool.map(estimate_nbr_points_in_quarter_circle, nbr_trials_per_process)
        # We multiply by 4 because we are producing samples only on one quarter of the unit circle
        pi_estimate = sum(nbr_in_unit_circles) * 4.0 / nbr_samples_drawn
        delta = time.time() - t1
    finally:
        # Release the worker threads on every iteration instead of leaving
        # them alive until the kernel exits. map() has already returned, so no
        # task is pending at this point.
        pool.terminate()
        pool.join()

    print('Estimated pi ' + str(pi_estimate))
    print('Delta: ' + str(delta))
    print('-----------------')

    times_thread.append(delta)

In [None]:
# Compare how the elapsed time changes with the number of workers for the
# process-based and the thread-based runs.
fig, ax = plt.subplots()
# The x positions are derived from the measurements themselves (the first
# entry of each list is the run with one worker), so the plot stays valid if
# the range of worker counts in the benchmark cells is changed.
ax.plot(range(1, len(times_proc) + 1), times_proc, label='Processes')
ax.plot(range(1, len(times_thread) + 1), times_thread, label='Threads')
ax.set_xlabel('number of workers')
# time.time() differences are expressed in seconds.
ax.set_ylabel('time (s)')
ax.set_title('Monte Carlo estimate of pi: processes vs. threads')
ax.grid(True)
# Labels are attached to each line above, so the legend cannot get out of
# sync with the plotting order.
ax.legend()
fig.set_figwidth(16)