# Setup

In [1]:
import numpy as np
from time import time
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
def calc_pi(darts_in_circle, total_darts):
    """
    Estimate pi from the outcome of a dart-throwing experiment.

    A circle of radius r = 0.5 inscribed in the unit square has area
    A = pi * r**2 = pi / 4. The fraction of darts that land inside the circle
    approximates that area, so pi is approximately 4 * (darts_in_circle /
    total_darts).

    Raises ZeroDivisionError if total_darts is 0.
    """
    # Scale first, then divide, so the arithmetic order matches the formula
    # 4 * hits / total exactly.
    scaled_hits = 4 * darts_in_circle
    return scaled_hits / float(total_darts)

In [3]:
def reporter(pi_approx, n_darts, x_time, x_type):
    """
    Print summary info about a run and return the same info as a dict.

    Execution time should increase with an increasing number of darts. Darts
    thrown per second should stay relatively constant and is sort of a measure
    of the speed of the processor (although it is highly dependent on the
    efficiencies of the various operators and functions involved).

    Parameters
    ----------
    pi_approx : the estimate of pi produced by the run.
    n_darts   : number of darts thrown.
    x_time    : execution time of the run, in seconds.
    x_type    : label for the execution strategy (e.g. 'serial').

    Returns
    -------
    dict with keys 'pi_approx', 'n_darts', 'x_time', 'dps' and 'x_type'.
    """
    print("Pi Approximation:", pi_approx)
    print("Number of Darts:", n_darts)
    print("Execution Time (s):", x_time)

    # A very short run can be timed as exactly 0.0 seconds when the clock is
    # coarse. Report an infinite rate in that case instead of crashing with a
    # ZeroDivisionError after the run has already completed.
    elapsed = float(x_time)
    dps = n_darts / elapsed if elapsed else float('inf')
    print("Darts Thrown per Second:", dps)

    report_dict = {'pi_approx': pi_approx,
                   'n_darts': n_darts,
                   'x_time': x_time,
                   'dps': dps,
                   'x_type': x_type}

    return report_dict

In [36]:
def dart_thrower(foo):
    """
    Throw one dart at the unit square and score it.

    The argument is ignored; it only lets the function be used with map-style
    APIs that pass one item per call. Returns 1 when the dart lands within 0.5
    of the centre (0.5, 0.5), otherwise 0.
    """
    # NOTE(review): the import is local, presumably so the function is
    # self-contained when shipped to remote workers -- confirm before moving it.
    import numpy as np
    dart_x = np.random.uniform(0, 1)
    dart_y = np.random.uniform(0, 1)
    distance = np.sqrt((dart_x - 0.5)**2 + (dart_y - 0.5)**2)
    return 1 if distance <= 0.5 else 0

# Initial Comparison

In [6]:
# Total number of darts thrown in each of the comparison runs below.
number_of_darts = 10 ** 6

## Serial Execution

In [7]:
def run_serial(n_darts):
    """
    Estimate pi by throwing darts one at a time in a plain Python loop.

    n_darts is coerced with int(). Only the throwing loop is timed. The summary
    is printed by reporter() and returned as a dict labelled 'serial'.
    """
    n_darts = int(n_darts)

    # Running count of darts that land inside the circle.
    hits = 0

    # Time only the simulation loop itself.
    started = time()

    # Each dart gets a random position in the unit square. It counts as a hit
    # when its distance from the centre (0.5, 0.5) is at most 0.5.
    for _ in range(n_darts):
        dart_x = np.random.uniform(0, 1)
        dart_y = np.random.uniform(0, 1)
        if np.sqrt((dart_x - 0.5)**2 + (dart_y - 0.5)**2) <= 0.5:
            hits += 1

    elapsed = time() - started

    # Estimate pi, then print and return the run summary.
    return reporter(calc_pi(hits, n_darts), n_darts, elapsed, 'serial')

In [8]:
# Baseline: the single-process loop.
serial_report = run_serial(n_darts=number_of_darts)

Pi Approximation: 3.140752
Number of Darts: 1000000
Execution Time (s): 2.201411008834839
Darts Thrown per Second: 454254.11065300304


## Parallel Execution (using `multiprocessing`)

In [9]:
def run_multiproc(n_darts):
    """
    Estimate pi by spreading the dart throws over a pool of 4 worker processes.

    n_darts is coerced with int(). The pool is created before the timer starts,
    so the reported time covers only the mapped simulation and the summation of
    its results. The summary is printed by reporter() and returned as a dict
    labelled 'multiproc'.
    """
    n_darts = int(n_darts)
    from multiprocessing import Pool

    # Workers started by fork inherit a copy of the parent's global NumPy
    # random state, so without reseeding every worker would throw the very same
    # sequence of darts and the estimate would rest on far fewer independent
    # samples than n_darts. Calling np.random.seed() with no arguments in each
    # worker reseeds it from fresh OS entropy.
    #
    # The `with` block guarantees the pool is torn down even if the map raises.
    with Pool(processes=4, initializer=np.random.seed) as pool:
        # We will use time() to record the execution time of the simulation.
        start_time = time()

        # Run the simulation.
        res = pool.map(dart_thrower, range(n_darts))
        number_of_darts_in_circle = sum(res)

        # Record the time after the simulation has run.
        end_time = time()

    # The total time required to run the simulation is the difference.
    execution_time = end_time - start_time

    # Estimate pi.
    pi_approx = calc_pi(number_of_darts_in_circle, n_darts)

    # Print out some summary info about the run.
    report = reporter(pi_approx, n_darts, execution_time, 'multiproc')

    return report

In [10]:
# Same workload, spread over a multiprocessing pool.
multiproc_report = run_multiproc(n_darts=number_of_darts)

Pi Approximation: 3.145152
Number of Darts: 1000000
Execution Time (s): 1.7113430500030518
Darts Thrown per Second: 584336.3783773317


## Parallel Execution (using `dask.bag`)

In [18]:
def run_dask(n_darts):
    """
    Estimate pi by mapping dart_thrower over a dask bag.

    n_darts is coerced with int(). The bag is built before the timer starts, so
    the reported time covers the mapped computation and the summation of its
    results. The summary is printed by reporter() and returned as a dict
    labelled 'dask'.
    """
    import dask.bag as db

    n_darts = int(n_darts)

    # Build the bag up front; one element per dart to be thrown.
    dart_bag = db.from_sequence(range(n_darts))

    # Time the simulation: throw every dart, then count the hits.
    started = time()
    outcomes = dart_bag.map(dart_thrower).compute()
    hits = sum(outcomes)
    elapsed = time() - started

    # Estimate pi, then print and return the run summary.
    return reporter(calc_pi(hits, n_darts), n_darts, elapsed, 'dask')

In [19]:
# Same workload again, this time through a dask bag.
dask_report = run_dask(n_darts=number_of_darts)

Pi Approximation: 3.145472
Number of Darts: 1000000
Execution Time (s): 6.859676837921143
Darts Thrown per Second: 145779.46215656635


## Slower Options

In [26]:
# 50 evenly spaced dart counts from 10 to 10000000. The values are floats; the
# run_* functions convert their argument with int() before using it.
volleys = np.linspace(10, 10000000, num=50)

### `dask.distributed`

In [13]:
from dask.distributed import Executor
# NOTE(review): `Executor` looks like the older name for distributed's `Client`;
# confirm it is still exported by the installed version. If it is renamed, use
# an alias, because `Client` is also imported from ipyparallel further down.
# No scheduler address is passed here -- presumably this starts or connects to
# a local cluster; TODO confirm.
e = Executor(set_as_default=True)

In [17]:
%%time
# Map dart_thrower over one item per dart on the distributed executor, then
# gather the results. Only 100000 darts are thrown here, versus
# number_of_darts = 1000000 in the comparison runs above.
throws = e.map(dart_thrower, range(100000))
# The gathered outcomes are bound to `foo` and not used further in this section.
foo = e.gather(throws)

CPU times: user 34.9 s, sys: 1.43 s, total: 36.3 s
Wall time: 47.8 s


### `ipyparallel`

In [32]:
from ipyparallel import Client

In [33]:
# ipyparallel client created with default arguments.
# NOTE(review): assumes an ipyparallel cluster is already running -- confirm.
c = Client()

In [34]:
# Load-balanced view used for the map call in the timing cell below.
view = c.load_balanced_view()

In [40]:
%%time
# Map dart_thrower over one item per dart through the load-balanced view and
# materialize the results. Only 10000 darts are thrown here, versus
# number_of_darts = 1000000 in the comparison runs above.
foo = list(view.map(dart_thrower, range(10000)))

CPU times: user 29.5 s, sys: 3.81 s, total: 33.3 s
Wall time: 53.3 s
