# Playing with concurrency

In [29]:
import concurrent.futures
import matplotlib.pyplot as plt
import numpy as np
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
%matplotlib inline

Let's write some functions that do work.

In [102]:
def n_fibonacci(n=10):
    """Calculate the Nth Fibonacci number"""
    i = 0
    x = 1
#     print(x)
    for __ in range(n):
        x, i = x + i, x
#         print(x)
    return x

def fake_write(whatevs=None):
    """Take some time as if it were writing to disk"""
#     long_time = np.random.randint(1,5)
    long_time = np.random.random()
    time.sleep(long_time)
    return whatevs

In [31]:
n_fibonacci()

89

Let's create an array (list) of integers to act as input for our functions

In [92]:
randints = np.random.randint(20000, 400000, 50)
randints

array([251751, 242286,  76217,  49458, 379159, 338953, 296870, 148251,
       228062, 243364, 324457,  79559, 326327,  29819, 321357, 261954,
       300343, 316548, 174892,  77046, 270499,  35642, 195284,  78945,
       252897, 217306, 171520, 308306,  46651, 327820,  53522, 333764,
       306389, 254000, 123751, 273276, 315117, 121687, 130857, 193591,
       179324, 331520, 207157,  89921, 135144, 151541, 320873, 353883,
        69192, 310130])

Let's now take this numbers as inputs for our function, and then "write" the results somewhere (e.g. disk/cloud storage)

In [101]:
%%time
results = [
    n_fibonacci(n) for n in randints
]
for __ in results:
    fake_write()

CPU times: user 37.5 s, sys: 4 ms, total: 37.5 s
Wall time: 1min


This is definitely too long... how can one improve it?

### Quick code analysis

This piece of code has two main parts (does it look familiar):

1. A computing part.
2. An I/O part.

Let's take a look at each one separately.

### Multiprocessing

In [103]:
with ProcessPoolExecutor() as p_executor:
    results = p_executor.map(n_fibonacci, randints)
#         results = executor.map(fake_write, results)
#         print(list(results)[:3])

### Using map

In [97]:
%%time
with ProcessPoolExecutor() as p_executor:
    with ThreadPoolExecutor() as executor:
        results = p_executor.map(n_fibonacci, randints)
        results = executor.map(fake_write, results)
        print(list(results)[:3])

[None, None, None]
CPU times: user 60 ms, sys: 24 ms, total: 84 ms
Wall time: 24.3 s


### Using submit

In [99]:
%%time
with ProcessPoolExecutor() as p_executor:
    with ThreadPoolExecutor() as executor:
        futures = [
            p_executor.submit(n_fibonacci, randn) for randn in randints
        ]
        t_futures = [
            executor.submit(
                fake_write,
                future.result()
            ) for future in concurrent.futures.as_completed(futures)
        ]
        concurrent.futures.wait(t_futures)

CPU times: user 60 ms, sys: 28 ms, total: 88 ms
Wall time: 28.7 s


## Improvements

### Chunksize

In [59]:
%%time
with ProcessPoolExecutor() as p_executor:
    with ThreadPoolExecutor() as executor:
        results = p_executor.map(n_fibonacci, randints, chunksize=32)
        executor.map(fake_write, results)

CPU times: user 44 ms, sys: 24 ms, total: 68 ms
Wall time: 24.1 s
