# Parallelism

There are basically four interesting scenarios we can distinguish:

  - Few long-running IO bound jobs
  - Few long-running CPU bound jobs
  - Many short-running IO bound jobs
  - Many short-running CPU bound jobs

In [22]:
import concurrent.futures
import time
import numpy as np

def long_io_bound(duration=1):
    """Simulate a long I/O-bound job by blocking the calling thread.

    Args:
        duration: Seconds to sleep. Defaults to 1, the value used by the
            benchmarks in this notebook.

    Returns:
        None.

    Raises:
        ValueError: If ``duration`` is negative (raised by ``time.sleep``).
    """
    time.sleep(duration)
        
def long_cpu_bound(iterations=10000):
    """Run a long CPU-bound job: repeated squaring of a 90x90 random matrix.

    The product is rescaled by its largest entry after every step. Without
    the rescaling, repeatedly squaring a matrix whose entries are drawn from
    [0, 1) overflows float64 to ``inf`` after fewer than ten squarings, so
    nearly all of the remaining iterations would multiply ``inf`` values.

    Args:
        iterations: Number of squaring steps. Defaults to 10000.

    Returns:
        None. The result is discarded; only the CPU work matters.
    """
    matrix = np.random.uniform(size=(90, 90))
    for _ in range(iterations):
        matrix = matrix @ matrix
        # Bring the largest entry back to 1 so the next squaring cannot overflow.
        matrix /= matrix.max()
    
def short_io_bound(duration=0.001):
    """Simulate a short I/O-bound job by blocking the calling thread.

    Args:
        duration: Seconds to sleep. Defaults to 0.001 (1 ms), the value used
            by the benchmarks in this notebook.

    Returns:
        None.

    Raises:
        ValueError: If ``duration`` is negative (raised by ``time.sleep``).
    """
    time.sleep(duration)
    
def short_cpu_bound(iterations=10):
    """Run a short CPU-bound job: repeated squaring of a 90x90 random matrix.

    The product is rescaled by its largest entry after every step. Without
    the rescaling, repeatedly squaring a matrix whose entries are drawn from
    [0, 1) overflows float64 to ``inf`` after fewer than ten squarings, which
    is within the default iteration count.

    Args:
        iterations: Number of squaring steps. Defaults to 10.

    Returns:
        None. The result is discarded; only the CPU work matters.
    """
    matrix = np.random.uniform(size=(90, 90))
    for _ in range(iterations):
        matrix = matrix @ matrix
        # Bring the largest entry back to 1 so the next squaring cannot overflow.
        matrix /= matrix.max()
        
# Number of jobs submitted per benchmark: "few" pairs with the long-running
# jobs, "many" with the short-running ones.
few_jobs, many_jobs = 10, 10 ** 4

In [6]:
%%timeit 
long_io_bound()

1 s ± 95.3 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [23]:
%%timeit 
long_cpu_bound()

1.22 s ± 39.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
%%timeit 
short_io_bound()

1.14 ms ± 6.41 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [26]:
%%timeit 
short_cpu_bound()

1.41 ms ± 36.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Multiprocessing

In [27]:
%%time
with concurrent.futures.ProcessPoolExecutor() as executor:
    for i in range(few_jobs):
        executor.submit(long_io_bound)

CPU times: user 24 ms, sys: 4 ms, total: 28 ms
Wall time: 5.05 s


In [28]:
%%time
with concurrent.futures.ProcessPoolExecutor() as executor:
    for i in range(few_jobs):
        executor.submit(long_cpu_bound)

CPU times: user 16 ms, sys: 8 ms, total: 24 ms
Wall time: 15.4 s


In [29]:
%%time
with concurrent.futures.ProcessPoolExecutor() as executor:
    for i in range(many_jobs):
        executor.submit(short_io_bound)

CPU times: user 4.22 s, sys: 808 ms, total: 5.03 s
Wall time: 7.31 s


In [30]:
%%time
with concurrent.futures.ProcessPoolExecutor() as executor:
    for i in range(many_jobs):
        executor.submit(short_cpu_bound)

CPU times: user 3.1 s, sys: 224 ms, total: 3.32 s
Wall time: 19.2 s


## Threading

In [31]:
%%time
with concurrent.futures.ThreadPoolExecutor() as executor:
    for i in range(few_jobs):
        executor.submit(long_io_bound)

CPU times: user 0 ns, sys: 4 ms, total: 4 ms
Wall time: 1.01 s


In [34]:
%%time
with concurrent.futures.ThreadPoolExecutor() as executor:
    for i in range(few_jobs):
        executor.submit(long_cpu_bound)

CPU times: user 8min, sys: 18min 5s, total: 26min 6s
Wall time: 13min 57s


In [32]:
%%time
with concurrent.futures.ThreadPoolExecutor() as executor:
    for i in range(many_jobs):
        executor.submit(short_io_bound)

CPU times: user 784 ms, sys: 156 ms, total: 940 ms
Wall time: 1.49 s


In [33]:
%%time
with concurrent.futures.ThreadPoolExecutor() as executor:
    for i in range(many_jobs):
        executor.submit(short_cpu_bound)

CPU times: user 9min 2s, sys: 20min 40s, total: 29min 43s
Wall time: 15min 52s


## Async (Cooperative Multitasking)

In [41]:
import asyncio

async def cpu_bound(matrix):
    """Asynchronous matrix squaring: return ``matrix @ matrix``.

    The body contains no ``await``, so awaiting this coroutine runs the
    multiplication to completion without yielding to the event loop.
    """
    squared = matrix @ matrix
    return squared
    
async def long_io_bound(duration=1):
    """Simulate a long I/O-bound job without blocking the event loop.

    NOTE: this coroutine reuses the name of the blocking ``long_io_bound``
    defined earlier in the notebook and replaces it once this cell has run.

    Args:
        duration: Seconds to sleep via ``asyncio.sleep``. Defaults to 1, the
            value used by the benchmarks in this notebook.

    Returns:
        None.
    """
    await asyncio.sleep(duration)
    
async def long_cpu_bound(iterations=10000):
    """Run a long CPU-bound job as a coroutine: repeated matrix squaring.

    Each step awaits ``cpu_bound``; because that coroutine has no suspension
    point of its own, the whole job runs without yielding to the event loop.

    The product is rescaled by its largest entry after every step. Without
    the rescaling, repeatedly squaring a matrix whose entries are drawn from
    [0, 1) overflows float64 to ``inf`` after fewer than ten squarings, so
    nearly all of the remaining iterations would multiply ``inf`` values.

    NOTE: this coroutine reuses the name of the blocking ``long_cpu_bound``
    defined earlier in the notebook and replaces it once this cell has run.

    Args:
        iterations: Number of squaring steps. Defaults to 10000.

    Returns:
        None. The result is discarded; only the CPU work matters.
    """
    matrix = np.random.uniform(size=(90, 90))
    for _ in range(iterations):
        matrix = await cpu_bound(matrix)
        # Bring the largest entry back to 1 so the next squaring cannot overflow.
        matrix /= matrix.max()

async def short_io_bound(duration=0.001):
    """Simulate a short I/O-bound job without blocking the event loop.

    NOTE: this coroutine reuses the name of the blocking ``short_io_bound``
    defined earlier in the notebook and replaces it once this cell has run.

    Args:
        duration: Seconds to sleep via ``asyncio.sleep``. Defaults to 0.001
            (1 ms), the value used by the benchmarks in this notebook.

    Returns:
        None.
    """
    await asyncio.sleep(duration)
    
async def short_cpu_bound(iterations=10):
    """Run a short CPU-bound job as a coroutine: repeated matrix squaring.

    Each step awaits ``cpu_bound``; because that coroutine has no suspension
    point of its own, the whole job runs without yielding to the event loop.

    The product is rescaled by its largest entry after every step. Without
    the rescaling, repeatedly squaring a matrix whose entries are drawn from
    [0, 1) overflows float64 to ``inf`` after fewer than ten squarings, which
    is within the default iteration count.

    NOTE: this coroutine reuses the name of the blocking ``short_cpu_bound``
    defined earlier in the notebook and replaces it once this cell has run.

    Args:
        iterations: Number of squaring steps. Defaults to 10.

    Returns:
        None. The result is discarded; only the CPU work matters.
    """
    matrix = np.random.uniform(size=(90, 90))
    for _ in range(iterations):
        matrix = await cpu_bound(matrix)
        # Bring the largest entry back to 1 so the next squaring cannot overflow.
        matrix /= matrix.max()

In [42]:
# Event loop shared by the async benchmark cells below, which drive their jobs
# with loop.run_until_complete().
# NOTE(review): run_until_complete() raises RuntimeError if this loop is already
# running — confirm the target kernel does not run its own asyncio loop.
loop = asyncio.get_event_loop()

In [43]:
%%time
jobs = [asyncio.async(long_io_bound()) for i in range(few_jobs)]
loop.run_until_complete(asyncio.gather(*jobs))

CPU times: user 20 ms, sys: 8 ms, total: 28 ms
Wall time: 1.04 s


In [44]:
%%time
jobs = [asyncio.async(long_cpu_bound()) for i in range(few_jobs)]
loop.run_until_complete(asyncio.gather(*jobs))

CPU times: user 22.7 s, sys: 1.17 s, total: 23.9 s
Wall time: 12.1 s


In [45]:
%%time
jobs = [asyncio.async(short_io_bound()) for i in range(many_jobs)]
loop.run_until_complete(asyncio.gather(*jobs))

CPU times: user 992 ms, sys: 40 ms, total: 1.03 s
Wall time: 1.03 s


In [46]:
%%time
jobs = [asyncio.async(short_cpu_bound()) for i in range(many_jobs)]
loop.run_until_complete(asyncio.gather(*jobs))

CPU times: user 26.1 s, sys: 2.64 s, total: 28.7 s
Wall time: 14.7 s
