### Parallelization

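Note: this code does not run in VS Code on a Windows machine — most likely because `ProcessPoolExecutor` requires worker processes to be able to import the task function, which fails for functions defined interactively in a notebook when workers are spawned rather than forked.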

In [1]:
import concurrent.futures
import time
import numpy as np

In [9]:
def fun(x, delay=1):
    """Simulate a blocking task and return the input unchanged.

    Parameters
    ----------
    x : any
        Value handed back to the caller untouched.
    delay : float, optional
        Seconds to block in ``time.sleep`` before returning. Defaults to 1,
        which reproduces the previously hard-coded behaviour.

    Returns
    -------
    The same object that was passed in as ``x``.
    """
    time.sleep(delay)
    return x

In [10]:
# Run ten sleeping tasks in ten worker processes and measure the wall time.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time() it is not affected by system clock adjustments.
start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor(max_workers=10) as executor:
    # map() returns a lazy iterator; list() drains it, which also re-raises
    # any exception that occurred inside a worker.
    a = list(executor.map(fun, [1] * 10))

print(a)
print(f'time elapsed: {time.perf_counter()-start} seconds')

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
time elapsed: 1.100304126739502 seconds


In [11]:
# Run the same ten sleeping tasks in ten worker threads and measure the wall time.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time() it is not affected by system clock adjustments.
start = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # map() returns a lazy iterator; list() drains it, which also re-raises
    # any exception that occurred inside a worker thread.
    a = list(executor.map(fun, [1] * 10))

print(a)
print(f'time elapsed: {time.perf_counter()-start} seconds')

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
time elapsed: 1.0164897441864014 seconds


`ProcessPoolExecutor` vs `ThreadPoolExecutor`

In [7]:
def invert_matrix(x):
    """Do a fixed amount of numerical work, then return ``x`` unchanged.

    A fresh 400x400 matrix of uniform random numbers is generated and
    inverted. The inverse is thrown away: the call exists only to keep the
    worker busy. ``x`` plays no part in the computation.
    """
    random_matrix = np.random.rand(400, 400)
    np.linalg.inv(random_matrix)  # result intentionally discarded
    return x

# Time ten matrix inversions, first in worker processes and then in worker
# threads.
# executor.map() returns its results lazily: wrapping it in list() retrieves
# every result, so an exception raised in a worker surfaces here instead of
# being silently dropped and leaving a meaningless timing behind.
# perf_counter() is the monotonic clock intended for measuring intervals.
start_process = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor(max_workers=10) as executor:
    list(executor.map(invert_matrix, [1] * 10))
print(f'Process: time elapsed: {time.perf_counter()-start_process} seconds')

start_thread = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    list(executor.map(invert_matrix, [1] * 10))
print(f'Thread: time elapsed: {time.perf_counter()-start_thread} seconds')

Process: time elapsed: 3.0895705223083496 seconds
Thread: time elapsed: 3.418935537338257 seconds


Parallelizing many small tasks might be counterproductive.

In [16]:
def fun(x):
    """Trivial identity task: hand the argument straight back.

    It does no work of its own, so any time measured around it is pure
    call/dispatch overhead.
    """
    return x
x = [1] * 1000

# Baseline: apply the function sequentially in the current process.
# perf_counter() is the monotonic clock intended for measuring intervals.
start = time.perf_counter()
list(map(fun, x))
print(f'time elapsed: {time.perf_counter()-start} seconds')

# Same inputs dispatched through a process pool. list() consumes the lazy
# result iterator, mirroring the baseline above and re-raising any exception
# from a worker instead of silently discarding it.
start_process = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor(max_workers=50) as executor:
    list(executor.map(fun, x))
print(f'time elapsed: {time.perf_counter()-start_process} seconds')

time elapsed: 0.0001800060272216797 seconds
time elapsed: 0.8244543075561523 seconds


To parallelize many small tasks, use `chunksize` to reduce overhead costs.

In [14]:
def fun(x, delay=0.1):
    """Simulate a short blocking task and return the input unchanged.

    Parameters
    ----------
    x : any
        Value handed back to the caller untouched.
    delay : float, optional
        Seconds to block in ``time.sleep`` before returning. Defaults to 0.1,
        which reproduces the previously hard-coded behaviour.

    Returns
    -------
    The same object that was passed in as ``x``.
    """
    time.sleep(delay)
    return x

x = [1] * 100

# Sequential baseline: every task runs one after another in this process.
# perf_counter() is the monotonic clock intended for measuring intervals.
start = time.perf_counter()
list(map(fun, x))
print(f'Time elapsed using map (expect 10 s): {time.perf_counter()-start} seconds')

# One worker per task, each task submitted on its own (chunksize=1).
# list() consumes the lazy result iterator so that worker exceptions are
# re-raised here rather than silently discarded.
# NOTE: per the ProcessPoolExecutor docs, Windows rejects max_workers > 61
# with a ValueError, so this configuration cannot run there.
start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor(max_workers=100) as executor:
    list(executor.map(fun, x, chunksize=1))
print(f'Time elapsed using 100 workers (expect 0.1s): {time.perf_counter()-start} seconds')


# Ten workers; chunksize=10 makes the pool submit the inputs in batches of
# about ten instead of one at a time.
start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor(max_workers=10) as executor:
    list(executor.map(fun, x, chunksize=10))
print(f'Time elapsed using 10 workers and chunksize 10 (expect 1 s): {time.perf_counter()-start} seconds')



time elapsed using map (expect 10 s): 10.020093202590942 seconds
time elapsed using 100 workers (expect 0.1s): 0.9341230392456055 seconds
time elapsed usign 10 workers and cunnksize 10 (expect 1 s): 1.1265740394592285 seconds
