Parallel Processing in Python
====

You will almost always start from the question, "How can I break up this problem into smaller pieces that can run concurrently?"

Once you have an answer to that question, there are a few Python tools that can help implement that answer.

Multiprocessing
----

In [1]:
import multiprocessing
import time
import numpy.random as rand

# Sixteen (label, delay) pairs: one per letter 'a'..'p', each paired with a
# random delay drawn from rand.uniform(0, 1). mp_handler reads this
# module-level list directly rather than taking it as an argument.
data = [(a,rand.uniform(0,1)) for a in 'abcdefghijklmnop']

def mp_worker(data):
    """Simulate one unit of work by sleeping, then return the upper-cased label.

    Parameters
    ----------
    data : tuple
        A two-item ``(label, delay)`` pair. ``label`` is a string and
        ``delay`` is the number of seconds to sleep (anything ``float()``
        accepts).

    Returns
    -------
    str
        ``label.upper()``.
    """
    label, delay = data

    # Announce the task before sleeping so interleaving is visible in the output.
    start_msg = " Processs %s\tWaiting %s seconds" % (label, delay)
    print(start_msg)

    time.sleep(float(delay))

    done_msg = " Process %s\tDONE" % label
    print(done_msg)
    return label.upper()

def mp_handler(N, workerfn):
    """Apply ``workerfn`` to every item of the module-level ``data`` in parallel.

    Parameters
    ----------
    N : int
        Number of worker processes in the pool.
    workerfn : callable
        Function called once per item of ``data``.

    Returns
    -------
    list
        One result per item, in the same order as ``data``.

    Notes
    -----
    ``data`` is looked up as a global at call time, so rebinding ``data``
    changes what the next call processes.
    """
    # Use the pool as a context manager so its worker processes are shut down
    # when the call finishes; otherwise every call leaves N processes behind.
    with multiprocessing.Pool(N) as p:
        # map() blocks until every result is ready, so leaving the `with`
        # block (which terminates the pool) cannot cut work short.
        return p.map(workerfn, data)

In [2]:
data

[('a', 0.21973323834345593),
 ('b', 0.7602641691621669),
 ('c', 0.4101835164813994),
 ('d', 0.8501303089754695),
 ('e', 0.8303385570847972),
 ('f', 0.7106315417913897),
 ('g', 0.15949976558978363),
 ('h', 0.8413333486947812),
 ('i', 0.7667025174769228),
 ('j', 0.47345888772409606),
 ('k', 0.6332304856802801),
 ('l', 0.2422721977879746),
 ('m', 0.7012816779230043),
 ('n', 0.046515119997054155),
 ('o', 0.29515034856377087),
 ('p', 0.519778887902513)]

In [3]:
# Serial baseline: run every task one after another in this process.
[ mp_worker(d) for d in data ]

 Processs a	Waiting 0.21973323834345593 seconds
 Process a	DONE
 Processs b	Waiting 0.7602641691621669 seconds
 Process b	DONE
 Processs c	Waiting 0.4101835164813994 seconds
 Process c	DONE
 Processs d	Waiting 0.8501303089754695 seconds
 Process d	DONE
 Processs e	Waiting 0.8303385570847972 seconds
 Process e	DONE
 Processs f	Waiting 0.7106315417913897 seconds
 Process f	DONE
 Processs g	Waiting 0.15949976558978363 seconds
 Process g	DONE
 Processs h	Waiting 0.8413333486947812 seconds
 Process h	DONE
 Processs i	Waiting 0.7667025174769228 seconds
 Process i	DONE
 Processs j	Waiting 0.47345888772409606 seconds
 Process j	DONE
 Processs k	Waiting 0.6332304856802801 seconds
 Process k	DONE
 Processs l	Waiting 0.2422721977879746 seconds
 Process l	DONE
 Processs m	Waiting 0.7012816779230043 seconds
 Process m	DONE
 Processs n	Waiting 0.046515119997054155 seconds
 Process n	DONE
 Processs o	Waiting 0.29515034856377087 seconds
 Process o	DONE
 Processs p	Waiting 0.519778887902513 seconds
 Process p	DONE

['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P']

In [4]:
# Same tasks as the serial run, now spread over a pool of 3 worker processes.
mp_handler(3, mp_worker)

 Processs e	Waiting 0.8303385570847972 seconds Processs a	Waiting 0.21973323834345593 seconds Processs c	Waiting 0.4101835164813994 seconds


 Process a	DONE
 Processs b	Waiting 0.7602641691621669 seconds
 Process c	DONE
 Processs d	Waiting 0.8501303089754695 seconds
 Process e	DONE
 Processs f	Waiting 0.7106315417913897 seconds
 Process b	DONE
 Processs g	Waiting 0.15949976558978363 seconds
 Process g	DONE
 Processs h	Waiting 0.8413333486947812 seconds
 Process d	DONE
 Processs i	Waiting 0.7667025174769228 seconds
 Process f	DONE
 Processs k	Waiting 0.6332304856802801 seconds
 Process h	DONE
 Processs m	Waiting 0.7012816779230043 seconds
 Process i	DONE
 Processs j	Waiting 0.47345888772409606 seconds
 Process k	DONE
 Processs l	Waiting 0.2422721977879746 seconds
 Process l	DONE
 Processs o	Waiting 0.29515034856377087 seconds
 Process j	DONE
 Process m	DONE
 Processs n	Waiting 0.046515119997054155 seconds
 Process o	DONE
 Processs p	Waiting 0.519778887902513 seconds
 Process n	DONE
 Process p	DONE

['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P']

In [5]:
import numpy as np

def bigpower(power, size=100000000):
    """Return the mean of ``size`` standard-normal samples raised to ``power``.

    Parameters
    ----------
    power : int or float
        Exponent applied element-wise to the samples.
    size : int, optional
        Number of samples to draw. Defaults to 100000000, which needs
        roughly 800 MB for the float64 array.

    Returns
    -------
    numpy.float64
        Mean of the powered samples.
    """
    # Build a fresh Generator (seeded from OS entropy) on every call. The
    # global np.random state is copied into each forked pool worker, so
    # workers drawing from it could produce identical "random" samples.
    rng = np.random.default_rng()
    d = rng.standard_normal(size)**power
    print("Raising random array to the {0}th power".format(power))
    return d.mean()

# Rebinds the module-level `data` that mp_handler maps over: the items are now
# exponents for bigpower instead of the earlier (label, delay) pairs.
data = [1, 2, 12, 15]

In [6]:
# Three worker processes share the four exponents currently held in `data`.
mp_handler(3, bigpower)

Raising random array to the 2th power
Raising random array to the 1th power
Raising random array to the 12th power
Raising random array to the 15th power


[-7.331594074173184e-06,
 1.0000491528479363,
 10433.740013761022,
 1081.932453308024]

Threading
----

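Threads are lighter-weight than processes because they run inside the same Python interpreter and share its memory, so data can be handed between them without copying. But mind the GIL (global interpreter lock): only one thread executes Python bytecode at a time, so threads help most with I/O-bound work or with libraries that release the GIL.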

In [7]:
import threading
import queue

In [8]:
q = queue.Queue()

In [9]:
q.put('foo')

In [10]:
q.put(5)

In [11]:
q.put('even more')

In [15]:
# A non-blocking get raises queue.Empty when no item is available.
# NOTE(review): execution counts jump from 11 to 15, so the three items put
# above were presumably already retrieved by cells not shown here — confirm.
q.get(block=False)

Empty: 

In [16]:
def work():
    """Draw 1000 standard-normal samples and put the array on the module-level queue ``q``."""
    samples = np.random.randn(1000)
    q.put(samples)

In [17]:
# Create (but do not yet start) a thread that will run work().
t = threading.Thread(target=work)

In [18]:
t

<Thread(Thread-10, initial)>

In [20]:
t.start()

In [21]:
# Wait for the worker thread to finish so its result is guaranteed to be on
# the queue; a non-blocking get issued right after start() can otherwise race
# the thread and raise queue.Empty.
t.join()
q.get(block=False)

array([-4.76668531e-01, -1.37956692e-01, -1.17462355e+00, -3.31915600e-01,
        1.40008800e+00, -3.18371377e-01,  2.55221580e-01, -1.71494570e-01,
       -7.89272012e-02,  5.53789986e-01,  4.19028742e-01,  7.75291788e-01,
       -3.72399962e-02,  1.79420242e+00, -9.70655953e-01, -9.71871186e-01,
       -5.18105889e-01, -3.78435424e-01, -2.86524225e-02, -2.19500400e+00,
       -2.09381967e+00, -1.18534975e+00, -1.45010388e-02, -6.64251863e-01,
        7.72181289e-02, -7.47545059e-01,  5.14685679e-01, -4.71819585e-01,
        3.51431938e-01,  1.06525052e+00, -2.17288326e-01,  3.43477669e-01,
        7.85356425e-01, -5.65473282e-01, -4.12519213e-01,  1.05968166e+00,
        9.59009851e-01,  7.91983859e-01,  9.90673768e-01,  1.23869382e+00,
        2.85948955e-01,  2.53728997e-01, -1.32437968e+00, -1.72843167e+00,
       -7.25951522e-01, -2.05589625e-01, -2.58683942e-02,  1.45099913e-02,
       -2.04833714e-01, -5.00791059e-03, -3.21967500e-01, -5.52671017e-01,
       -8.12774691e-02,  

In [22]:
multiprocessing.Pool?

Signature:
multiprocessing.Pool(
    processes=None,
    initializer=None,
    initargs=(),
    maxtasksperchild=None,
)
Docstring: Returns a process pool object
File:      /panfs/roc/msisoft/anaconda/python3-2020.07-mamba/lib/python3.8/multiprocessing/context.py
Type:      method


Dask
---

Higher-level abstractions are available! Dask, for example, splits an array into chunks and schedules the work on those chunks for you.

In [23]:
import numpy as np
import dask.array as da
import memory_profiler

In [24]:
# Describe a 1000x1000 array of normal samples, split into 100x100 chunks.
Y = da.random.normal(size=(1000, 1000),
                     chunks=(100, 100))

# Displaying Y shows the array's metadata (shape, chunk layout, task count).
Y

Unnamed: 0,Array,Chunk
Bytes,8.00 MB,80.00 kB
Shape,"(1000, 1000)","(100, 100)"
Count,100 Tasks,100 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 8.00 MB 80.00 kB Shape (1000, 1000) (100, 100) Count 100 Tasks 100 Chunks Type float64 numpy.ndarray",1000  1000,

Unnamed: 0,Array,Chunk
Bytes,8.00 MB,80.00 kB
Shape,"(1000, 1000)","(100, 100)"
Count,100 Tasks,100 Chunks
Type,float64,numpy.ndarray


In [25]:
# Mean over axis 0: one value per column of Y.
mu = Y.mean(axis=0)
mu

Unnamed: 0,Array,Chunk
Bytes,8.00 kB,800 B
Shape,"(1000,)","(100,)"
Count,240 Tasks,10 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 8.00 kB 800 B Shape (1000,) (100,) Count 240 Tasks 10 Chunks Type float64 numpy.ndarray",1000  1,

Unnamed: 0,Array,Chunk
Bytes,8.00 kB,800 B
Shape,"(1000,)","(100,)"
Count,240 Tasks,10 Chunks
Type,float64,numpy.ndarray


In [26]:
mu.sum()

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,254 Tasks,1 Chunks
Type,float64,numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Count 254 Tasks 1 Chunks Type float64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,254 Tasks,1 Chunks
Type,float64,numpy.ndarray


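Notice the computation hasn't actually happened yet: each result above is a lazy Dask array that only describes the work to be done. Calling `.compute()` is what actually runs it.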

In [28]:
# compute() evaluates the pending tasks and returns a concrete number.
mu[0].compute()

0.04133749108270661

In [29]:
from dask.diagnostics import ProgressBar

# NOTE(review): Y.mean() with no axis already reduces to a single value, so the
# trailing .sum() looks redundant — confirm whether Y.mean(axis=0).sum() was
# intended. This also rebinds `mu` from a dask array to the computed result.
with ProgressBar():
    mu = Y.mean().sum().compute()

[########################################] | 100% Completed |  0.2s


In [30]:
mu

-0.002249287800002957