In [None]:
import sys
import time
import random
import platform
import traceback
import multiprocessing as mp

Multiprocessing vs Threading:
* Use threading if your program is I/O bound (for example, waiting on the network).  
* Use multiprocessing if it is CPU bound.

In [None]:
# Show the platform identification string of the machine running this notebook.
print(platform.platform(), end="\n")

Linux-5.4.144+-x86_64-with-Ubuntu-18.04-bionic


In [None]:
# IPython shell escape: run the system `lscpu` command and show its report.
!lscpu

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              2
On-line CPU(s) list: 0,1
Thread(s) per core:  2
Core(s) per socket:  1
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
CPU family:          6
Model:               79
Model name:          Intel(R) Xeon(R) CPU @ 2.20GHz
Stepping:            0
CPU MHz:             2200.000
BogoMIPS:            4400.00
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            256K
L3 cache:            56320K
NUMA node0 CPU(s):   0,1
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_sin

In [None]:
# Show the version string of the running interpreter.
print("Python version:\n", sys.version, sep=" ")

Python version:
 3.7.12 (default, Jan 15 2022, 18:48:18) 
[GCC 7.5.0]


In [None]:
# Show how many CPUs multiprocessing reports for this machine.
print("Number of CPUs: ", mp.cpu_count(), sep=" ")

Number of CPUs:  2


Python multiprocessing package: https://docs.python.org/3/library/multiprocessing.html

# Pool

A Pool keeps a fixed number of worker processes and hands them tasks, one task per collection of arguments in the argument list, whenever a worker is free to accept one.

## Example 1: The map method

The map method applies the same function to each item of an iterable and returns the results as a list.

In [None]:
def f(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared

# Two worker processes share the eight inputs; map() blocks until every
# result is available and returns them as one list.
with mp.Pool(processes=2) as p:
    squares = p.map(f, [1, 2, 3, 4, 5, 6, 7, 8])
    print(squares)

[1, 4, 9, 16, 25, 36, 49, 64]


## Example 2: Taking multiple arguments

In [None]:
def work_log(work_data):
    """Announce a task, sleep for its duration, then announce completion.

    ``work_data[0]`` is the label that gets printed and ``work_data[1]`` is
    the wait time, which is converted with ``int()`` before sleeping.
    """
    label = work_data[0]
    delay = work_data[1]
    print(" Process %s waiting %s seconds" % (label, delay))
    time.sleep(int(delay))
    print(" Process %s Finished." % label)

# Each entry is [label, seconds]; map() passes one entry to work_log per call.
work = (["A", 5], ["B", 2], ["C", 1], ["D", 3])

# Use the pool as a context manager so its worker processes are cleaned up
# when the block ends instead of lingering for the rest of the session.
with mp.Pool(2) as p:
    results = p.map(work_log, work)  # blocks until all four tasks are done

# work_log has no return statement, so every entry is None.
print(results)

 Process A waiting 5 seconds
 Process B waiting 2 seconds
 Process B Finished.
 Process C waiting 1 seconds
 Process C Finished.
 Process D waiting 3 seconds
 Process A Finished.
 Process D Finished.


[None, None, None, None]

## Example 3: The apply_async method

The Pool class also provides the apply_async method that makes asynchronous execution of the worker processes possible. Unlike the map method, which executes a computational routine over a list of inputs, the apply_async method executes the routine only once. 

In [None]:
def worker(i):
    """Sleep for ``random.random()`` seconds, then print ``i``."""
    pause = random.random()
    time.sleep(pause)
    print(i)

# Submit eight tasks to a four-process pool without waiting on any of them.
p = mp.Pool(processes=4)
for i in range(8):
    p.apply_async(worker, (i,))

# No more tasks will be submitted to this pool.
p.close()
# Block here until the worker processes have finished.
p.join()

1
4
2
0
5
3
6
7


When apply_async() is invoked, a new task is submitted to the pool and starts as soon as a worker process in the pool is free. Pool also has a synchronous version of this method, apply(), which blocks the parent process until the result is ready. Therefore, for parallelism, apply_async() should always be used. We also used the join() method to block the parent process after we finished adding all tasks to the pool. If we do not block the parent process, it might exit before the child processes finish their tasks. That's why we always use join() in the parent process to avoid this situation. Note that close() must be called before using join().

In [None]:
def worker(i):
    """Print ``i`` after pausing for a random fraction of a second."""
    delay = random.random()
    time.sleep(delay)
    print(i)

p = mp.Pool(processes=4)
for i in range(8):
    # apply() waits for each task's result before returning, so the tasks
    # run one after another: no concurrency.
    p.apply(worker, (i,))

p.close()  # no more tasks will be submitted
p.join()  # block until the worker processes have finished

0
1
2
3
4
5
6
7


As you can see from the following equivalent implementation, `apply()` is actually calling `apply_async()` but just before returning result, `get()` is called. This basically makes `apply_async()` block until result is returned.

In [None]:
def apply(self, func, args=(), kwds={}):
    '''
    Equivalent of `apply()` builtin.

    Passes `func`, `args` and `kwds` to `self.apply_async` and immediately
    calls `.get()` on the object it returns, handing that value back to the
    caller.
    '''
    # Refuse to run unless the pool's state equals RUN.
    # NOTE(review): `RUN` is not defined in this excerpt -- presumably the
    # pool-state constant from the library source; confirm.
    assert self._state == RUN
    return self.apply_async(func, args, kwds).get()

# Process

It is quite easy to create a new process in Python. We basically pass a target function and all arguments needed by the invocation to the `Process` object. Use `start()` to start the process's activity (i.e. the invocation of the target function). Use `join()` to block the parent process until the child process exits.

In [None]:
# One child process is started per entry.
names = [
    'America',
    'Europe',
    'Africa',
]
# Handles of the started processes, kept so they can be joined later.
procs = []

def print_func(continent='Asia'):
    """Print a single line naming ``continent`` (``'Asia'`` when omitted)."""
    label = 'The name of continent is : '
    print(label, continent)
  
# Create, record and start one child process per continent name.
for name in names:
    proc = mp.Process(
        target=print_func,
        args=(name,),
    )
    procs.append(proc)
    proc.start()

# Wait for every child process to exit.
for proc in procs:
    proc.join()

The name of continent is :  America
The name of continent is :  Europe
The name of continent is :  Africa


We used the join() method to block the parent process after we started all child processes. If we do not block the parent process, it might exit before the child processes finish their tasks. That's why we always use join() in the parent process to avoid this situation. Note that only a Pool requires close() to be called before join(); a Process has no close-before-join requirement.

# Queue

The Queue class can be used to save results from the processes.

Example of the put() and get() method:

In [None]:
colors = ['red', 'green', 'blue', 'black']

# instantiating a queue object
queue = mp.Queue()

print('pushing items to queue:')
for cnt, color in enumerate(colors, start=1):
    print('item no: ', cnt, ' ', color)
    queue.put(color)

print('\npopping items from queue:')
# put() hands each item to a background feeder thread, so empty() can still
# report True right after the puts and a `while not queue.empty()` loop may
# pop nothing. Pop exactly as many items as were pushed instead; get() blocks
# until the next item has arrived. Numbering starts at 1, as for the pushes.
for cnt in range(1, len(colors) + 1):
    print('item no: ', cnt, ' ', queue.get())

pushing items to queue:
item no:  1   red
item no:  2   green
item no:  3   blue
item no:  4   black

popping items from queue:


Passing Data Between Processes

In [None]:
# Example 1
def rand_num(queue):
    """Put one value drawn from ``random.random()`` on ``queue``."""
    queue.put(random.random())

queue = mp.Queue()

# Four children, each putting one random number on the shared queue.
processes = [mp.Process(target=rand_num, args=(queue,)) for _ in range(4)]

for p in processes:
    p.start()

# Drain the queue BEFORE joining. A child that has put data on a Queue may
# not exit until that data has been flushed to the underlying pipe, so
# joining first can deadlock. Each child puts exactly one item, so one get()
# per process collects everything.
results = [queue.get() for _ in processes]

for p in processes:
    p.join()

print(results)

[0.8334733051651524, 0.3148903445721938, 0.20202767572755653, 0.5754820448441449]


## Example of one producer and one consumer

In [None]:
def producer(q):
    """Put 0 through 4 on ``q``, pausing randomly before each, then 'end'."""
    i = 0
    while i < 5:
        time.sleep(random.random())
        q.put(i)
        i += 1
    # Sentinel telling the consumer process that the producer has finished.
    q.put('end')
    print('producer exits')

def consumer(q):
    """Print each item taken from ``q`` until the 'end' sentinel arrives."""
    # iter(callable, sentinel) keeps calling q.get() and stops as soon as
    # the returned item equals 'end'.
    for product in iter(q.get, 'end'):
        print(product)
    print('consumer exits')

In [None]:
# One queue shared by a single producer process and a single consumer process.
q = mp.Queue()
p1 = mp.Process(target=producer, args=(q,))
p2 = mp.Process(target=consumer, args=(q,))

# Start both children, then wait for both to exit.
for child in (p1, p2):
    child.start()
for child in (p1, p2):
    child.join()

0
1
2
3
4
producer exits
consumer exits


## Example of multiple producers and multiple consumers

Pool workers should share a queue created with multiprocessing.Manager().Queue(): a plain multiprocessing.Queue cannot be passed as an argument to pool tasks.

We can pass an error callback function to the apply_async() method; that function is called when a task raises an exception, and here we use it to print the stack trace. Note that callbacks run in the parent process and hold up its result handling while they run, so do not put time-consuming tasks in a callback function: it should complete immediately. The error callback function takes exactly one argument, the exception instance.

In [None]:
def print_traceback(e):
    """Print the stack trace of exception ``e`` to stderr.

    Intended for use as a pool ``error_callback``, which is called with the
    exception instance as its only argument.
    """
    # print_exception() writes the trace itself and returns None, so wrapping
    # it in print() would only add a stray "None" line to the output.
    traceback.print_exception(type(e), e, e.__traceback__)

def producer(q, producer_id):
    """Put five labelled products on ``q``, then an 'end' sentinel.

    A random pause precedes each product; ``producer_id`` is embedded in
    every product string and in the exit message.
    """
    for i in range(5):
        time.sleep(random.random())
        message = f'product {i} from producer {producer_id}'
        q.put(message)
    # Sentinel telling a consumer process that one producer has finished.
    q.put('end')
    print(f'producer {producer_id} exits')

def consumer(q, consumer_id):
    """Report each product taken from ``q`` until an 'end' sentinel arrives.

    The consumer stops at the first 'end' it receives, whichever producer
    sent it.
    """
    product = q.get()
    while product != 'end':
        print(f'{product} consumed by consumer {consumer_id}')
        product = q.get()
    print(f'consumer {consumer_id} exits')

In [None]:
manager = mp.Manager()

# A manager-backed queue can be passed to pool tasks as an argument.
q = manager.Queue()

producer_count = 2
consumer_count = 2
producer_pool = mp.Pool(producer_count)
consumer_pool = mp.Pool(consumer_count)

# One task per producer, then one task per consumer; failures are reported
# through print_traceback.
for i in range(producer_count):
    producer_pool.apply_async(
        producer,
        args=(q, i),
        error_callback=print_traceback,
    )

for i in range(consumer_count):
    consumer_pool.apply_async(
        consumer,
        args=(q, i),
        error_callback=print_traceback,
    )

# Close both pools first, then wait for both to finish.
for pool in (producer_pool, consumer_pool):
    pool.close()
for pool in (producer_pool, consumer_pool):
    pool.join()

product 0 from producer 0 consumed by consumer 0
product 0 from producer 1 consumed by consumer 1
product 1 from producer 0 consumed by consumer 0
product 1 from producer 1 consumed by consumer 1
product 2 from producer 0 consumed by consumer 0
product 3 from producer 0 consumed by consumer 1
product 2 from producer 1 consumed by consumer 0
product 3 from producer 1 consumed by consumer 1
product 4 from producer 0 consumed by consumer 0
producer 0 exits
consumer 1 exits
producer 1 exits
product 4 from producer 1 consumed by consumer 0
consumer 0 exits


# Pipe

Pipes in multiprocessing are primarily used for communication between processes.

In [None]:
def f(conn):
    """Send ``['hello world']`` over ``conn`` and then close ``conn``."""
    message = ['hello world']
    conn.send(message)
    conn.close()

In [None]:
# Pipe() returns the two ends of a connection; the child gets one end.
parent_conn, child_conn = mp.Pipe()
p = mp.Process(target=f, args=(child_conn,))
p.start()

# recv() waits for the message sent by the child.
received = parent_conn.recv()
print(received)
p.join()

['hello world']


# Locks

A lock ensures that only one process at a time can execute the code it protects; any other process that tries to acquire the lock is blocked until the holder releases it. 

In [None]:
def greeting(l, i):
    """Print ``hello <i>`` while holding lock ``l``.

    The ``with`` statement acquires the lock on entry and releases it on
    exit, so the lock is released even if ``print`` raises.
    """
    with l:
        print('hello', i)
 
if __name__ == '__main__':
    lock = mp.Lock()
    names = ['Alex', 'Sam', 'Bernard', 'Patrick', 'Jude', 'Williams']

    # Keep a handle on every child so the parent can wait for all of them;
    # without join() the parent can move on before some greetings have been
    # printed.
    greeters = []
    for name in names:
        proc = mp.Process(target=greeting, args=(lock, name))
        proc.start()
        greeters.append(proc)

    for proc in greeters:
        proc.join()

hello Alex
hello Sam
hello Jude
hello Bernard


## Race conditions between processes

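We have four add workers and two sub workers. Since each worker changes the shared value by +100/-100, the final result should be 200. However, the first version below does not give the expected result because race conditions happen: an augmented assignment such as `counter.value += 1` is a read followed by a write, so updates from different processes can overwrite each other.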

In the second version below, we wrap the augmented assignment with a Manager.Lock, which gives us the correct output. As an alternative to a `with` statement, we can also explicitly call the acquire() and release() methods of the Lock.

In [None]:
def print_traceback(e):
    """Print the stack trace of exception ``e`` to stderr.

    Intended for use as a pool ``error_callback``, which is called with the
    exception instance as its only argument.
    """
    # print_exception() already writes the trace and returns None; passing
    # that None to print() would emit an extra "None" line.
    traceback.print_exception(type(e), e, e.__traceback__)

def add(counter):
    """Add 1 to ``counter.value`` one hundred times, without any locking."""
    remaining = 100
    while remaining > 0:
        counter.value += 1
        remaining -= 1

def sub(counter):
    """Subtract 1 from ``counter.value`` one hundred times, without locking."""
    remaining = 100
    while remaining > 0:
        counter.value -= 1
        remaining -= 1

manager = mp.Manager()
# Shared integer (type code 'i') starting at 0, served by the manager.
counter = manager.Value('i', 0)

# Four add tasks on a four-process pool ...
add_pool = mp.Pool(4)
add_args = [(counter,)] * 4
add_pool.starmap_async(add, add_args, error_callback=print_traceback)

# ... and two sub tasks on a two-process pool.
sub_pool = mp.Pool(2)
sub_args = [(counter,)] * 2
sub_pool.starmap_async(sub, sub_args, error_callback=print_traceback)

# Close both pools, then wait for both to finish.
for pool in (add_pool, sub_pool):
    pool.close()
for pool in (add_pool, sub_pool):
    pool.join()

print(counter.value)

88


In [None]:
def print_traceback(e):
    """Print the stack trace of exception ``e`` to stderr.

    Intended for use as a pool ``error_callback``, which is called with the
    exception instance as its only argument.
    """
    # The return value of print_exception() is None, so it must not be
    # passed on to print(): that would add a spurious "None" line.
    traceback.print_exception(type(e), e, e.__traceback__)

def add(counter, counter_lock):
    """Add 1 to ``counter.value`` one hundred times under ``counter_lock``.

    The lock is acquired and released explicitly around every single
    update, so each read-modify-write of ``counter.value`` runs while the
    lock is held.
    """
    for _ in range(100):
        counter_lock.acquire()
        try:
            counter.value += 1
        finally:
            # Always hand the lock back: if the update raised while the lock
            # was still held, every other worker would block forever.
            counter_lock.release()

def sub(counter, counter_lock):
    """Subtract 1 from ``counter.value`` one hundred times under ``counter_lock``.

    The lock is acquired and released explicitly around every single
    update, so each read-modify-write of ``counter.value`` runs while the
    lock is held.
    """
    for _ in range(100):
        counter_lock.acquire()
        try:
            counter.value -= 1
        finally:
            # Release even when the update fails, otherwise the remaining
            # workers would wait on the lock forever.
            counter_lock.release()

manager = mp.Manager()
# Shared integer (type code 'i') starting at 0, plus the lock guarding it.
counter = manager.Value('i', 0)
counter_lock = manager.Lock()

# Every task receives the same (counter, lock) pair.
task_args = (counter, counter_lock)

add_pool = mp.Pool(4)
add_pool.starmap_async(add, [task_args] * 4, error_callback=print_traceback)

sub_pool = mp.Pool(2)
sub_pool.starmap_async(sub, [task_args] * 2, error_callback=print_traceback)

# Close both pools, then wait for both to finish.
for pool in (add_pool, sub_pool):
    pool.close()
for pool in (add_pool, sub_pool):
    pool.join()

print(counter.value)

200


## Fixing bugs in multi-producer multi-consumer code

The previous multi-producer multi-consumer example has several flaws. 
* Firstly, invocations of print() in different processes have race conditions, which could mess up the printed text in the console. 
* Also, a consumer exits immediately when it receives an end signal, which is not desired since there might be products from one producer enqueued after the end signal from another producer. We want consumers to exit only if all producers have exited.

To handle the print() problem, we need a lock to avoid race conditions. To know whether all producers have exited or not, we need a shared value among consumers, which counts the number of exited producers. This shared value will be increased by 1 every time a consumer receives an end signal.

In [None]:
def print_traceback(e):
    """Print the stack trace of exception ``e`` to stderr.

    Intended for use as a pool ``error_callback``, which is called with the
    exception instance as its only argument.
    """
    # print_exception() does the printing and returns None; feeding that
    # into print() would only append a meaningless "None" line.
    traceback.print_exception(type(e), e, e.__traceback__)

def producer(q, producer_id, print_lock):
    """Put five labelled products on ``q``, then an 'end' sentinel.

    A random pause precedes each product. The exit message is printed while
    holding ``print_lock``.
    """
    for i in range(5):
        time.sleep(random.random())
        message = f'product {i} from producer {producer_id}'
        q.put(message)
    # Sentinel telling the consumers that one producer has finished.
    q.put('end')
    exit_message = f'producer {producer_id} exits'
    with print_lock:
        print(exit_message)

def consumer(q, consumer_id, producer_count, print_lock, exited_producer_count, count_lock):
    """Consume products from ``q`` until every producer has signalled 'end'.

    Args:
        q: queue to read from; it must raise ``queue.Empty`` from
            ``get(timeout=...)`` when no item arrives in time.
        consumer_id: value embedded in the printed messages.
        producer_count: number of 'end' sentinels to wait for.
        print_lock: lock held around every ``print`` call.
        exited_producer_count: shared object whose ``value`` counts the
            'end' sentinels received so far by all consumers.
        count_lock: lock held while ``exited_producer_count.value`` is
            incremented.

    Raises:
        Any exception other than ``queue.Empty`` raised by ``q.get`` is
        propagated to the caller instead of being retried forever.
    """
    # Function-scope import keeps this block self-contained; the queue
    # parameter is named ``q``, so nothing is shadowed.
    from queue import Empty

    while True:
        try:
            # A short blocking wait (rather than get_nowait) stops an idle
            # consumer from spinning at full speed on an empty queue.
            product = q.get(timeout=0.05)
        except Empty:
            if exited_producer_count.value == producer_count:
                # all producers have exited
                break
            continue
        if product == 'end':
            with count_lock:
                exited_producer_count.value += 1
            continue
        with print_lock:
            print(f'{product} consumed by consumer {consumer_id}')
    with print_lock:
        print(f'consumer {consumer_id} exits')

In [None]:
manager = mp.Manager()
q = manager.Queue()

# Shared state handed to the workers: a count of exited producers
# (type code 'i' means 'signed int' in C type) and two locks.
exited_producer_count = manager.Value('i', 0)
print_lock = manager.Lock()
count_lock = manager.Lock()

producer_count = 2
consumer_count = 2
producer_pool = mp.Pool(producer_count)
consumer_pool = mp.Pool(consumer_count)

for i in range(producer_count):
    producer_pool.apply_async(
        producer,
        args=(q, i, print_lock),
        error_callback=print_traceback,
    )

for i in range(consumer_count):
    consumer_pool.apply_async(
        consumer,
        args=(q, i, producer_count, print_lock, exited_producer_count, count_lock),
        error_callback=print_traceback,
    )

# Close both pools, then wait for both to finish.
for pool in (producer_pool, consumer_pool):
    pool.close()

for pool in (producer_pool, consumer_pool):
    pool.join()

product 0 from producer 1 consumed by consumer 0
product 0 from producer 0 consumed by consumer 0
product 1 from producer 1 consumed by consumer 0
product 2 from producer 1 consumed by consumer 0
product 1 from producer 0 consumed by consumer 1
product 2 from producer 0 consumed by consumer 1
product 3 from producer 1 consumed by consumer 0
product 4 from producer 1 consumed by consumer 1
producer 1 exits
product 3 from producer 0 consumed by consumer 0
producer 0 exits
product 4 from producer 0 consumed by consumer 0
consumer 1 exits
consumer 0 exits
