# Parallel processing in Python

## Number of processors on my PC

In [1]:
import multiprocessing as mp

# Ask multiprocessing how many CPUs it detects on this machine and report it.
n_processors = mp.cpu_count()
print("Number of processors: ", n_processors)

Number of processors:  16


So this machine can run up to 16 processes in parallel.

## Problem Statement: Count how many numbers in each row fall within a given range

In [2]:
import numpy as np
from time import time

# Prepare data
# Draw from a dedicated, seeded generator so the data is identical on every run.
# (Merely constructing `np.random.RandomState(100)` and discarding it does not
# seed the module-level `np.random.randint`, so the values must come from `rng`.)
rng = np.random.RandomState(100)
arr = rng.randint(0, 10, size=[200000, 5])  # 200,000 rows x 5 integers in [0, 10)
data = arr.tolist()  # plain nested Python lists, one inner list per row
data[:5]

[[1, 7, 9, 4, 4],
 [9, 0, 0, 3, 9],
 [8, 2, 3, 8, 3],
 [4, 5, 5, 8, 6],
 [8, 5, 7, 5, 1]]

In [3]:
def howmany_within_range(row,minimum, maximum):
    """Count the entries of `row` that fall in the closed interval [`minimum`, `maximum`].

    Both bounds are inclusive. An empty `row` yields 0.
    """
    return sum(1 for value in row if minimum <= value <= maximum)


## Solution without parallelization

In [4]:
%%timeit
results = []
for row in data:
    results.append(howmany_within_range(row,minimum=4,maximum=8))
    
print(results[:10])

[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
[3, 0, 2, 5, 4, 3, 3, 3, 3, 2]
164 ms ± 1.73 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)



## Parallelizing using Pool.apply()

In [5]:
%%timeit
# Step 1: Init multiprocessing.Pool()
pool = mp.Pool(mp.cpu_count())

#Step 2: `pool.apply` the `howmany_within_range()`
results = [pool.apply(howmany_within_range,args=(row,4,8)) for row in data]

# Step 3: Don't forget to close
pool.close()
print(results[:10])

Process ForkPoolWorker-6:
Process ForkPoolWorker-5:
Process ForkPoolWorker-12:
Process ForkPoolWorker-7:
Process ForkPoolWorker-15:
Process ForkPoolWorker-2:
Process ForkPoolWorker-3:
Process ForkPoolWorker-13:
Process ForkPoolWorker-14:
Process ForkPoolWorker-1:
Process ForkPoolWorker-16:
Process ForkPoolWorker-4:
Process ForkPoolWorker-8:
Process ForkPoolWorker-9:
Process ForkPoolWorker-11:
Process ForkPoolWorker-10:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/climbach/anaconda3/lib/python3.7/multi

KeyboardInterrupt: 