# Simple example to run on multiple processors

In [1]:
import multiprocessing as mp
# Ask multiprocessing how many CPUs it sees, then report the number.
n_processors = mp.cpu_count()
print("Number of processors: ", n_processors)

Number of processors:  24


In [12]:
import sys

def is_interactive():
    """Report whether the `__main__` module lacks a `__file__` attribute.

    Returns:
        bool: True when `__main__` has no `__file__` (used by this notebook
        as the signal for an interactive session), False otherwise.
    """
    import __main__

    try:
        __main__.__file__
    except AttributeError:
        # No backing file on the entry-point module.
        return True
    return False

from sys import argv

# Parameter values used when nothing usable comes from the command line.
DEFAULT_PARAMS = [12]

if is_interactive():
    # Interactive session: argv is not user input here, so it is ignored.
    params = list(DEFAULT_PARAMS)
else:
    print(argv)
    ss = argv[1:]
    print(ss)
    # Every command-line argument is parsed as a float. With no arguments,
    # fall back to the defaults so that `params[0]` further down cannot
    # fail with an IndexError on an empty list.
    params = [float(i) for i in ss] or list(DEFAULT_PARAMS)

print(params)
print(is_interactive())


 

[12]
True


In [13]:
# Display the first entry of `params` as this cell's output.
params[0]

12

In [4]:
whos

Variable         Type        Data/Info
--------------------------------------
argv             list        n=3
is_interactive   function    <function is_interactive at 0x10e3063a0>
mp               module      <module 'multiprocessing'<...>iprocessing/__init__.py'>
params           int         12
sys              module      <module 'sys' (built-in)>


In [2]:
import numpy as np
from time import time

# Prepare data
# Keep the seeded generator and draw from it. Constructing
# `np.random.RandomState(100)` without using the returned object leaves the
# global `np.random` functions unseeded, so the data would differ on every run.
rng = np.random.RandomState(100)
arr = rng.randint(0, 10, size=[200000, 5])  # 200000 rows x 5 integers in [0, 10)
data = arr.tolist()  # plain nested Python lists, one inner list per row
data[:5]

[[9, 2, 8, 3, 4],
 [9, 0, 0, 5, 5],
 [1, 9, 5, 7, 9],
 [6, 0, 8, 5, 3],
 [3, 2, 8, 6, 9]]

In [12]:
def howmany_within_range(row, minimum, maximum):
    """Count the entries of `row` that lie between `minimum` and `maximum`.

    Both bounds are inclusive.

    Args:
        row: iterable of values to inspect.
        minimum: lower bound of the accepted range.
        maximum: upper bound of the accepted range.

    Returns:
        int: how many entries satisfy `minimum <= entry <= maximum`.
    """
    return sum(1 for value in row if minimum <= value <= maximum)

# Serial baseline: count the in-range values of every row, one row at a time.
results = [howmany_within_range(row, minimum=5, maximum=8) for row in data]

print(results[:10])

[1, 2, 2, 3, 2, 2, 1, 1, 2, 3]


In [9]:
whos

Variable               Type                          Data/Info
--------------------------------------------------------------
arr                    ndarray                       200000x5: 1000000 elems, type `int64`, 8000000 bytes (7.62939453125 Mb)
data                   list                          n=200000
howmany_within_range   function                      <function howmany_within_range at 0x1dd910700>
mp                     module                        <module 'multiprocessing'<...>iprocessing/__init__.py'>
np                     module                        <module 'numpy' from '/Vo<...>kages/numpy/__init__.py'>
pool                   Pool                          <multiprocessing.pool.Poo<...>l state=RUN pool_size=24>
results                list                          n=200000
row                    list                          n=5
time                   builtin_function_or_method    <built-in function time>


In [5]:
# Parallelizing with Pool.starmap()
import multiprocessing as mp

# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt.
# Presumably the workers could not load `howmany_within_range` because it is
# defined in the notebook itself (a known limitation of the "spawn" start
# method) - confirm, and consider moving it into an importable module.

# The `with` block shuts the pool down even when the run fails or is
# interrupted, so worker processes are not left behind.
with mp.Pool(mp.cpu_count()) as pool:
    # Bounds 5..8 match the serial baseline, so both result lists are comparable.
    # starmap unpacks each tuple into the function's positional arguments and
    # blocks until every row has been processed.
    results = pool.starmap(howmany_within_range, [(row, 5, 8) for row in data])

print(results[:10])

KeyboardInterrupt: 

In [13]:
# Parallelizing using Pool.apply()

import multiprocessing as mp

# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt.
# Presumably the workers could not load `howmany_within_range` because it is
# defined in the notebook itself (a known limitation of the "spawn" start
# method) - confirm, and consider moving it into an importable module.

# Step 1: Init multiprocessing.Pool(). The `with` block guarantees the pool
# is shut down even when the run fails or is interrupted.
with mp.Pool(mp.cpu_count()) as pool:
    # Step 2: `pool.apply` the `howmany_within_range()`.
    # apply() blocks until its single call has returned, so the rows are
    # handled one after another.
    results = [pool.apply(howmany_within_range, args=(row, 5, 8)) for row in data]

# Step 3: leaving the `with` block has already released the pool.

print(results[:10])

KeyboardInterrupt: 

In [15]:
# Step 1: Redefine, to accept `i`, the iteration number
def howmany_within_range2(i, row, minimum, maximum):
    """Count the entries of `row` between `minimum` and `maximum`, tagged with `i`.

    Both bounds are inclusive.

    Args:
        i: iteration number, passed through unchanged.
        row: iterable of values to inspect.
        minimum: lower bound of the accepted range.
        maximum: upper bound of the accepted range.

    Returns:
        tuple: `(i, count)`, where `count` is the number of entries that
        satisfy `minimum <= entry <= maximum`.
    """
    hits = [value for value in row if minimum <= value <= maximum]
    return (i, len(hits))


# Parallel processing with Pool.apply_async() without callback function

import multiprocessing as mp

# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt.
# Presumably the workers could not load `howmany_within_range2` because it is
# defined in the notebook itself (a known limitation of the "spawn" start
# method) - confirm, and consider moving it into an importable module.

# The `with` block shuts the pool down even when the run fails or is
# interrupted, so worker processes are not left behind.
with mp.Pool(mp.cpu_count()) as pool:
    # call apply_async() without callback
    # Bounds 5..8 match the serial baseline, so both result lists are comparable.
    result_objects = [pool.apply_async(howmany_within_range2, args=(i, row, 5, 8)) for i, row in enumerate(data)]

    # result_objects is a list of pool.ApplyResult objects.
    # Collect every value before leaving the block: exiting it stops the
    # workers. Each result is an (i, count) tuple; only the count is kept, and
    # the list order already follows the submission order.
    results = [r.get()[1] for r in result_objects]

print(results[:10])

KeyboardInterrupt: 