In [1]:
import multiprocessing
from multiprocessing import Process, Queue

# Number of CPUs reported for this machine.
multiprocessing.cpu_count()

4

In [26]:
import time

In [38]:
%time
def func1(x, l1, delay=5):
    """Put the square of ``x`` on a queue, then pause before finishing.

    Args:
        x: Value to square.
        l1: Queue-like object exposing ``put()``; receives ``x**2``.
        delay: Seconds to sleep after the result has been queued.
            Defaults to 5, the pause originally hard-coded here.

    Returns:
        The string ``'func1: finishing'``. Callers that need the computed
        value should read it from ``l1``.
    """
    print('func1: starting')
    # Publish the result before sleeping so a consumer blocked on
    # l1.get() is released immediately.
    l1.put(x**2)
    time.sleep(delay)
    return 'func1: finishing'

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs


In [39]:
%time
def func2(x, l2, delay=5):
    """Put the square of ``x`` on a queue, then pause before finishing.

    Args:
        x: Value to square.
        l2: Queue-like object exposing ``put()``; receives ``x**2``.
        delay: Seconds to sleep after the result has been queued.
            Defaults to 5, the pause originally hard-coded here.

    Returns:
        The string ``'func2: finishing'``. Callers that need the computed
        value should read it from ``l2``.
    """
    print('func2: starting')
    # Publish the result before sleeping so a consumer blocked on
    # l2.get() is released immediately.
    l2.put(x**2)
    time.sleep(delay)
    return 'func2: finishing'

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 6.91 µs


In [40]:
# Each worker gets its own queue to send its result back to the parent.
l1 = Queue()
p1 = Process(target=func1, args=(4, l1))
l2 = Queue()
p2 = Process(target=func2, args=(2, l2))
p1.start()
p2.start()
# Read the results before joining: get() blocks until the child has put
# its value, and draining the queue first avoids waiting on a child that
# still has buffered data to flush.
print(l1.get())         # prints 16 (4**2)
print(l2.get())         # prints 4  (2**2)
# Wait for both children to exit so they are not left behind unreaped.
p1.join()
p2.join()

func1: starting
func2: starting
16
4


In [None]:
import itertools
from multiprocessing import Pool
import sys

def method1(x):
    """Print ``x`` and its square, then return the square.

    Uses the Python 3 ``print()`` function; the earlier ``print x``
    statement form is a syntax error on a Python 3 kernel.
    """
    squared = x**2
    print(x)
    print(squared)
    return squared

def method2(x):
    """Print ``x`` and its cube, then return the cube.

    Uses the Python 3 ``print()`` function; the earlier ``print x``
    statement form is a syntax error on a Python 3 kernel.
    """
    cubed = x**3
    print(x)
    print(cubed)
    return cubed

def unzip_func(a, b):
    """Return the two positional arguments packed together as a tuple."""
    pair = (a, b)
    return pair

def distributor(option_args):
    """Dispatch one ``(option, args)`` pair to the matching ``method<option>``.

    Args:
        option_args: Two-element iterable; the first item selects the
            function (``method`` followed by ``str(option)``), the second is
            passed to that function as its single argument.

    Returns:
        Whatever the selected function returns.
    """
    # Split the pair into the selector and the payload.
    selector, payload = unzip_func(*option_args)

    # Build the function name from the selector, e.g. 1 -> "method1".
    target_name = "method{!s}".format(selector)

    # Look the function up by name on the current module and call it.
    handler = getattr(sys.modules[__name__], target_name)
    return handler(payload)


option_list = [1, 2]     # for selecting the method number
args_list = [4, 2]
# list of args for the corresponding method (argument 4 is for method1)

# Pool of 3 worker processes; the with-block shuts the pool down once the
# (blocking) map call has returned, even if it raises.
with Pool(3) as p:
    # The built-in zip pairs the inputs as (option1, arg1), (option2, arg2);
    # itertools.izip no longer exists in Python 3.
    result = p.map(distributor, zip(option_list, args_list))

print(result)            # prints [16, 8]

In [16]:
%time
from multiprocessing import Process

def func1(iterations=10000000):
    """Spin through an empty loop to simulate CPU-bound work.

    NOTE: this rebinds the name ``func1`` that an earlier cell defines with
    a different ``(x, l1)`` signature.

    Args:
        iterations: Number of empty loop passes. Defaults to 10,000,000,
            the count originally hard-coded here.
    """
    print('func1: starting')
    for _ in range(iterations):
        pass
    print('func1: finishing')

def func2(iterations=10000000):
    """Spin through an empty loop to simulate CPU-bound work.

    NOTE: this rebinds the name ``func2`` that an earlier cell defines with
    a different ``(x, l2)`` signature.

    Args:
        iterations: Number of empty loop passes. Defaults to 10,000,000,
            the count originally hard-coded here.
    """
    print('func2: starting')
    for _ in range(iterations):
        pass
    print('func2: finishing')

if __name__ == '__main__':
    # Create both workers up front, launch them back to back, then wait
    # for each one in turn.
    p1 = Process(target=func1)
    p2 = Process(target=func2)
    p1.start()
    p2.start()
    p1.join()
    p2.join()

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
func1: starting
func2: starting
func2: finishing
func1: finishing


In [21]:
# Release the resources held by the Process object p2 (joined in the cell
# that started it); its repr reads "closed" afterwards.
p2.close()

In [22]:
# Inspect the Process object's repr after close().
p2

<Process(Process-15, closed)>

In [24]:
%time
def runInParallel(*fns):
    """Run each callable in ``fns`` in its own process and wait for all.

    Args:
        *fns: Zero-argument callables; each becomes the target of one
            ``Process``.

    Returns:
        None. The call returns only after every process has been joined.
    """
    workers = [Process(target=fn) for fn in fns]
    # Launch everything first so the processes overlap...
    for worker in workers:
        worker.start()
    # ...then block until each one has exited.
    for worker in workers:
        worker.join()

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.29 µs


In [5]:
# Run func1 and func2 in separate processes and wait for both to finish.
runInParallel(func1, func2)

func1: starting
func2: starting
func1: finishing
func2: finishing


In [4]:
import numpy as np
from time import time

# Prepare data
# Draw from a dedicated, seeded generator so the data is reproducible.
# Calling np.random.RandomState(100) on its own line only builds a generator
# and throws it away; it does not seed the global np.random functions.
rng = np.random.RandomState(100)
arr = rng.randint(0, 10, size=[200000, 5])
data = arr.tolist()
data[:5]

[[5, 6, 6, 5, 2],
 [2, 3, 0, 7, 0],
 [0, 7, 0, 3, 9],
 [4, 5, 9, 6, 8],
 [4, 9, 8, 2, 2]]

In [5]:
# Solution Without Paralleization

def howmany_within_range(row, minimum, maximum):
    """Count the items of `row` that fall between `minimum` and `maximum`.

    Both bounds are inclusive.
    """
    return sum(1 for n in row if minimum <= n <= maximum)

# Serial baseline: count the in-range values of every row, one row at a time.
results = [howmany_within_range(row, minimum=4, maximum=8) for row in data]

print(results[:10])
#> [3, 1, 4, 4, 4, 2, 1, 1, 3, 3]


[4, 1, 1, 4, 2, 3, 3, 1, 3, 4]


In [6]:
# Parallelizing using Pool.apply()

import multiprocessing as mp

# Step 1: Init multiprocessing.Pool(). The with-block guarantees the worker
# processes are shut down even if one of the calls below raises.
with mp.Pool(mp.cpu_count()) as pool:
    # Step 2: `pool.apply` the `howmany_within_range()`
    # NOTE: apply() blocks until its result is ready, so the rows are still
    # processed one at a time; this is not faster than the serial loop.
    results = [pool.apply(howmany_within_range, args=(row, 4, 8)) for row in data]

# Step 3: No explicit close needed; leaving the with-block shuts the pool down.

print(results[:10])
#> [3, 1, 4, 4, 4, 2, 1, 1, 3, 3]

[4, 1, 1, 4, 2, 3, 3, 1, 3, 4]


In [7]:
# Parallelizing using Pool.map()
import multiprocessing as mp

# Redefine, with only 1 mandatory argument.
def howmany_within_range_rowonly(row, minimum=4, maximum=8):
    """Count the items of `row` lying in the inclusive range [minimum, maximum].

    Only `row` is mandatory, so the function can be handed directly to
    single-argument mappers such as `Pool.map`.
    """
    hits = [n for n in row if minimum <= n <= maximum]
    return len(hits)

# The with-block shuts the pool down once the (blocking) map call has
# returned, even if it raises.
with mp.Pool(mp.cpu_count()) as pool:
    # map() accepts the list directly; copying it with a comprehension
    # first only costs time and memory.
    results = pool.map(howmany_within_range_rowonly, data)

print(results[:10])
#> [3, 1, 4, 4, 4, 2, 1, 1, 3, 3]

[4, 1, 1, 4, 2, 3, 3, 1, 3, 4]


In [13]:
from multiprocessing import Process
import os

def info(title):
    """Print ``title`` followed by the module name and parent/own process ids."""
    details = (
        ('module name:', __name__),
        ('parent process:', os.getppid()),
        ('process id:', os.getpid()),
    )
    print(title)
    for label, value in details:
        print(label, value)

def f(name):
    """Print the process details via ``info`` and then greet ``name``."""
    info('function f')
    print('hello', name)

if __name__ == '__main__':
    # Report the ids from the parent first, then run f in a child process
    # so the two sets of ids can be compared.
    info('main line')
    p = Process(target=f, args=('bob',))
    p.start()
    # Wait for the child to finish before the cell returns.
    p.join()

main line
module name: __main__
parent process: 1410
process id: 1439
function f
module name: __main__
parent process: 1439
process id: 1708
hello bob


In [2]:
from multiprocessing import Pool

def f(x):
    """Return ``x`` multiplied by itself."""
    # NOTE: rebinds the name ``f`` that an earlier cell uses for a greeting
    # function taking ``name``.
    return x*x

if __name__ == '__main__':
    # Map f over the inputs on a pool of 5 workers and print the results;
    # the with-block shuts the pool down on exit.
    with Pool(5) as p:
        print(p.map(f, [1, 2, 3]))

[1, 4, 9]


In [3]:
from multiprocessing import Pool



In [5]:
def func(a, b, c):
    """Return the sum of ``a`` and ``b`` divided by ``c``."""
    total = a + b
    return total / c

# starmap unpacks each tuple into func's positional arguments (a, b, c).
with Pool() as pool:
    L = pool.starmap(func, [(1, 1, 2), (2, 1, 2), (3, 1, 2)])

In [6]:
# Display the list returned by pool.starmap.
L

[1.0, 1.5, 2.0]