In [1]:
import numpy as np
from joblib import Parallel, delayed
from itertools import product
from time import sleep

In [2]:
def power_it(arr, idx, n=2):
    """Return the element of `arr` at position `idx`, raised to the power `n`.

    Parameters
    ----------
    arr : indexable
        Container to read from (e.g. an ndarray).
    idx : index
        Index into `arr` (for an ndarray, a tuple with one entry per axis).
    n : number, optional
        Exponent to apply. Default: 2
    """
    # The pause stands in for heavy work, so that the benefit of
    # running many calls in parallel shows up in the timings.
    sleep(0.1)
    element = arr[idx]
    return element ** n

In [3]:
def parfor(arr, func, n_jobs=1, *args, **kwargs):
    """
    Parallel for loop over every element of a numpy array

    Parameters
    ----------
    arr : ndarray or array_like
        Input array to operate on. An input without a `shape` attribute
        (e.g. a nested list) is converted with `np.asarray` first.

    func : callable
        The function to apply to each item in the array.
        Must have the form: func(arr, idx, *args, **kwargs)
        where arr is an ndarray and idx is an index into
        that array (a tuple of ints, one per dimension)

    n_jobs : integer, optional
        The number of jobs to perform in parallel
        (forwarded to `joblib.Parallel`)
        Default: 1

    args : tuple, optional
        Extra positional arguments to `func`. `n_jobs` comes before them
        in the signature, so it has to be given positionally whenever
        extra positional arguments are passed.

    kwargs : dict, optional
        Keyword arguments to `func`

    Returns
    -------
    ndarray of identical shape to `arr`, holding the value returned by
    `func` for each index.

    Examples
    --------
    >>> def power_it(arr, idx, n=2):
    ...     return arr[idx] ** n
    >>> my_array = np.arange(6).reshape(2, 3)
    >>> parfor(my_array, power_it, n=3, n_jobs=2)
    array([[  0,   1,   8],
           [ 27,  64, 125]])
    """
    # Only coerce inputs that cannot report their own shape; anything that
    # already exposes `.shape` is handed to `func` untouched.
    if not hasattr(arr, "shape"):
        arr = np.asarray(arr)

    # Visit every index with the last axis varying fastest; this is the same
    # (C) order that `reshape` uses below, so each result lands at its index.
    idx = product(*(range(s) for s in arr.shape))
    results = Parallel(n_jobs=n_jobs)(
        delayed(func)(arr, i, *args, **kwargs) for i in idx
    )
    return np.array(results).reshape(arr.shape)

In [4]:
# Demo input: the integers 0..99 laid out as a 10 x 10 grid.
my_array = np.arange(100).reshape((10, 10))

In [5]:
# Run with the defaults: a single job (n_jobs=1) and power_it's default
# exponent n=2, i.e. every element of my_array is squared.
aa = parfor(my_array, power_it)

In [6]:
# Spot check: my_array[8, 8] is 88, so the squared result should be 7744.
aa[8, 8]

7744

In [7]:
# Compare one randomly chosen element of the parfor result with a direct call.
# randint's upper bound is exclusive, so the bounds come from the array's shape;
# this makes every index eligible, including the last row and column.
i, j = (np.random.randint(0, dim) for dim in my_array.shape)
assert aa[i, j] == power_it(my_array, (i, j))

In [15]:
# Time one call to power_it; each call includes the 0.1 s sleep.
%timeit power_it(my_array, (i, j))
# Expected serial cost in seconds: 0.1 s of sleep for each of the 100 elements.
0.1 * 100

10 loops, best of 3: 101 ms per loop


10.0

In [8]:
# Baseline: a single job, so the 100 calls (each sleeping 0.1 s) run one after another.
%timeit parfor(my_array, power_it, n=3, n_jobs=1)

1 loops, best of 3: 10.2 s per loop


In [9]:
# Same workload split across two parallel jobs.
%timeit parfor(my_array, power_it, n=3, n_jobs=2)

1 loops, best of 3: 5.13 s per loop


In [10]:
# Same workload split across eight parallel jobs. The per-element work is only
# a sleep, so presumably more jobs than CPU cores can still cut wall-clock time.
%timeit parfor(my_array, power_it, n=3, n_jobs=8)

1 loops, best of 3: 1.47 s per loop


In [11]:
import multiprocessing

In [12]:
# Number of CPUs reported for this machine, for comparison with the n_jobs values timed above.
multiprocessing.cpu_count()

4