# Load-balancing with ipyparallel

In [1]:
import os,sys,time
import numpy as np

from __future__ import print_function
from IPython.core.display import display
from ipyparallel import Client

rc = Client()
dview = rc[:]


Create a LoadBalancedView

In [2]:
lview = rc.load_balanced_view()
lview

<LoadBalancedView None>

LoadBalancedViews behave very much like a DirectView on a single engine:

Each call to `apply()` results in a single remote computation,
and the result (or AsyncResult) of that call is returned directly,
rather than in a list, as in the multi-engine DirectView.

In [3]:
e0 = rc[0]

In [4]:
from numpy.linalg import norm
A = np.random.random(1024)

e0.apply_sync(norm, A, 2)

18.386064851468049

In [5]:
lview.apply_sync(norm, A, 2)

18.386064851468049

However, unlike the DirectView of a single engine, you are letting the IPython Scheduler decide which engine should do the work:

In [6]:
e0.apply_sync(os.getpid)

18094

In [7]:
for i in range(2*len(rc.ids)):
    print(lview.apply_sync(os.getpid))

18095
18093
18094
18092
18095
18093
18094
18092


# Map

The LoadBalancedView also has a load-balanced version of the builtin `map()`

In [11]:
lview.block = True

serial_result   =       list(map(lambda x:x**10, range(32)))
parallel_result = lview.map(lambda x:x**10, range(32))

serial_result==parallel_result

True

Just like `apply()`, you can use non-blocking map with `block=False` or `map_async`

In [12]:
amr = lview.map_async(lambda x:x**10, range(32))

## Map results are iterable!

AsyncResults with multiple results are actually iterable before their
results arrive.

This means that you can perform map/reduce operations on elements as
they come in:

In [14]:
import ipyparallel as ipp

# scatter 'id', so id=0,1,2 on engines 0,1,2
dv = rc[:]
dv.scatter('id', rc.ids, flatten=True)
print(dv['id'])

# create a Reference to `id`. This will be a different value on each engine
ref = ipp.Reference('id')

tic = time.time()
ar = dv.apply(time.sleep, ref)
for i,r in enumerate(ar):
    print("%i: %.3f"%(i, time.time()-tic))
    sys.stdout.flush()

[0, 1, 2, 3]
0: 0.013
1: 1.011
2: 2.021
3: 3.018


Now we submit a bunch of tasks of increasing magnitude, and
watch where they happen, iterating through the results as they come.

In [15]:
def sleep_here(t):
    """sleep here for a time, return where it happened"""
    import time
    time.sleep(t)
    return id

amr = lview.map(sleep_here, [.01*t for t in range(100)])
tic = time.time()
for i,r in enumerate(amr):
    print("task %i on engine %i: %.3f" % (i, r, time.time()-tic))
    sys.stdout.flush()

task 0 on engine 2: 0.000
task 1 on engine 0: 0.001
task 2 on engine 1: 0.001
task 3 on engine 3: 0.002
task 4 on engine 2: 0.002
task 5 on engine 0: 0.003
task 6 on engine 1: 0.003
task 7 on engine 3: 0.004
task 8 on engine 2: 0.004
task 9 on engine 0: 0.005
task 10 on engine 1: 0.006
task 11 on engine 3: 0.007
task 12 on engine 2: 0.007
task 13 on engine 0: 0.008
task 14 on engine 1: 0.008
task 15 on engine 3: 0.009
task 16 on engine 2: 0.010
task 17 on engine 0: 0.011
task 18 on engine 1: 0.012
task 19 on engine 3: 0.015
task 20 on engine 2: 0.016
task 21 on engine 0: 0.017
task 22 on engine 1: 0.017
task 23 on engine 3: 0.018
task 24 on engine 2: 0.019
task 25 on engine 0: 0.020
task 26 on engine 1: 0.022
task 27 on engine 3: 0.023
task 28 on engine 2: 0.024
task 29 on engine 0: 0.024
task 30 on engine 1: 0.025
task 31 on engine 3: 0.026
task 32 on engine 2: 0.027
task 33 on engine 0: 0.028
task 34 on engine 1: 0.029
task 35 on engine 3: 0.030
task 36 on engine 2: 0.031
task 37 on 

Unlike `DirectView.map()`, which always results in one task per engine,
LoadBalance map defaults to one task per *item* in the sequence.  This
can be changed by specifying the `chunksize` keyword arg.

In [16]:
amr = lview.map(sleep_here, [.01*t for t in range(20)], chunksize=4)
tic = time.time()
for i,r in enumerate(amr):
    print("task %i on engine %i: %.3f"%(i, r, time.time()-tic))
    sys.stdout.flush()

task 0 on engine 2: 0.000
task 1 on engine 2: 0.001
task 2 on engine 2: 0.001
task 3 on engine 2: 0.002
task 4 on engine 0: 0.002
task 5 on engine 0: 0.003
task 6 on engine 0: 0.003
task 7 on engine 0: 0.004
task 8 on engine 1: 0.005
task 9 on engine 1: 0.006
task 10 on engine 1: 0.006
task 11 on engine 1: 0.007
task 12 on engine 3: 0.007
task 13 on engine 3: 0.008
task 14 on engine 3: 0.009
task 15 on engine 3: 0.009
task 16 on engine 2: 0.010
task 17 on engine 2: 0.010
task 18 on engine 2: 0.010
task 19 on engine 2: 0.011


# Example

## Parallelize nested loops

Often we want to run a function with a variety of combinations of arguments.
A useful skill is the ability to express a nested loop in terms of a map.

In [17]:
def area(w,h):
    return w*h


widths = range(1,4)
heights = range(6,10)

areas = []
for w in widths:
    for h in heights:
        areas.append(area(w,h))
areas

[6, 7, 8, 9, 12, 14, 16, 18, 18, 21, 24, 27]

Hint: `itertools.product` and `zip` will be helpful.

In [20]:
# %load soln/nestedloop.py
# To parallelize every call with map, you just need to get a list for each argument.
# You can use `itertools.product` + `zip` to get this:

import itertools

product = list(itertools.product(widths, heights))
# [(1, 6), (1, 7), (2, 6), (2, 7), (3, 6), (3, 7)]

# So we have a "list of pairs", 
# but what we really want is a single list for each argument, i.e. a "pair of lists".
# This is exactly what the slightly weird `zip(*product)` syntax gets us:

allwidths, allheights = zip(*itertools.product(widths, heights))

print(" widths", allwidths)
print("heights", allheights)

# Now we just map our function onto those two lists, to parallelize nested for loops:

ar = lview.map_async(area, allwidths, allheights)


 widths (1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3)
heights (6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9)


In [21]:
# To parallelize every call with map, you just need to get a list for each argument.
# You can use `itertools.product` + `zip` to get this:


import itertools

product = list(itertools.product(widths, heights))
# [(1, 6), (1, 7), (2, 6), (2, 7), (3, 6), (3, 7)]

# So we have a "list of pairs", 
# but what we really want is a single list for each argument, i.e. a "pair of lists".
# This is exactly what the slightly weird `zip(*product)` syntax gets us:

allwidths, allheights = zip(*itertools.product(widths, heights))

print(" widths", allwidths)
print("heights", allheights)

# Now we just map our function onto those two lists, to parallelize nested for loops:

ar = lview.map_async(area, allwidths, allheights)


 widths (1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3)
heights (6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9)


Validate the result:

In [22]:
areas == ar.get()

True