In [1]:
import numpy
import numba
import concurrent.futures

In [2]:
@numba.jit(nopython=True)
def mandel(x, y, max_iters=20):
    """
    Escape-time test for a single point of the complex plane.

    Starting from z = 0, repeatedly applies z -> z**2 + c with
    c = complex(x, y) and reports when the orbit leaves the disc of
    radius 2.

    Compiled in nopython mode explicitly so that callers compiled with
    ``nogil=True`` can invoke it without needing the interpreter.

    Parameters
    ----------
    x : real part of c.
    y : imaginary part of c.
    max_iters : maximum number of iterations to attempt (default 20).

    Returns
    -------
    The zero-based index of the first iteration at which
    ``z.real**2 + z.imag**2 >= 4``, or ``max_iters`` if that never
    happens (the point is then a candidate member of the set).
    """
    c = complex(x, y)
    z = 0.0j
    for i in range(max_iters):
        z = z ** 2 + c
        # Squared modulus compared against 4, i.e. |z| >= 2 without a sqrt.
        if (z.real ** 2 + z.imag ** 2) >= 4:
            return i

    return max_iters

In [3]:
# Number of samples along each axis of the image.
width = height = 2048

# Sample coordinates: xs feeds the real part, ys the imaginary part.
xs = numpy.linspace(-2.0, 1.0, num=width)
ys = numpy.linspace(-1.0, 1.0, num=height)

# Result buffers, indexed [x index, y index]. `img` is used by the serial
# run; `img1` and `img2` are independent copies for the two tiled runs.
img = numpy.zeros(shape=(width, height), dtype=numpy.int8)
img1, img2 = img.copy(), img.copy()

In [4]:
@numba.jit(nopython=True, nogil=True)
def mandel_tile(xs, ys, out):
    """
    Fill ``out`` with Mandelbrot escape counts for a grid of points.

    ``out[i, j]`` receives ``mandel(xs[i], ys[j])`` for every
    ``i < xs.size`` and ``j < ys.size``; no other element of ``out`` is
    written, so ``out`` may be larger than the grid.

    ``nopython=True`` is requested explicitly because ``nogil=True`` is
    only effective for code compiled in nopython mode; this makes a
    failure to compile natively an error instead of a silent fallback
    that would keep the GIL held.

    Parameters
    ----------
    xs : 1-D array of real parts.
    ys : 1-D array of imaginary parts.
    out : 2-D array with at least ``xs.size`` rows and ``ys.size``
        columns; modified in place.

    Returns
    -------
    ``out``, after it has been filled.
    """
    for i in range(xs.size):
        x = xs[i]  # invariant over the inner loop
        for j in range(ys.size):
            out[i, j] = mandel(x, ys[j])
    return out

Warm up: call `mandel_tile` once so that Numba's JIT compilation happens before the timed runs.

In [5]:
# Writes the iteration counts for the full grid into `img`; this serial
# result is the reference the tiled runs are compared against later.
mandel_tile(xs, ys, img)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]], dtype=int8)

In [6]:
%%timeit

# Serial baseline: the whole image is computed by a single call.
mandel_tile(xs, ys, img)

10 loops, best of 3: 133 ms per loop


In [7]:
# Norm of the serial result, used as a quick checksum for the tiled runs.
numpy.linalg.norm(img)

23559.225921069647

In [8]:
%%timeit

npar = 2       # tiles per axis -> npar * npar tiles in total
n_workers = 4  # threads in the pool

# Tile extents are derived from the array that is actually written.
x_step = img1.shape[0] // npar
y_step = img1.shape[1] // npar

with concurrent.futures.ThreadPoolExecutor(n_workers) as exe:
    # One task per tile. Each task receives a view of `img1` restricted to
    # its own tile, so no two tasks are handed overlapping memory.
    futs = [
        exe.submit(
            mandel_tile,
            xs[pos_x : pos_x + x_step],
            ys[pos_y : pos_y + y_step],
            img1[pos_x : pos_x + x_step, pos_y : pos_y + y_step],
        )
        for pos_x in range(0, img1.shape[0], x_step)
        for pos_y in range(0, img1.shape[1], y_step)
    ]
    # result() blocks until the task finishes and re-raises any exception
    # raised in the worker thread.
    for f in futs:
        f.result()
    

The slowest run took 4.23 times longer than the fastest. This could mean that an intermediate result is being cached.
10 loops, best of 3: 43.5 ms per loop


In [9]:
# Same checksum for the result of the npar = 2 tiled run.
numpy.linalg.norm(img1)

23559.225921069647

In [17]:
%%timeit

npar = 8       # tiles per axis -> npar * npar tiles in total
n_workers = 8  # threads in the pool

# Tile extents are derived from the array that is actually written.
x_step = img2.shape[0] // npar
y_step = img2.shape[1] // npar

with concurrent.futures.ThreadPoolExecutor(n_workers) as exe:
    # One task per tile. Each task receives a view of `img2` restricted to
    # its own tile, so no two tasks are handed overlapping memory.
    futs = [
        exe.submit(
            mandel_tile,
            xs[pos_x : pos_x + x_step],
            ys[pos_y : pos_y + y_step],
            img2[pos_x : pos_x + x_step, pos_y : pos_y + y_step],
        )
        for pos_x in range(0, img2.shape[0], x_step)
        for pos_y in range(0, img2.shape[1], y_step)
    ]
    # result() blocks until the task finishes and re-raises any exception
    # raised in the worker thread.
    for f in futs:
        f.result()
    

10 loops, best of 3: 24.8 ms per loop


In [11]:
# Same checksum for the result of the npar = 8 tiled run.
numpy.linalg.norm(img2)

23559.225921069647

In [12]:
# Element-wise check that the two tiled runs produced identical images.
numpy.all(img1 == img2)

True

In [13]:
# Element-wise check of the npar = 8 tiled result against the serial one.
numpy.all(img == img2)

True