In [None]:
import numba.cuda
import math
import numpy

@numba.cuda.jit
def as_cuda(c, fractal, maxiterations):
    """CUDA kernel: escape-time iteration count for one point per thread.

    Each thread takes its absolute 2-D grid position (x, y), iterates
    z -> z**2 + c[x, y] starting from z = c[x, y], and stores in
    fractal[x, y] the first iteration index at which abs(z) > 2, or
    ``maxiterations`` if that never happens within the iteration budget.

    Parameters
    ----------
    c : 2-D array of complex values, one per point (read only).
        Must be at least as large as ``fractal`` in both dimensions.
    fractal : 2-D integer array, written in place.
    maxiterations : int
        Iteration budget; also the value stored for points that never escape.
    """
    x, y = numba.cuda.grid(2)     # 2 dimensional CUDA grid

    # The launch grid is rounded up to whole blocks, so some threads fall
    # outside the array; they must not touch memory.
    if x >= fractal.shape[0] or y >= fractal.shape[1]:
        return

    c_xy = c[x, y]                # read once instead of on every iteration
    z = c_xy
    fractal[x, y] = maxiterations # value for points that never escape
    for i in range(maxiterations):
        z = z**2 + c_xy
        if abs(z) > 2:
            fractal[x, y] = i
            break                 # not optimal: threads that leave the loop still have to wait

def run_numba(height, width, maxiterations=20):
    """Compute the escape-time fractal on the GPU.

    Builds a (height, width) grid of complex points with imaginary part
    spanning [-1, 0] and real part spanning [-1.5, 0], launches ``as_cuda``
    over it, and waits for the kernel to finish.

    Parameters
    ----------
    height, width : int
        Number of grid points along the imaginary and real axes.
    maxiterations : int, default 20
        Iteration budget passed to the kernel.

    Returns
    -------
    Numba CUDA device array of int32 with shape (height, width) holding the
    per-point iteration counts (still on the device).
    """
    y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]

    # Transfer the read-only input explicitly: passing a host array to the
    # kernel would make Numba copy it back to the host after the launch.
    c = numba.cuda.to_device(x + y*1j)
    fractal = numba.cuda.device_array(c.shape, dtype=numpy.int32)

    threadsperblock = (32, 32)
    # Round up so the grid covers the whole array even when the dimensions
    # are not multiples of the block size.
    blockspergrid = (math.ceil(height / threadsperblock[0]),
                     math.ceil(width / threadsperblock[1]))
    as_cuda[blockspergrid, threadsperblock](c, fractal, maxiterations)

    # Kernel launches are asynchronous; block here so callers that time this
    # function measure the actual computation.
    numba.cuda.synchronize()
    return fractal

Numba doesn't suffer from the memory issue because it doesn't make as many intermediate copies.

In [None]:
import time

# Time one full run. perf_counter() is a monotonic, high-resolution clock
# intended for measuring intervals, unlike time.time(), which follows the
# system wall clock and can jump if that clock is adjusted.
# NOTE(review): if this is the first launch of the kernel in this session,
# the measurement presumably includes Numba's JIT compilation - re-run the
# cell to confirm the steady-state time.
starttime = time.perf_counter()
fractal = run_numba(4000, 6000)
time.perf_counter() - starttime

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# Show the per-point iteration counts as a color-mapped image.
# `fractal` is the device array returned by run_numba in the timing cell above;
# NOTE(review): presumably it is converted to a host array implicitly when
# imshow consumes it - confirm, or call fractal.copy_to_host() explicitly.
fig, ax = plt.subplots(figsize=(12, 8))
ax.imshow(fractal)
# Alternative view: only the last 2000 rows and first 3000 columns.
# ax.imshow(fractal[-2000:, :3000])