# Using Numba with CuPy

In [1]:
from utiliviz import register_magic
register_magic(setup_bokeh=True)

In [2]:
from numba import guvectorize, njit
import math
import cupy
import numpy as np

## Example: Kernel Density Estimation

In [3]:
@njit
def gaussian(x):
    return math.exp(-0.5 * x**2) / math.sqrt(2 * math.pi)


def kde(eval_x, samples, bandwidths, result):
    "Kernel density estimation"
    r = 0.0
    for sample, bandwidth in zip(samples, bandwidths):
        r += gaussian((eval_x - sample) / bandwidth) / bandwidth
    result[0] = r / len(samples)

    
kde_types = ['(f4, f4[:], f4[:], f4[:])',
             '(f8, f8[:], f8[:], f8[:])']
kde_shape = '(),(n),(n)->()'
cpu_kde = guvectorize(kde_types, kde_shape, target='parallel', fastmath=True)(kde)
gpu_kde = guvectorize(kde_types, kde_shape, target='cuda')(kde)

In [4]:
def input_generator(api, nsamples):
    dtype = api.float64
    nsamples = nsamples
    sigma = 5.0
    neval = 20000
    samples = api.random.normal(loc=0.0, scale=sigma, size=nsamples).astype(dtype)
    # For simplicity, initialize bandwidth array with constant using 1D rule of thumb
    bandwidths = api.full_like(samples, 1.06 * nsamples**0.2 * sigma)
    eval_points = api.random.normal(loc=0.0, scale=5.0, size=neval).astype(dtype)
    return (eval_points, samples, bandwidths)

In [6]:
def run(nsamples, use_gpu=False):
    if use_gpu:
        api = cupy
        kde = gpu_kde
    else:
        api = np
        kde = cpu_kde
        
    args = input_generator(api, nsamples=nsamples)
    out = kde(*args)
    
    if use_gpu:
        return out.copy_to_host()
    else:
        return out

In [7]:
%%utiliviz --cuda
%%time

run(use_gpu=False, nsamples=100000)

CPU times: user 24.7 s, sys: 12 ms, total: 24.7 s
Wall time: 3.25 s


array([0.00730502, 0.00747918, 0.00748211, ..., 0.00748054, 0.00749391,
       0.00740979])

In [8]:
%%utiliviz --cuda
%%time

run(use_gpu=True, nsamples=100000)

CPU times: user 560 ms, sys: 92 ms, total: 652 ms
Wall time: 655 ms


array([0.00748924, 0.0074892 , 0.00747468, ..., 0.0074927 , 0.00747724,
       0.007485  ])