Skip to content

Commit

Permalink
Kernel density estimator example
Browse files Browse the repository at this point in the history
  • Loading branch information
seibert committed Jul 24, 2017
1 parent 0320a3a commit c90a013
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 0 deletions.
18 changes: 18 additions & 0 deletions examples/density_estimation/kernel/bench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: Kernel Density Estimator
description: |
Evaluate a 1D Gaussian [kernel density estimator](https://en.wikipedia.org/wiki/Kernel_density_estimation)
at a list of points given a list of samples from the distribution and corresponding kernel bandwidths.
input_generator: impl.py:input_generator
xlabel: Number of evaluation points
validator: impl.py:validator
implementations:
- name: numpy
description: Numpy function
function: impl.py:numpy_kde
- name: numba
description: Numba single threaded
function: impl.py:numba_kde
- name: numba_multithread
description: Numba multi-threaded
function: impl.py:numba_kde_multithread
baseline: numpy
69 changes: 69 additions & 0 deletions examples/density_estimation/kernel/impl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import numpy as np

def input_generator():
for dtype in [np.float64]:
for nsamples in [1000, 10000]:
sigma = 5.0
samples = np.random.normal(loc=0.0, scale=sigma, size=nsamples).astype(dtype)
# For simplicity, initialize bandwidth array with constant using 1D rule of thumb
bandwidths = np.full_like(samples, 1.06 * nsamples**0.2 * sigma)
for neval in [10, 1000, 10000]:
category = ('samples%d' % nsamples, np.dtype(dtype).name)
eval_points = np.random.normal(loc=0.0, scale=5.0, size=neval).astype(dtype)
yield dict(category=category, x=neval, input_args=(eval_points, samples, bandwidths), input_kwargs={})

#### BEGIN: numpy
import numpy as np

def numpy_kde(eval_points, samples, bandwidths):
# This uses a lot of RAM and doesn't scale to larger datasets
rescaled_x = (eval_points[:, np.newaxis] - samples[np.newaxis, :]) / bandwidths[np.newaxis, :]
gaussian = np.exp(-0.5 * rescaled_x**2) / np.sqrt(2 * np.pi) / bandwidths[np.newaxis, :]
return gaussian.sum(axis=1) / len(samples)

#### END: numpy

#### BEGIN: numba
import numba
import numpy as np

@numba.jit(nopython=True)
def gaussian(x):
return np.exp(-0.5 * x**2) / np.sqrt(2 * np.pi)

@numba.jit(nopython=True)
def numba_kde(eval_points, samples, bandwidths):
result = np.zeros_like(eval_points)

for i, eval_x in enumerate(eval_points):
for sample, bandwidth in zip(samples, bandwidths):
result[i] += gaussian((eval_x - sample) / bandwidth) / bandwidth
result[i] /= len(samples)

return result
#### END: numba


#### BEGIN: numba_multithread
import numba
import numpy as np

@numba.jit(nopython=True, parallel=True)
def numba_kde_multithread(eval_points, samples, bandwidths):
result = np.zeros_like(eval_points)

# SPEEDTIP: Parallelize over evaluation points with prange()
for i in numba.prange(len(eval_points)):
eval_x = eval_points[i]
for sample, bandwidth in zip(samples, bandwidths):
result[i] += gaussian((eval_x - sample) / bandwidth) / bandwidth
result[i] /= len(samples)

return result
#### END: numba_multithread


def validator(input_args, input_kwargs, impl_output):
actual_y = impl_output
expected_y = numpy_kde(*input_args, **input_kwargs)
np.testing.assert_allclose(expected_y, actual_y, rtol=1e-6, atol=1e-6)

0 comments on commit c90a013

Please sign in to comment.