Skip to content

Commit

Permalink
Merge pull request #13918 from diregoblin/master
Browse files Browse the repository at this point in the history
ENH: Poisson Disk sampling for QMC
  • Loading branch information
tylerjereddy committed May 23, 2022
2 parents 62f75fa + 8889fbd commit 1451b2c
Show file tree
Hide file tree
Showing 3 changed files with 378 additions and 5 deletions.
304 changes: 303 additions & 1 deletion scipy/stats/_qmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import scipy.stats as stats
from scipy._lib._util import rng_integers
from scipy.spatial import distance, Voronoi
from scipy.special import gammainc
from ._sobol import (
_initialize_v, _cscramble, _fill_p_cumulative, _draw, _fast_forward,
_categorize, _MAXDIM
Expand All @@ -46,7 +47,7 @@


__all__ = ['scale', 'discrepancy', 'update_discrepancy',
'QMCEngine', 'Sobol', 'Halton', 'LatinHypercube',
'QMCEngine', 'Sobol', 'Halton', 'LatinHypercube', 'PoissonDisk',
'MultinomialQMC', 'MultivariateNormalQMC']


Expand Down Expand Up @@ -1595,6 +1596,307 @@ def fast_forward(self, n: IntNumber) -> Sobol:
return self


class PoissonDisk(QMCEngine):
"""Poisson disk sampling.
Parameters
----------
d : int
Dimension of the parameter space.
radius : float
Minimal distance to keep between points when sampling new candidates.
hypersphere : {"volume", "surface"}, optional
Sampling strategy to generate potential candidates to be added in the
final sample. Default is "volume".
* ``volume``: original Bridson algorithm as described in [1]_.
New candidates are sampled *within* the hypersphere.
* ``surface``: only sample the surface of the hypersphere.
ncandidates : int
Number of candidates to sample per iteration. More candidates result
in a denser sampling as more candidates can be accepted per iteration.
seed : {None, int, `numpy.random.Generator`}, optional
If `seed` is None the `numpy.random.Generator` singleton is used.
If `seed` is an int, a new ``Generator`` instance is used,
seeded with `seed`.
If `seed` is already a ``Generator`` instance then that instance is
used.
Notes
-----
Poisson disk sampling is an iterative sampling strategy. Starting from
a seed sample, `ncandidates` are sampled in the hypersphere
surrounding the seed. Candidates bellow a certain `radius` or outside the
domain are rejected. New samples are added in a pool of sample seed. The
process stops when the pool is empty or when the number of required
samples is reached.
The maximum number of point that a sample can contain is directly linked
to the `radius`. As the dimension of the space increases, a higher radius
spreads the points further and help overcome the curse of dimensionality.
See the :ref:`quasi monte carlo tutorial <quasi-monte-carlo>` for more
details.
.. warning::
The algorithm is more suitable for low dimensions and sampling size
due to its iterative nature and memory requirements.
Selecting a small radius with a high dimension would
mean that the space could contain more samples than using lower
dimension or a bigger radius.
Some code taken from [2]_, written consent given on 31.03.2021
by the original author, Shamis, for free use in SciPy under
the 3-clause BSD.
References
----------
.. [1] Robert Bridson, "Fast Poisson Disk Sampling in Arbitrary
Dimensions." SIGGRAPH, 2007.
.. [2] `StackOverflow <https://stackoverflow.com/questions/66047540>`__.
Examples
--------
Generate a 2D sample using a `radius` of 0.2.
>>> import matplotlib.pyplot as plt
>>> from matplotlib.collections import PatchCollection
>>> from scipy.stats import qmc
>>>
>>> rng = np.random.default_rng()
>>> radius = 0.2
>>> engine = qmc.PoissonDisk(d=2, radius=0.2, seed=rng)
>>> sample = engine.random(20)
Visualizing the 2D sample and showing that no points are closer than
`radius`. ``radius/2`` is used to visualize non-intersecting circles.
If two samples are exactly at `radius` from each other, then their circle
of radius ``radius/2`` will touch.
>>> fig, ax = plt.subplots()
>>> _ = ax.scatter(sample[:, 0], sample[:, 1])
>>> circles = [plt.Circle((xi, yi), radius=radius/2, fill=False)
... for xi, yi in sample]
>>> collection = PatchCollection(circles, match_original=True)
>>> ax.add_collection(collection)
>>> _ = ax.set(aspect='equal', xlabel=r'$x_1$', ylabel=r'$x_2$',
... xlim=[0, 1], ylim=[0, 1])
>>> plt.show()
Such visualization can be seen as circle packing: how many circle can
we put in the space. It is a np-hard problem. The method `fill_space`
can be used to add samples until no more samples can be added. This is
a hard problem and parameters may need to be adjusted manually. Beware of
the dimension: as the dimensionality increases, the number of samples
required to fill the space increases exponentially
(curse-of-dimensionality).
"""

def __init__(
self,
d: IntNumber,
*,
radius: DecimalNumber = 0.05,
hypersphere: Literal["volume", "surface"] = "volume",
ncandidates: IntNumber = 30,
seed: SeedType = None
) -> None:
super().__init__(d=d, seed=seed)

hypersphere_sample = {
"volume": self._hypersphere_volume_sample,
"surface": self._hypersphere_surface_sample
}

try:
self.hypersphere_method = hypersphere_sample[hypersphere]
except KeyError as exc:
message = (
f"{hypersphere!r} is not a valid hypersphere sampling"
f" method. It must be one of {set(hypersphere_sample)!r}")
raise ValueError(message) from exc

# size of the sphere from which the samples are drawn relative to the
# size of a disk (radius)
# for the surface sampler, all new points are almost exactly 1 radius
# away from at least one existing sample +eps to avoid rejection
self.radius_factor = 2 if hypersphere == "volume" else 1.001
self.radius = radius
self.radius_squared = self.radius**2

# sample to generate per iteration in the hypersphere around center
self.ncandidates = ncandidates

with np.errstate(divide='ignore'):
self.cell_size = self.radius / np.sqrt(self.d)
self.grid_size = (
np.ceil(np.ones(self.d) / self.cell_size)
).astype(int)

self._initialize_grid_pool()

def _initialize_grid_pool(self):
"""Sampling pool and sample grid."""
self.sample_pool = []
# Positions of cells
# n-dim value for each grid cell
self.sample_grid = np.empty(
np.append(self.grid_size, self.d),
dtype=np.float32
)
# Initialise empty cells with NaNs
self.sample_grid.fill(np.nan)

def random(self, n: IntNumber = 1) -> np.ndarray:
"""Draw `n` in the interval ``[0, 1]``.
Note that it can return fewer samples if the space is full.
See the note section of the class.
Parameters
----------
n : int, optional
Number of samples to generate in the parameter space. Default is 1.
Returns
-------
sample : array_like (n, d)
QMC sample.
"""
if n == 0 or self.d == 0:
return np.empty((n, self.d))

def in_limits(sample: np.ndarray) -> bool:
return (sample.max() <= 1.) and (sample.min() >= 0.)

def in_neighborhood(candidate: np.ndarray, n: int = 2) -> bool:
"""
Check if there are samples closer than ``radius_squared`` to the
`candidate` sample.
"""
indices = (candidate / self.cell_size).astype(int)
ind_min = np.maximum(indices - n, np.zeros(self.d, dtype=int))
ind_max = np.minimum(indices + n + 1, self.grid_size)

# Check if the center cell is empty
if not np.isnan(self.sample_grid[tuple(indices)][0]):
return True

a = [slice(ind_min[i], ind_max[i]) for i in range(self.d)]

# guards against: invalid value encountered in less as we are
# comparing with nan and returns False. Which is wanted.
with np.errstate(invalid='ignore'):
if np.any(
np.sum(
np.square(candidate - self.sample_grid[tuple(a)]),
axis=self.d
) < self.radius_squared
):
return True

return False

def add_sample(candidate: np.ndarray) -> None:
self.sample_pool.append(candidate)
indices = (candidate / self.cell_size).astype(int)
self.sample_grid[tuple(indices)] = candidate
curr_sample.append(candidate)

curr_sample: List[np.ndarray] = []

if len(self.sample_pool) == 0:
# the pool is being initialized with a single random sample
add_sample(self.rng.random(self.d))
num_drawn = 1
else:
num_drawn = 0

# exhaust sample pool to have up to n sample
while len(self.sample_pool) and num_drawn < n:
# select a sample from the available pool
idx_center = rng_integers(self.rng, len(self.sample_pool))
center = self.sample_pool[idx_center]
del self.sample_pool[idx_center]

# generate candidates around the center sample
candidates = self.hypersphere_method(
center, self.radius * self.radius_factor, self.ncandidates
)

# keep candidates that satisfy some conditions
for candidate in candidates:
if in_limits(candidate) and not in_neighborhood(candidate):
add_sample(candidate)

num_drawn += 1
if num_drawn >= n:
break

self.num_generated += num_drawn
return np.array(curr_sample)

def fill_space(self) -> np.ndarray:
"""Draw ``n`` samples in the interval ``[0, 1]``.
Unlike `random`, this method will try to add points until
the space is full. Depending on ``candidates`` (and to a lesser extent
other parameters), some empty areas can still be present in the sample.
.. warning::
This can be extremely slow in high dimensions or if the
``radius`` is very small-with respect to the dimensionality.
Returns
-------
sample : array_like (n, d)
QMC sample.
"""
return self.random(np.inf) # type: ignore[arg-type]

def reset(self) -> PoissonDisk:
"""Reset the engine to base state.
Returns
-------
engine : PoissonDisk
Engine reset to its base state.
"""
super().reset()
self._initialize_grid_pool()
return self

def _hypersphere_volume_sample(
self, center: np.ndarray, radius: DecimalNumber,
candidates: IntNumber = 1
) -> np.ndarray:
"""Uniform sampling within hypersphere."""
# should remove samples within r/2
x = self.rng.standard_normal(size=(candidates, self.d))
ssq = np.sum(x**2, axis=1)
fr = radius * gammainc(self.d/2, ssq/2)**(1/self.d) / np.sqrt(ssq)
fr_tiled = np.tile(
fr.reshape(-1, 1), (1, self.d) # type: ignore[arg-type]
)
p = center + np.multiply(x, fr_tiled)
return p

def _hypersphere_surface_sample(
self, center: np.ndarray, radius: DecimalNumber,
candidates: IntNumber = 1
) -> np.ndarray:
"""Uniform sampling on the hypersphere's surface."""
vec = self.rng.standard_normal(size=(candidates, self.d))
vec /= np.linalg.norm(vec, axis=1)[:, None]
p = center + np.multiply(vec, radius)
return p


class MultivariateNormalQMC:
r"""QMC sampling from a multivariate Normal :math:`N(\mu, \Sigma)`.
Expand Down
1 change: 1 addition & 0 deletions scipy/stats/qmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Sobol
Halton
LatinHypercube
PoissonDisk
MultinomialQMC
MultivariateNormalQMC
Expand Down
Loading

0 comments on commit 1451b2c

Please sign in to comment.