/
sample_halton_sequence_lib.py
391 lines (333 loc) · 17 KB
/
sample_halton_sequence_lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# Copyright 2018 The TensorFlow Probability Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Quasi Monte Carlo support: Halton sequence."""
# Dependency imports
import numpy as np
import tensorflow.compat.v2 as tf
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.internal import prefer_static as ps
from tensorflow_probability.python.internal import samplers
__all__ = [
'sample_halton_sequence',
]
# The maximum dimension we support. This is limited by the number of primes
# in the _PRIMES array.
_MAX_DIMENSION = 10000
def sample_halton_sequence(dim,
num_results=None,
sequence_indices=None,
dtype=tf.float32,
randomized=True,
seed=None,
name=None):
r"""Returns a sample from the `dim` dimensional Halton sequence.
Warning: The sequence elements take values only between 0 and 1. Care must be
taken to appropriately transform the domain of a function if it differs from
the unit cube before evaluating integrals using Halton samples. It is also
important to remember that quasi-random numbers without randomization are not
a replacement for pseudo-random numbers in every context. Quasi random numbers
are completely deterministic and typically have significant negative
autocorrelation unless randomization is used.
Computes the members of the low discrepancy Halton sequence in dimension
`dim`. The `dim`-dimensional sequence takes values in the unit hypercube in
`dim` dimensions. Currently, only dimensions up to 10000 are supported. The
prime base for the k-th axes is the k-th prime starting from 2. For example,
if `dim` = 3, then the bases will be [2, 3, 5] respectively and the first
element of the non-randomized sequence will be: [0.5, 0.333, 0.2]. For a more
complete description of the Halton sequences see
[here](https://en.wikipedia.org/wiki/Halton_sequence). For low discrepancy
sequences and their applications see
[here](https://en.wikipedia.org/wiki/Low-discrepancy_sequence).
If `randomized` is true, this function produces a scrambled version of the
Halton sequence introduced by [Owen (2017)][1]. For the advantages of
randomization of low discrepancy sequences see [here](
https://en.wikipedia.org/wiki/Quasi-Monte_Carlo_method#Randomization_of_quasi-Monte_Carlo).
The number of samples produced is controlled by the `num_results` and
`sequence_indices` parameters. The user must supply either `num_results` or
`sequence_indices` but not both.
The former is the number of samples to produce starting from the first
element. If `sequence_indices` is given instead, the specified elements of
the sequence are generated. For example, sequence_indices=tf.range(10) is
equivalent to specifying n=10.
#### Examples
```python
import tensorflow as tf
import tensorflow_probability as tfp
# Produce the first 1000 members of the Halton sequence in 3 dimensions.
num_results = 1000
dim = 3
sample = tfp.mcmc.sample_halton_sequence(
dim,
num_results=num_results,
seed=127)
# Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional
# hypercube.
powers = tf.range(1.0, limit=dim + 1)
integral = tf.reduce_mean(tf.reduce_prod(sample ** powers, axis=-1))
true_value = 1.0 / tf.reduce_prod(powers + 1.0)
with tf.Session() as session:
values = session.run((integral, true_value))
# Produces a relative absolute error of 1.7%.
print ("Estimated: %f, True Value: %f" % values)
# Now skip the first 1000 samples and recompute the integral with the next
# thousand samples. The sequence_indices argument can be used to do this.
sequence_indices = tf.range(start=1000, limit=1000 + num_results,
dtype=tf.int32)
sample_leaped = tfp.mcmc.sample_halton_sequence(
dim,
sequence_indices=sequence_indices,
seed=111217)
integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers,
axis=-1))
with tf.Session() as session:
values = session.run((integral_leaped, true_value))
# Now produces a relative absolute error of 0.05%.
print ("Leaped Estimated: %f, True Value: %f" % values)
```
Args:
dim: Positive Python `int` representing each sample's `event_size.` Must
not be greater than 10000.
num_results: (Optional) Positive scalar `Tensor` of dtype int32. The number
of samples to generate. Either this parameter or sequence_indices must
be specified but not both. If this parameter is None, then the behaviour
is determined by the `sequence_indices`.
Default value: `None`.
sequence_indices: (Optional) `Tensor` of dtype int32 and rank 1. The
elements of the sequence to compute specified by their position in the
sequence. The entries index into the Halton sequence starting with 0 and
hence, must be whole numbers. For example, sequence_indices=[0, 5, 6] will
produce the first, sixth and seventh elements of the sequence. If this
parameter is None, then the `num_results` parameter must be specified
which gives the number of desired samples starting from the first sample.
Default value: `None`.
dtype: (Optional) The dtype of the sample. One of: `float16`, `float32` or
`float64`.
Default value: `tf.float32`.
randomized: (Optional) bool indicating whether to produce a randomized
Halton sequence. If True, applies the randomization described in
[Owen (2017)][1].
Default value: `True`.
seed: PRNG seed; see `tfp.random.sanitize_seed` for details. Only used if
`randomized` is True. If not supplied and `randomized` is True, no seed is
set.
Default value: `None`.
name: (Optional) Python `str` describing ops managed by this function. If
not supplied the name of this function is used.
Default value: "sample_halton_sequence".
Returns:
halton_elements: Elements of the Halton sequence. `Tensor` of supplied dtype
and `shape` `[num_results, dim]` if `num_results` was specified or shape
`[s, dim]` where s is the size of `sequence_indices` if `sequence_indices`
were specified.
Raises:
ValueError: if both `sequence_indices` and `num_results` were specified or
if dimension `dim` is less than 1 or greater than 10000.
#### References
[1]: Art B. Owen. A randomized Halton algorithm in R. _arXiv preprint
arXiv:1706.02808_, 2017. https://arxiv.org/abs/1706.02808
"""
if dim < 1 or dim > _MAX_DIMENSION:
raise ValueError(
'Dimension must be between 1 and {}. Supplied {}'.format(_MAX_DIMENSION,
dim))
if (num_results is None) == (sequence_indices is None):
raise ValueError('Either `num_results` or `sequence_indices` must be'
' specified but not both.')
if not dtype_util.is_floating(dtype):
raise ValueError('dtype must be of `float`-type')
with tf.name_scope(name or 'sample'):
# Here and in the following, the shape layout is as follows:
# [sample dimension, event dimension, coefficient dimension].
# The coefficient dimension is an intermediate axes which will hold the
# weights of the starting integer when expressed in the (prime) base for
# an event dimension.
if sequence_indices is not None:
sequence_indices = tf.convert_to_tensor(sequence_indices)
indices = _get_indices(num_results, sequence_indices, dtype)
if num_results is None:
num_results = ps.reduce_max(indices)
radixes = _PRIMES[0:dim][..., np.newaxis]
max_sizes_by_axes = _base_expansion_size(num_results, radixes, dtype)
max_size = ps.reduce_max(max_sizes_by_axes)
# The powers of the radixes that we will need. Note that there is a bit
# of an excess here. Suppose we need the place value coefficients of 7
# in base 2 and 3. For 2, we will have 3 digits but we only need 2 digits
# for base 3. However, we can only create rectangular tensors so we
# store both expansions in a [2, 3] tensor. This leads to the problem that
# we might end up attempting to raise large numbers to large powers. For
# example, base 2 expansion of 1024 has 10 digits. If we were in 10
# dimensions, then the 10th prime (29) we will end up computing 29^10 even
# though we don't need it. We avoid this by setting the exponents for each
# axes to 0 beyond the maximum value needed for that dimension.
exponents_by_axes = tf.tile([tf.range(max_size, dtype=dtype)], [dim, 1])
# The mask is true for those coefficients that are irrelevant.
weight_mask = exponents_by_axes < max_sizes_by_axes
capped_exponents = tf.where(
weight_mask, exponents_by_axes, dtype_util.as_numpy_dtype(dtype)(0.))
weights = tf.cast(radixes ** capped_exponents, dtype=dtype)
# The following computes the base b expansion of the indices. Suppose,
# x = a0 + a1*b + a2*b^2 + ... Then, performing a floor div of x with
# the vector (1, b, b^2, b^3, ...) will produce
# (a0 + s1 * b, a1 + s2 * b, ...) where s_i are coefficients we don't care
# about. Noting that all a_i < b by definition of place value expansion,
# we see that taking the elements mod b of the above vector produces the
# place value expansion coefficients.
coeffs = tf.math.floordiv(indices, weights)
coeffs *= tf.cast(weight_mask, dtype)
coeffs %= radixes
if not randomized:
coeffs /= radixes
return tf.reduce_sum(coeffs / weights, axis=-1)
shuffle_seed, zero_correction_seed = samplers.split_seed(
seed, salt='MCMCSampleHaltonSequence')
coeffs = _randomize(coeffs, radixes, seed=shuffle_seed)
# Remove the contribution from randomizing the trailing zero for the
# axes where max_size_by_axes < max_size. This will be accounted
# for separately below (using zero_correction).
coeffs *= tf.cast(weight_mask, dtype)
coeffs /= radixes
base_values = tf.reduce_sum(coeffs / weights, axis=-1)
# The randomization used in Owen (2017) does not leave 0 invariant. While
# we have accounted for the randomization of the first `max_size_by_axes`
# coefficients, we still need to correct for the trailing zeros. Luckily,
# this is equivalent to adding a uniform random value scaled so the first
# `max_size_by_axes` coefficients are zero. The following statements perform
# this correction.
zero_correction = samplers.uniform([dim, 1],
seed=zero_correction_seed,
dtype=dtype)
zero_correction /= tf.cast(radixes ** max_sizes_by_axes, dtype)
return base_values + tf.reshape(zero_correction, [-1])
def _randomize(coeffs, radixes, seed=None):
"""Applies the Owen (2017) randomization to the coefficients."""
given_dtype = coeffs.dtype
coeffs = tf.cast(coeffs, dtype=tf.int32)
num_coeffs = ps.shape(coeffs)[-1]
perms = _get_permutations(num_coeffs, np.squeeze(radixes, axis=-1), seed=seed)
perms = tf.reshape(perms, shape=[-1])
radixes = tf.reshape(tf.cast(radixes, dtype=tf.int32), shape=[-1])
radix_sum = tf.reduce_sum(radixes)
radix_offsets = tf.reshape(tf.cumsum(radixes, exclusive=True),
shape=[-1, 1])
offsets = radix_offsets + ps.range(num_coeffs, dtype=tf.int32) * radix_sum
permuted_coeffs = tf.gather(perms, coeffs + offsets)
return tf.cast(permuted_coeffs, dtype=given_dtype)
def _get_permutations(num_results, dims, seed=None):
"""Uniform iid sample from the space of permutations.
Draws a sample of size `num_results` from the group of permutations of degrees
specified by the `dims` tensor. These are packed together into one tensor
such that each row is one sample from each of the dimensions in `dims`. For
example, if dims = [2,3] and num_results = 2, the result is a tensor of shape
[2, 2 + 3] and the first row of the result might look like:
[1, 0, 2, 0, 1]. The first two elements are a permutation over 2 elements
while the next three are a permutation over 3 elements.
Args:
num_results: A positive scalar `Tensor` of integral type. The number of
draws from the discrete uniform distribution over the permutation groups.
dims: A 1D numpy array of the same dtype as `num_results`. The degree of the
permutation groups from which to sample.
seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
Returns:
permutations: A `Tensor` of shape `[num_results, sum(dims)]` and the same
dtype as `dims`.
"""
n = dims.size
max_size = np.max(dims)
samples = samplers.uniform([num_results, n, max_size], seed=seed)
should_mask = np.arange(max_size) >= dims[..., np.newaxis]
# Choose a number that does not affect the permutation and relative location.
samples = tf.where(
should_mask,
dtype_util.as_numpy_dtype(samples.dtype)(np.arange(max_size) + 10.),
samples)
samples = tf.argsort(samples, axis=-1)
# Generate the set of indices to gather.
should_mask = np.tile(should_mask, [num_results, 1, 1])
indices = np.stack(np.where(~should_mask), axis=-1)
return tf.gather_nd(samples, indices)
def _get_indices(num_results, sequence_indices, dtype, name=None):
"""Generates starting points for the Halton sequence procedure.
The k'th element of the sequence is generated starting from a positive integer
which must be distinct for each `k`. It is conventional to choose the starting
point as `k` itself (or `k+1` if k is zero based). This function generates
the starting integers for the required elements and reshapes the result for
later use.
Args:
num_results: Positive scalar `Tensor` of dtype int32. The number of samples
to generate. If this parameter is supplied, then `sequence_indices`
should be None.
sequence_indices: `Tensor` of dtype int32 and rank 1. The entries
index into the Halton sequence starting with 0 and hence, must be whole
numbers. For example, sequence_indices=[0, 5, 6] will produce the first,
sixth and seventh elements of the sequence. If this parameter is not None
then `n` must be None.
dtype: The dtype of the sample. One of `float32` or `float64`.
Default is `float32`.
name: Python `str` name which describes ops created by this function.
Returns:
indices: `Tensor` of dtype `dtype` and shape = `[n, 1, 1]`.
"""
with tf.name_scope(name or 'get_indices'):
if sequence_indices is None:
np_dtype = dtype_util.as_numpy_dtype(dtype)
num_results_ = tf.get_static_value(num_results)
if num_results_ is not None:
sequence_indices = ps.range(np_dtype(num_results_), dtype=dtype)
else:
num_results = tf.cast(num_results, dtype=dtype)
sequence_indices = ps.range(num_results, dtype=dtype)
else:
sequence_indices = tf.cast(sequence_indices, dtype)
# Shift the indices so they are 1 based.
indices = sequence_indices + 1
# Reshape to make space for the event dimension and the place value
# coefficients.
return tf.reshape(indices, [-1, 1, 1])
def _base_expansion_size(num, bases, dtype):
"""Computes the number of terms in the place value expansion.
Let num = a0 + a1 b + a2 b^2 + ... ak b^k be the place value expansion of
`num` in base b (ak <> 0). This function computes and returns `k+1` for each
base `b` specified in `bases`.
This can be inferred from the base `b` logarithm of `num` as follows:
$$k = Floor(log_b (num)) + 1 = Floor( log(num) / log(b)) + 1$$
Args:
num: Scalar `Tensor` of dtype either `int32` or `int64`. The number to
compute the base expansion size of.
bases: `Tensor` of the same dtype as num. The bases to compute the size
against.
dtype: Return `dtype`.
Returns:
Tensor of dtype `dtype` and shape as `bases` containing the size of num when
written in that base.
"""
num_ = tf.get_static_value(num)
if num_ is not None:
return (np.floor(np.log(num_) / np.log(bases)) + 1).astype(
dtype_util.as_numpy_dtype(dtype))
return tf.floor(
tf.math.log(tf.cast(num, dtype)) / tf.math.log(tf.cast(bases, dtype))) + 1
def _primes_less_than(n):
"""Returns sorted array of primes such that `2 <= prime < n`."""
primes = np.ones((n + 1) // 2, dtype=bool)
j = 3
while j * j <= n:
if primes[j//2]:
primes[j*j//2::j] = False
j += 2
ret = 2 * np.where(primes)[0] + 1
ret[0] = 2 # :(
return ret
_PRIMES = _primes_less_than(104729 + 1)
assert len(_PRIMES) == _MAX_DIMENSION