diff --git a/numpy/random/distributions.pxd b/numpy/random/distributions.pxd index ddb7a84bf190..5047aed9a325 100644 --- a/numpy/random/distributions.pxd +++ b/numpy/random/distributions.pxd @@ -144,3 +144,6 @@ cdef extern from "src/distributions/distributions.h": np.npy_bool off, np.npy_bool rng, np.npy_intp cnt, bint use_masked, np.npy_bool *out) nogil + + void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix, + double *pix, np.npy_intp d, binomial_t *binomial) nogil diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx index 7f18392a89c3..29a36787ebbc 100644 --- a/numpy/random/generator.pyx +++ b/numpy/random/generator.pyx @@ -3642,7 +3642,7 @@ cdef class RandomGenerator: x.shape = tuple(final_shape) return x - def multinomial(self, np.npy_intp n, object pvals, size=None): + def multinomial(self, object n, object pvals, size=None): """ multinomial(n, pvals, size=None) @@ -3658,7 +3658,7 @@ cdef class RandomGenerator: Parameters ---------- - n : int + n : int or array-like of ints Number of experiments. pvals : sequence of floats, length p Probabilities of each of the ``p`` different outcomes. These @@ -3697,6 +3697,18 @@ cdef class RandomGenerator: For the first run, we threw 3 times 1, 4 times 2, etc. For the second, we threw 2 times 1, 4 times 2, etc. + Now, do one experiment throwing the dice 10 time, and 10 times again, + and another throwing the dice 20 times, and 20 times again: + + >>> np.random.multinomial([[10], [20]], [1/6.]*6, size=2) + array([[[2, 4, 0, 1, 2, 1], + [1, 3, 0, 3, 1, 2]], + [[1, 4, 4, 4, 4, 3], + [3, 3, 2, 5, 5, 2]]]) # random + + The first array shows the outcomes of throwing the dice 10 times, and + the second shows the outcomes from throwing the dice 20 times. + A loaded die is more likely to land on number 6: >>> np.random.multinomial(100, [1/7.]*5 + [2/7.]) @@ -3717,19 +3729,44 @@ cdef class RandomGenerator: array([100, 0]) """ - cdef np.npy_intp d, i, j, dn, sz - cdef np.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr" + + cdef np.npy_intp d, i, sz, offset + cdef np.ndarray parr, mnarr, on, temp_arr cdef double *pix cdef int64_t *mnix - cdef double Sum + cdef int64_t ni + cdef np.broadcast it d = len(pvals) + on = np.PyArray_FROM_OTF(n, np.NPY_INT64, np.NPY_ALIGNED) parr = np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED) pix = np.PyArray_DATA(parr) - + check_array_constraint(parr, 'pvals', CONS_BOUNDED_0_1) if kahan_sum(pix, d-1) > (1.0 + 1e-12): raise ValueError("sum(pvals[:-1]) > 1.0") + if np.PyArray_NDIM(on) != 0: # vector + check_array_constraint(on, 'n', CONS_NON_NEGATIVE) + if size is None: + it = np.PyArray_MultiIterNew1(on) + else: + temp = np.empty(size, dtype=np.int8) + temp_arr = temp + it = np.PyArray_MultiIterNew2(on, temp_arr) + shape = it.shape + (d,) + multin = np.zeros(shape, dtype=np.int64) + mnarr = multin + mnix = np.PyArray_DATA(mnarr) + offset = 0 + sz = it.size + with self.lock, nogil: + for i in range(sz): + ni = (np.PyArray_MultiIter_DATA(it, 0))[0] + random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial) + offset += d + np.PyArray_MultiIter_NEXT(it) + return multin + if size is None: shape = (d,) else: @@ -3742,23 +3779,13 @@ cdef class RandomGenerator: mnarr = multin mnix = np.PyArray_DATA(mnarr) sz = np.PyArray_SIZE(mnarr) - + ni = n + check_constraint(ni, 'n', CONS_NON_NEGATIVE) + offset = 0 with self.lock, nogil: - i = 0 - while i < sz: - Sum = 1.0 - dn = n - for j in range(d-1): - mnix[i+j] = random_binomial(self._brng, pix[j]/Sum, dn, - self._binomial) - dn = dn - mnix[i+j] - if dn <= 0: - break - Sum = Sum - pix[j] - if dn > 0: - mnix[i+d-1] = dn - - i = i + d + for i in range(sz // d): + random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial) + offset += d return multin diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index 145af4c7cdb1..351d05e93b97 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -3790,16 +3790,16 @@ cdef class RandomState: array([100, 0]) """ - cdef np.npy_intp d, i, j, dn, sz - cdef np.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr" + cdef np.npy_intp d, i, sz, offset + cdef np.ndarray parr, mnarr cdef double *pix cdef int64_t *mnix - cdef double Sum + cdef int64_t ni d = len(pvals) parr = np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED) pix = np.PyArray_DATA(parr) - + check_array_constraint(parr, 'pvals', CONS_BOUNDED_0_1) if kahan_sum(pix, d-1) > (1.0 + 1e-12): raise ValueError("sum(pvals[:-1]) > 1.0") @@ -3815,23 +3815,13 @@ cdef class RandomState: mnarr = multin mnix = np.PyArray_DATA(mnarr) sz = np.PyArray_SIZE(mnarr) - + ni = n + check_constraint(ni, 'n', CONS_NON_NEGATIVE) + offset = 0 with self.lock, nogil: - i = 0 - while i < sz: - Sum = 1.0 - dn = n - for j in range(d-1): - mnix[i+j] = random_binomial(self._brng, pix[j]/Sum, dn, - self._binomial) - dn = dn - mnix[i+j] - if dn <= 0: - break - Sum = Sum - pix[j] - if dn > 0: - mnix[i+d-1] = dn - - i = i + d + for i in range(sz // d): + random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial) + offset += d return multin diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c index 83806de38997..5f49b68be4c7 100644 --- a/numpy/random/src/distributions/distributions.c +++ b/numpy/random/src/distributions/distributions.c @@ -1048,25 +1048,31 @@ int64_t random_geometric(brng_t *brng_state, double p) { } int64_t random_zipf(brng_t *brng_state, double a) { - double T, U, V; - int64_t X; double am1, b; am1 = a - 1.0; b = pow(2.0, am1); - do { - U = 1.0 - next_double(brng_state); - V = next_double(brng_state); - X = (int64_t)floor(pow(U, -1.0 / am1)); - /* The real result may be above what can be represented in a int64. - * It will get casted to -sys.maxint-1. Since this is - * a straightforward rejection algorithm, we can just reject this value - * in the rejection condition below. This function then models a Zipf + while (1) { + double T, U, V, X; + + U = 1.0 - random_double(brng_state); + V = random_double(brng_state); + X = floor(pow(U, -1.0 / am1)); + /* + * The real result may be above what can be represented in a signed + * long. Since this is a straightforward rejection algorithm, we can + * just reject this value. This function then models a Zipf * distribution truncated to sys.maxint. */ + if (X > LONG_MAX || X < 1.0) { + continue; + } + T = pow(1.0 + 1.0 / X, am1); - } while (((V * X * (T - 1.0) / (b - 1.0)) > (T / b)) || X < 1); - return X; + if (V * X * (T - 1.0) / (b - 1.0) <= T / b) { + return (long)X; + } + } } double random_triangular(brng_t *brng_state, double left, double mode, @@ -1179,8 +1185,10 @@ int64_t random_hypergeometric(brng_t *brng_state, int64_t good, int64_t bad, int64_t sample) { if (sample > 10) { return random_hypergeometric_hrua(brng_state, good, bad, sample); - } else { + } else if (sample > 0) { return random_hypergeometric_hyp(brng_state, good, bad, sample); + } else { + return 0; } } @@ -1809,3 +1817,21 @@ void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, npy_bool rng, out[i] = buffered_bounded_bool(brng_state, off, rng, mask, &bcnt, &buf); } } + +void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix, + double *pix, npy_intp d, binomial_t *binomial) { + double remaining_p = 1.0; + npy_intp j; + int64_t dn = n; + for (j = 0; j < (d - 1); j++) { + mnix[j] = random_binomial(brng_state, pix[j] / remaining_p, dn, binomial); + dn = dn - mnix[j]; + if (dn <= 0) { + break; + } + remaining_p -= pix[j]; + if (dn > 0) { + mnix[d - 1] = dn; + } + } +} diff --git a/numpy/random/src/distributions/distributions.h b/numpy/random/src/distributions/distributions.h index 7ca31a16cba6..8ec4a83e8b3e 100644 --- a/numpy/random/src/distributions/distributions.h +++ b/numpy/random/src/distributions/distributions.h @@ -217,4 +217,7 @@ DECLDIR void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, npy_bool rng, npy_intp cnt, bool use_masked, npy_bool *out); +DECLDIR void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix, + double *pix, npy_intp d, binomial_t *binomial); + #endif diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py index 3eda84d84d4f..895e7fc6cf4a 100644 --- a/numpy/random/tests/test_generator_mt19937.py +++ b/numpy/random/tests/test_generator_mt19937.py @@ -90,6 +90,11 @@ def test_size(self): def test_invalid_prob(self): assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2]) + assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9]) + + def test_invalid_n(self): + assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2]) + assert_raises(ValueError, random.multinomial, [-1] * 10, [0.8, 0.2]) class TestSetState(object): @@ -804,8 +809,7 @@ def test_geometric_exceptions(self): assert_raises(ValueError, random.geometric, [1.1] * 10) assert_raises(ValueError, random.geometric, -0.1) assert_raises(ValueError, random.geometric, [-0.1] * 10) - with suppress_warnings() as sup: - sup.record(RuntimeWarning) + with np.errstate(invalid='ignore'): assert_raises(ValueError, random.geometric, np.nan) assert_raises(ValueError, random.geometric, [np.nan] * 10) @@ -888,8 +892,7 @@ def test_logseries(self): assert_array_equal(actual, desired) def test_logseries_exceptions(self): - with suppress_warnings() as sup: - sup.record(RuntimeWarning) + with np.errstate(invalid='ignore'): assert_raises(ValueError, random.logseries, np.nan) assert_raises(ValueError, random.logseries, [np.nan] * 10) @@ -964,8 +967,7 @@ def test_negative_binomial(self): assert_array_equal(actual, desired) def test_negative_binomial_exceptions(self): - with suppress_warnings() as sup: - sup.record(RuntimeWarning) + with np.errstate(invalid='ignore'): assert_raises(ValueError, random.negative_binomial, 100, np.nan) assert_raises(ValueError, random.negative_binomial, 100, [np.nan] * 10) @@ -1046,8 +1048,7 @@ def test_poisson_exceptions(self): assert_raises(ValueError, random.poisson, [lamneg] * 10) assert_raises(ValueError, random.poisson, lambig) assert_raises(ValueError, random.poisson, [lambig] * 10) - with suppress_warnings() as sup: - sup.record(RuntimeWarning) + with np.errstate(invalid='ignore'): assert_raises(ValueError, random.poisson, np.nan) assert_raises(ValueError, random.poisson, [np.nan] * 10) @@ -1849,6 +1850,23 @@ def test_logseries(self): assert_raises(ValueError, logseries, bad_p_one * 3) assert_raises(ValueError, logseries, bad_p_two * 3) + def test_multinomial(self): + random.brng.seed(self.seed) + actual = random.multinomial([5, 20], [1 / 6.] * 6, size=(3, 2)) + desired = np.array([[[1, 1, 1, 1, 0, 1], + [4, 5, 1, 4, 3, 3]], + [[1, 1, 1, 0, 0, 2], + [2, 0, 4, 3, 7, 4]], + [[1, 2, 0, 0, 2, 2], + [3, 2, 3, 4, 2, 6]]], dtype=np.int64) + assert_array_equal(actual, desired) + + random.brng.seed(self.seed) + actual = random.multinomial([5, 20], [1 / 6.] * 6) + desired = np.array([[1, 1, 1, 1, 0, 1], + [4, 5, 1, 4, 3, 3]], dtype=np.int64) + assert_array_equal(actual, desired) + class TestThread(object): # make sure each state produces the same sequence even in threads diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py index d0ad5e794fa8..8d10823d6a57 100644 --- a/numpy/random/tests/test_randomstate.py +++ b/numpy/random/tests/test_randomstate.py @@ -112,6 +112,10 @@ def test_size(self): def test_invalid_prob(self): assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2]) + assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9]) + + def test_invalid_n(self): + assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2]) class TestSetState(object):