Skip to content

Commit

Permalink
ENH: Extend multinomial and fix zipf
Browse files Browse the repository at this point in the history
Extend multinomial to allow broadcasting
Fix zipf changes missed in NumPy
Enable 0 as valid input for hypergeometric
  • Loading branch information
bashtage authored and mattip committed May 20, 2019
1 parent 9578dcf commit 0f3dd06
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 63 deletions.
3 changes: 3 additions & 0 deletions numpy/random/distributions.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,6 @@ cdef extern from "src/distributions/distributions.h":
np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
bint use_masked,
np.npy_bool *out) nogil

void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix,
double *pix, np.npy_intp d, binomial_t *binomial) nogil
71 changes: 49 additions & 22 deletions numpy/random/generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3642,7 +3642,7 @@ cdef class RandomGenerator:
x.shape = tuple(final_shape)
return x

def multinomial(self, np.npy_intp n, object pvals, size=None):
def multinomial(self, object n, object pvals, size=None):
"""
multinomial(n, pvals, size=None)
Expand All @@ -3658,7 +3658,7 @@ cdef class RandomGenerator:
Parameters
----------
n : int
n : int or array-like of ints
Number of experiments.
pvals : sequence of floats, length p
Probabilities of each of the ``p`` different outcomes. These
Expand Down Expand Up @@ -3697,6 +3697,18 @@ cdef class RandomGenerator:
For the first run, we threw 3 times 1, 4 times 2, etc. For the second,
we threw 2 times 1, 4 times 2, etc.
Now, do one experiment throwing the dice 10 time, and 10 times again,
and another throwing the dice 20 times, and 20 times again:
>>> np.random.multinomial([[10], [20]], [1/6.]*6, size=2)
array([[[2, 4, 0, 1, 2, 1],
[1, 3, 0, 3, 1, 2]],
[[1, 4, 4, 4, 4, 3],
[3, 3, 2, 5, 5, 2]]]) # random
The first array shows the outcomes of throwing the dice 10 times, and
the second shows the outcomes from throwing the dice 20 times.
A loaded die is more likely to land on number 6:
>>> np.random.multinomial(100, [1/7.]*5 + [2/7.])
Expand All @@ -3717,19 +3729,44 @@ cdef class RandomGenerator:
array([100, 0])
"""
cdef np.npy_intp d, i, j, dn, sz
cdef np.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"

cdef np.npy_intp d, i, sz, offset
cdef np.ndarray parr, mnarr, on, temp_arr
cdef double *pix
cdef int64_t *mnix
cdef double Sum
cdef int64_t ni
cdef np.broadcast it

d = len(pvals)
on = <np.ndarray>np.PyArray_FROM_OTF(n, np.NPY_INT64, np.NPY_ALIGNED)
parr = <np.ndarray>np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED)
pix = <double*>np.PyArray_DATA(parr)

check_array_constraint(parr, 'pvals', CONS_BOUNDED_0_1)
if kahan_sum(pix, d-1) > (1.0 + 1e-12):
raise ValueError("sum(pvals[:-1]) > 1.0")

if np.PyArray_NDIM(on) != 0: # vector
check_array_constraint(on, 'n', CONS_NON_NEGATIVE)
if size is None:
it = np.PyArray_MultiIterNew1(on)
else:
temp = np.empty(size, dtype=np.int8)
temp_arr = <np.ndarray>temp
it = np.PyArray_MultiIterNew2(on, temp_arr)
shape = it.shape + (d,)
multin = np.zeros(shape, dtype=np.int64)
mnarr = <np.ndarray>multin
mnix = <int64_t*>np.PyArray_DATA(mnarr)
offset = 0
sz = it.size
with self.lock, nogil:
for i in range(sz):
ni = (<int64_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
offset += d
np.PyArray_MultiIter_NEXT(it)
return multin

if size is None:
shape = (d,)
else:
Expand All @@ -3742,23 +3779,13 @@ cdef class RandomGenerator:
mnarr = <np.ndarray>multin
mnix = <int64_t*>np.PyArray_DATA(mnarr)
sz = np.PyArray_SIZE(mnarr)

ni = n
check_constraint(ni, 'n', CONS_NON_NEGATIVE)
offset = 0
with self.lock, nogil:
i = 0
while i < sz:
Sum = 1.0
dn = n
for j in range(d-1):
mnix[i+j] = random_binomial(self._brng, pix[j]/Sum, dn,
self._binomial)
dn = dn - mnix[i+j]
if dn <= 0:
break
Sum = Sum - pix[j]
if dn > 0:
mnix[i+d-1] = dn

i = i + d
for i in range(sz // d):
random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
offset += d

return multin

Expand Down
30 changes: 10 additions & 20 deletions numpy/random/mtrand.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3790,16 +3790,16 @@ cdef class RandomState:
array([100, 0])
"""
cdef np.npy_intp d, i, j, dn, sz
cdef np.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"
cdef np.npy_intp d, i, sz, offset
cdef np.ndarray parr, mnarr
cdef double *pix
cdef int64_t *mnix
cdef double Sum
cdef int64_t ni

d = len(pvals)
parr = <np.ndarray>np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED)
pix = <double*>np.PyArray_DATA(parr)

check_array_constraint(parr, 'pvals', CONS_BOUNDED_0_1)
if kahan_sum(pix, d-1) > (1.0 + 1e-12):
raise ValueError("sum(pvals[:-1]) > 1.0")

Expand All @@ -3815,23 +3815,13 @@ cdef class RandomState:
mnarr = <np.ndarray>multin
mnix = <int64_t*>np.PyArray_DATA(mnarr)
sz = np.PyArray_SIZE(mnarr)

ni = n
check_constraint(ni, 'n', CONS_NON_NEGATIVE)
offset = 0
with self.lock, nogil:
i = 0
while i < sz:
Sum = 1.0
dn = n
for j in range(d-1):
mnix[i+j] = random_binomial(self._brng, pix[j]/Sum, dn,
self._binomial)
dn = dn - mnix[i+j]
if dn <= 0:
break
Sum = Sum - pix[j]
if dn > 0:
mnix[i+d-1] = dn

i = i + d
for i in range(sz // d):
random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
offset += d

return multin

Expand Down
52 changes: 39 additions & 13 deletions numpy/random/src/distributions/distributions.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,25 +1048,31 @@ int64_t random_geometric(brng_t *brng_state, double p) {
}

int64_t random_zipf(brng_t *brng_state, double a) {
double T, U, V;
int64_t X;
double am1, b;

am1 = a - 1.0;
b = pow(2.0, am1);
do {
U = 1.0 - next_double(brng_state);
V = next_double(brng_state);
X = (int64_t)floor(pow(U, -1.0 / am1));
/* The real result may be above what can be represented in a int64.
* It will get casted to -sys.maxint-1. Since this is
* a straightforward rejection algorithm, we can just reject this value
* in the rejection condition below. This function then models a Zipf
while (1) {
double T, U, V, X;

U = 1.0 - random_double(brng_state);
V = random_double(brng_state);
X = floor(pow(U, -1.0 / am1));
/*
* The real result may be above what can be represented in a signed
* long. Since this is a straightforward rejection algorithm, we can
* just reject this value. This function then models a Zipf
* distribution truncated to sys.maxint.
*/
if (X > LONG_MAX || X < 1.0) {
continue;
}

T = pow(1.0 + 1.0 / X, am1);
} while (((V * X * (T - 1.0) / (b - 1.0)) > (T / b)) || X < 1);
return X;
if (V * X * (T - 1.0) / (b - 1.0) <= T / b) {
return (long)X;
}
}
}

double random_triangular(brng_t *brng_state, double left, double mode,
Expand Down Expand Up @@ -1179,8 +1185,10 @@ int64_t random_hypergeometric(brng_t *brng_state, int64_t good, int64_t bad,
int64_t sample) {
if (sample > 10) {
return random_hypergeometric_hrua(brng_state, good, bad, sample);
} else {
} else if (sample > 0) {
return random_hypergeometric_hyp(brng_state, good, bad, sample);
} else {
return 0;
}
}

Expand Down Expand Up @@ -1809,3 +1817,21 @@ void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, npy_bool rng,
out[i] = buffered_bounded_bool(brng_state, off, rng, mask, &bcnt, &buf);
}
}

void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix,
double *pix, npy_intp d, binomial_t *binomial) {
double remaining_p = 1.0;
npy_intp j;
int64_t dn = n;
for (j = 0; j < (d - 1); j++) {
mnix[j] = random_binomial(brng_state, pix[j] / remaining_p, dn, binomial);
dn = dn - mnix[j];
if (dn <= 0) {
break;
}
remaining_p -= pix[j];
if (dn > 0) {
mnix[d - 1] = dn;
}
}
}
3 changes: 3 additions & 0 deletions numpy/random/src/distributions/distributions.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,4 +217,7 @@ DECLDIR void random_bounded_bool_fill(brng_t *brng_state, npy_bool off,
npy_bool rng, npy_intp cnt,
bool use_masked, npy_bool *out);

DECLDIR void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix,
double *pix, npy_intp d, binomial_t *binomial);

#endif
34 changes: 26 additions & 8 deletions numpy/random/tests/test_generator_mt19937.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ def test_size(self):

def test_invalid_prob(self):
assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2])
assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9])

def test_invalid_n(self):
assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2])
assert_raises(ValueError, random.multinomial, [-1] * 10, [0.8, 0.2])


class TestSetState(object):
Expand Down Expand Up @@ -804,8 +809,7 @@ def test_geometric_exceptions(self):
assert_raises(ValueError, random.geometric, [1.1] * 10)
assert_raises(ValueError, random.geometric, -0.1)
assert_raises(ValueError, random.geometric, [-0.1] * 10)
with suppress_warnings() as sup:
sup.record(RuntimeWarning)
with np.errstate(invalid='ignore'):
assert_raises(ValueError, random.geometric, np.nan)
assert_raises(ValueError, random.geometric, [np.nan] * 10)

Expand Down Expand Up @@ -888,8 +892,7 @@ def test_logseries(self):
assert_array_equal(actual, desired)

def test_logseries_exceptions(self):
with suppress_warnings() as sup:
sup.record(RuntimeWarning)
with np.errstate(invalid='ignore'):
assert_raises(ValueError, random.logseries, np.nan)
assert_raises(ValueError, random.logseries, [np.nan] * 10)

Expand Down Expand Up @@ -964,8 +967,7 @@ def test_negative_binomial(self):
assert_array_equal(actual, desired)

def test_negative_binomial_exceptions(self):
with suppress_warnings() as sup:
sup.record(RuntimeWarning)
with np.errstate(invalid='ignore'):
assert_raises(ValueError, random.negative_binomial, 100, np.nan)
assert_raises(ValueError, random.negative_binomial, 100,
[np.nan] * 10)
Expand Down Expand Up @@ -1046,8 +1048,7 @@ def test_poisson_exceptions(self):
assert_raises(ValueError, random.poisson, [lamneg] * 10)
assert_raises(ValueError, random.poisson, lambig)
assert_raises(ValueError, random.poisson, [lambig] * 10)
with suppress_warnings() as sup:
sup.record(RuntimeWarning)
with np.errstate(invalid='ignore'):
assert_raises(ValueError, random.poisson, np.nan)
assert_raises(ValueError, random.poisson, [np.nan] * 10)

Expand Down Expand Up @@ -1849,6 +1850,23 @@ def test_logseries(self):
assert_raises(ValueError, logseries, bad_p_one * 3)
assert_raises(ValueError, logseries, bad_p_two * 3)

def test_multinomial(self):
random.brng.seed(self.seed)
actual = random.multinomial([5, 20], [1 / 6.] * 6, size=(3, 2))
desired = np.array([[[1, 1, 1, 1, 0, 1],
[4, 5, 1, 4, 3, 3]],
[[1, 1, 1, 0, 0, 2],
[2, 0, 4, 3, 7, 4]],
[[1, 2, 0, 0, 2, 2],
[3, 2, 3, 4, 2, 6]]], dtype=np.int64)
assert_array_equal(actual, desired)

random.brng.seed(self.seed)
actual = random.multinomial([5, 20], [1 / 6.] * 6)
desired = np.array([[1, 1, 1, 1, 0, 1],
[4, 5, 1, 4, 3, 3]], dtype=np.int64)
assert_array_equal(actual, desired)


class TestThread(object):
# make sure each state produces the same sequence even in threads
Expand Down
4 changes: 4 additions & 0 deletions numpy/random/tests/test_randomstate.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ def test_size(self):

def test_invalid_prob(self):
assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2])
assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9])

def test_invalid_n(self):
assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2])


class TestSetState(object):
Expand Down

0 comments on commit 0f3dd06

Please sign in to comment.