ENH: Extend multinomial and fix zipf

Extend multinomial to allow broadcasting Fix zipf changes missed in NumPy Enable 0 as valid input for hypergeometric
numpy · May 20, 2019 · 0f3dd06 · 0f3dd06
1 parent 9578dcf
commit 0f3dd06
Show file tree

Hide file tree

Showing 7 changed files with 134 additions and 63 deletions.
diff --git a/numpy/random/distributions.pxd b/numpy/random/distributions.pxd
@@ -144,3 +144,6 @@ cdef extern from "src/distributions/distributions.h":
                                   np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
                                   bint use_masked,
                                   np.npy_bool *out) nogil
+
+    void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix,
+                            double *pix, np.npy_intp d, binomial_t *binomial) nogil
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx
@@ -3642,7 +3642,7 @@ cdef class RandomGenerator:
         x.shape = tuple(final_shape)
         return x
 
-    def multinomial(self, np.npy_intp n, object pvals, size=None):
+    def multinomial(self, object n, object pvals, size=None):
         """
         multinomial(n, pvals, size=None)
 
@@ -3658,7 +3658,7 @@ cdef class RandomGenerator:
 
         Parameters
         ----------
-        n : int
+        n : int or array-like of ints
             Number of experiments.
         pvals : sequence of floats, length p
             Probabilities of each of the ``p`` different outcomes.  These
@@ -3697,6 +3697,18 @@ cdef class RandomGenerator:
         For the first run, we threw 3 times 1, 4 times 2, etc.  For the second,
         we threw 2 times 1, 4 times 2, etc.
 
+        Now, do one experiment throwing the dice 10 time, and 10 times again,
+        and another throwing the dice 20 times, and 20 times again:
+
+        >>> np.random.multinomial([[10], [20]], [1/6.]*6, size=2)
+        array([[[2, 4, 0, 1, 2, 1],
+                [1, 3, 0, 3, 1, 2]],
+               [[1, 4, 4, 4, 4, 3],
+                [3, 3, 2, 5, 5, 2]]])  # random
+
+        The first array shows the outcomes of throwing the dice 10 times, and
+        the second shows the outcomes from throwing the dice 20 times.
+
         A loaded die is more likely to land on number 6:
 
         >>> np.random.multinomial(100, [1/7.]*5 + [2/7.])
@@ -3717,19 +3729,44 @@ cdef class RandomGenerator:
         array([100,   0])
 
         """
-        cdef np.npy_intp d, i, j, dn, sz
-        cdef np.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"
+
+        cdef np.npy_intp d, i, sz, offset
+        cdef np.ndarray parr, mnarr, on, temp_arr
         cdef double *pix
         cdef int64_t *mnix
-        cdef double Sum
+        cdef int64_t ni
+        cdef np.broadcast it
 
         d = len(pvals)
+        on = <np.ndarray>np.PyArray_FROM_OTF(n, np.NPY_INT64, np.NPY_ALIGNED)
         parr = <np.ndarray>np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED)
         pix = <double*>np.PyArray_DATA(parr)
-
+        check_array_constraint(parr, 'pvals', CONS_BOUNDED_0_1)
         if kahan_sum(pix, d-1) > (1.0 + 1e-12):
             raise ValueError("sum(pvals[:-1]) > 1.0")
 
+        if np.PyArray_NDIM(on) != 0: # vector
+            check_array_constraint(on, 'n', CONS_NON_NEGATIVE)
+            if size is None:
+                it = np.PyArray_MultiIterNew1(on)
+            else:
+                temp = np.empty(size, dtype=np.int8)
+                temp_arr = <np.ndarray>temp
+                it = np.PyArray_MultiIterNew2(on, temp_arr)
+            shape = it.shape + (d,)
+            multin = np.zeros(shape, dtype=np.int64)
+            mnarr = <np.ndarray>multin
+            mnix = <int64_t*>np.PyArray_DATA(mnarr)
+            offset = 0
+            sz = it.size
+            with self.lock, nogil:
+                for i in range(sz):
+                    ni = (<int64_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
+                    random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
+                    offset += d
+                    np.PyArray_MultiIter_NEXT(it)
+            return multin
+
         if size is None:
             shape = (d,)
         else:
@@ -3742,23 +3779,13 @@ cdef class RandomGenerator:
         mnarr = <np.ndarray>multin
         mnix = <int64_t*>np.PyArray_DATA(mnarr)
         sz = np.PyArray_SIZE(mnarr)
-
+        ni = n
+        check_constraint(ni, 'n', CONS_NON_NEGATIVE)
+        offset = 0
         with self.lock, nogil:
-            i = 0
-            while i < sz:
-                Sum = 1.0
-                dn = n
-                for j in range(d-1):
-                    mnix[i+j] = random_binomial(self._brng, pix[j]/Sum, dn,
-                                                self._binomial)
-                    dn = dn - mnix[i+j]
-                    if dn <= 0:
-                        break
-                    Sum = Sum - pix[j]
-                if dn > 0:
-                    mnix[i+d-1] = dn
-
-                i = i + d
+            for i in range(sz // d):
+                random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
+                offset += d
 
         return multin
 

diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
@@ -3790,16 +3790,16 @@ cdef class RandomState:
         array([100,   0])
 
         """
-        cdef np.npy_intp d, i, j, dn, sz
-        cdef np.ndarray parr "arrayObject_parr", mnarr "arrayObject_mnarr"
+        cdef np.npy_intp d, i, sz, offset
+        cdef np.ndarray parr, mnarr
         cdef double *pix
         cdef int64_t *mnix
-        cdef double Sum
+        cdef int64_t ni
 
         d = len(pvals)
         parr = <np.ndarray>np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED)
         pix = <double*>np.PyArray_DATA(parr)
-
+        check_array_constraint(parr, 'pvals', CONS_BOUNDED_0_1)
         if kahan_sum(pix, d-1) > (1.0 + 1e-12):
             raise ValueError("sum(pvals[:-1]) > 1.0")
 
@@ -3815,23 +3815,13 @@ cdef class RandomState:
         mnarr = <np.ndarray>multin
         mnix = <int64_t*>np.PyArray_DATA(mnarr)
         sz = np.PyArray_SIZE(mnarr)
-
+        ni = n
+        check_constraint(ni, 'n', CONS_NON_NEGATIVE)
+        offset = 0
         with self.lock, nogil:
-            i = 0
-            while i < sz:
-                Sum = 1.0
-                dn = n
-                for j in range(d-1):
-                    mnix[i+j] = random_binomial(self._brng, pix[j]/Sum, dn,
-                                                self._binomial)
-                    dn = dn - mnix[i+j]
-                    if dn <= 0:
-                        break
-                    Sum = Sum - pix[j]
-                if dn > 0:
-                    mnix[i+d-1] = dn
-
-                i = i + d
+            for i in range(sz // d):
+                random_multinomial(self._brng, ni, &mnix[offset], pix, d, self._binomial)
+                offset += d
 
         return multin
 

diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c
@@ -1048,25 +1048,31 @@ int64_t random_geometric(brng_t *brng_state, double p) {
 }
 
 int64_t random_zipf(brng_t *brng_state, double a) {
-  double T, U, V;
-  int64_t X;
   double am1, b;
 
   am1 = a - 1.0;
   b = pow(2.0, am1);
-  do {
-    U = 1.0 - next_double(brng_state);
-    V = next_double(brng_state);
-    X = (int64_t)floor(pow(U, -1.0 / am1));
-    /* The real result may be above what can be represented in a int64.
-     * It will get casted to -sys.maxint-1. Since this is
-     * a straightforward rejection algorithm, we can just reject this value
-     * in the rejection condition below. This function then models a Zipf
+  while (1) {
+    double T, U, V, X;
+
+    U = 1.0 - random_double(brng_state);
+    V = random_double(brng_state);
+    X = floor(pow(U, -1.0 / am1));
+    /*
+     * The real result may be above what can be represented in a signed
+     * long. Since this is a straightforward rejection algorithm, we can
+     * just reject this value. This function then models a Zipf
      * distribution truncated to sys.maxint.
      */
+    if (X > LONG_MAX || X < 1.0) {
+      continue;
+    }
+
     T = pow(1.0 + 1.0 / X, am1);
-  } while (((V * X * (T - 1.0) / (b - 1.0)) > (T / b)) || X < 1);
-  return X;
+    if (V * X * (T - 1.0) / (b - 1.0) <= T / b) {
+      return (long)X;
+    }
+  }
 }
 
 double random_triangular(brng_t *brng_state, double left, double mode,
@@ -1179,8 +1185,10 @@ int64_t random_hypergeometric(brng_t *brng_state, int64_t good, int64_t bad,
                               int64_t sample) {
   if (sample > 10) {
     return random_hypergeometric_hrua(brng_state, good, bad, sample);
-  } else {
+  } else if (sample > 0) {
     return random_hypergeometric_hyp(brng_state, good, bad, sample);
+  } else {
+  return 0;
   }
 }
 
@@ -1809,3 +1817,21 @@ void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, npy_bool rng,
     out[i] = buffered_bounded_bool(brng_state, off, rng, mask, &bcnt, &buf);
   }
 }
+
+void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix,
+                        double *pix, npy_intp d, binomial_t *binomial) {
+  double remaining_p = 1.0;
+  npy_intp j;
+  int64_t dn = n;
+  for (j = 0; j < (d - 1); j++) {
+    mnix[j] = random_binomial(brng_state, pix[j] / remaining_p, dn, binomial);
+    dn = dn - mnix[j];
+    if (dn <= 0) {
+      break;
+    }
+    remaining_p -= pix[j];
+    if (dn > 0) {
+      mnix[d - 1] = dn;
+    }
+  }
+}
diff --git a/numpy/random/src/distributions/distributions.h b/numpy/random/src/distributions/distributions.h
@@ -217,4 +217,7 @@ DECLDIR void random_bounded_bool_fill(brng_t *brng_state, npy_bool off,
                                       npy_bool rng, npy_intp cnt,
                                       bool use_masked, npy_bool *out);
 
+DECLDIR void random_multinomial(brng_t *brng_state, int64_t n, int64_t *mnix,
+                                double *pix, npy_intp d, binomial_t *binomial);
+
 #endif
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
@@ -90,6 +90,11 @@ def test_size(self):
 
     def test_invalid_prob(self):
         assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2])
+        assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9])
+
+    def test_invalid_n(self):
+        assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2])
+        assert_raises(ValueError, random.multinomial, [-1] * 10, [0.8, 0.2])
 
 
 class TestSetState(object):
@@ -804,8 +809,7 @@ def test_geometric_exceptions(self):
         assert_raises(ValueError, random.geometric, [1.1] * 10)
         assert_raises(ValueError, random.geometric, -0.1)
         assert_raises(ValueError, random.geometric, [-0.1] * 10)
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
+        with np.errstate(invalid='ignore'):
             assert_raises(ValueError, random.geometric, np.nan)
             assert_raises(ValueError, random.geometric, [np.nan] * 10)
 
@@ -888,8 +892,7 @@ def test_logseries(self):
         assert_array_equal(actual, desired)
 
     def test_logseries_exceptions(self):
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
+        with np.errstate(invalid='ignore'):
             assert_raises(ValueError, random.logseries, np.nan)
             assert_raises(ValueError, random.logseries, [np.nan] * 10)
 
@@ -964,8 +967,7 @@ def test_negative_binomial(self):
         assert_array_equal(actual, desired)
 
     def test_negative_binomial_exceptions(self):
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
+        with np.errstate(invalid='ignore'):
             assert_raises(ValueError, random.negative_binomial, 100, np.nan)
             assert_raises(ValueError, random.negative_binomial, 100,
                           [np.nan] * 10)
@@ -1046,8 +1048,7 @@ def test_poisson_exceptions(self):
         assert_raises(ValueError, random.poisson, [lamneg] * 10)
         assert_raises(ValueError, random.poisson, lambig)
         assert_raises(ValueError, random.poisson, [lambig] * 10)
-        with suppress_warnings() as sup:
-            sup.record(RuntimeWarning)
+        with np.errstate(invalid='ignore'):
             assert_raises(ValueError, random.poisson, np.nan)
             assert_raises(ValueError, random.poisson, [np.nan] * 10)
 
@@ -1849,6 +1850,23 @@ def test_logseries(self):
         assert_raises(ValueError, logseries, bad_p_one * 3)
         assert_raises(ValueError, logseries, bad_p_two * 3)
 
+    def test_multinomial(self):
+        random.brng.seed(self.seed)
+        actual = random.multinomial([5, 20], [1 / 6.] * 6, size=(3, 2))
+        desired = np.array([[[1, 1, 1, 1, 0, 1],
+                             [4, 5, 1, 4, 3, 3]],
+                            [[1, 1, 1, 0, 0, 2],
+                             [2, 0, 4, 3, 7, 4]],
+                            [[1, 2, 0, 0, 2, 2],
+                             [3, 2, 3, 4, 2, 6]]], dtype=np.int64)
+        assert_array_equal(actual, desired)
+
+        random.brng.seed(self.seed)
+        actual = random.multinomial([5, 20], [1 / 6.] * 6)
+        desired = np.array([[1, 1, 1, 1, 0, 1],
+                            [4, 5, 1, 4, 3, 3]], dtype=np.int64)
+        assert_array_equal(actual, desired)
+
 
 class TestThread(object):
     # make sure each state produces the same sequence even in threads

diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py
@@ -112,6 +112,10 @@ def test_size(self):
 
     def test_invalid_prob(self):
         assert_raises(ValueError, random.multinomial, 100, [1.1, 0.2])
+        assert_raises(ValueError, random.multinomial, 100, [-.1, 0.9])
+
+    def test_invalid_n(self):
+        assert_raises(ValueError, random.multinomial, -1, [0.8, 0.2])
 
 
 class TestSetState(object):