Skip to content

Commit

Permalink
BUG: Correct handling of nans
Browse files Browse the repository at this point in the history
Improve consistency of nan handling
Prevent nans prducing values from int functions
  • Loading branch information
bashtage committed Apr 8, 2019
1 parent c59585e commit 31f942d
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 63 deletions.
1 change: 1 addition & 0 deletions numpy/random/randomgen/common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ cdef enum ConstraintType:
CONS_NONE
CONS_NON_NEGATIVE
CONS_POSITIVE
CONS_POSITIVE_NOT_NAN
CONS_BOUNDED_0_1
CONS_BOUNDED_0_1_NOTNAN
CONS_BOUNDED_GT_0_1
Expand Down
39 changes: 22 additions & 17 deletions numpy/random/randomgen/common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -295,52 +295,57 @@ cdef uint64_t MAXSIZE = <uint64_t>sys.maxsize

cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1:
if cons == CONS_NON_NEGATIVE:
if np.any(np.signbit(val)) or np.any(np.isnan(val)):
if np.any(np.logical_and(np.logical_not(np.isnan(val)), np.signbit(val))):
raise ValueError(name + " < 0")
elif cons == CONS_POSITIVE:
if not np.all(np.greater(val, 0)):
elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
if cons == CONS_POSITIVE_NOT_NAN and np.any(np.isnan(val)):
raise ValueError(name + " must not be NaN")
elif np.any(np.less_equal(val, 0)):
raise ValueError(name + " <= 0")
elif cons == CONS_BOUNDED_0_1:
if not np.all(np.greater_equal(val, 0)) or \
not np.all(np.less_equal(val, 1)):
raise ValueError(name + " < 0 or " + name + " > 1")
raise ValueError("{0} < 0 , {0} > 1 or {0} contains NaNs".format(name))
elif cons == CONS_BOUNDED_GT_0_1:
if not np.all(np.greater(val, 0)) or not np.all(np.less_equal(val, 1)):
raise ValueError(name + " <= 0 or " + name + " > 1")
raise ValueError("{0} <= 0 , {0} > 1 or {0} contains NaNs".format(name))
elif cons == CONS_GT_1:
if not np.all(np.greater(val, 1)):
raise ValueError(name + " <= 1")
raise ValueError("{0} <= 1 or {0} contains NaNs".format(name))
elif cons == CONS_GTE_1:
if not np.all(np.greater_equal(val, 1)):
raise ValueError(name + " < 1")
raise ValueError("{0} < 1 or {0} contains NaNs".format(name))
elif cons == CONS_POISSON:
if not np.all(np.less_equal(val, POISSON_LAM_MAX)):
raise ValueError(name + " value too large")
if not np.all(np.greater_equal(val, 0.0)):
raise ValueError(name + " < 0")
raise ValueError("{0} value too large".format(name))
elif not np.all(np.greater_equal(val, 0.0)):
raise ValueError("{0} < 0 or {0} contains NaNs".format(name))

return 0


cdef int check_constraint(double val, object name, constraint_type cons) except -1:
cdef bint is_nan
if cons == CONS_NON_NEGATIVE:
if np.signbit(val) or np.isnan(val):
if not np.isnan(val) and np.signbit(val):
raise ValueError(name + " < 0")
elif cons == CONS_POSITIVE:
if not (val > 0):
elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
if cons == CONS_POSITIVE_NOT_NAN and np.isnan(val):
raise ValueError(name + " must not be NaN")
elif val <= 0:
raise ValueError(name + " <= 0")
elif cons == CONS_BOUNDED_0_1:
if not (val >= 0) or not (val <= 1):
raise ValueError(name + " < 0 or " + name + " > 1")
raise ValueError("{0} < 0 , {0} > 1 or {0} is NaN".format(name))
elif cons == CONS_GT_1:
if not (val > 1):
raise ValueError(name + " <= 1")
raise ValueError("{0} <= 1 or {0} is NaN".format(name))
elif cons == CONS_GTE_1:
if not (val >= 1):
raise ValueError(name + " < 1")
raise ValueError("{0} < 1 or {0} is NaN".format(name))
elif cons == CONS_POISSON:
if not (val >= 0):
raise ValueError(name + " < 0")
raise ValueError("{0} < 0 or {0} is NaN".format(name))
elif not (val <= POISSON_LAM_MAX):
raise ValueError(name + " value too large")

Expand Down
38 changes: 19 additions & 19 deletions numpy/random/randomgen/generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,7 @@ cdef class RandomGenerator:
Parameters
----------
d0, d1, ..., dn : int, optional
The dimensions of the returned array, should all be positive.
The dimensions of the returned array, must be non-negative.
If no argument is given a single Python float is returned.
dtype : {str, dtype}, optional
Desired dtype of the result, either 'd' (or 'float64') or 'f'
Expand Down Expand Up @@ -953,7 +953,7 @@ cdef class RandomGenerator:
Parameters
----------
d0, d1, ..., dn : int, optional
The dimensions of the returned array, should be all positive.
The dimensions of the returned array, must be non-negative.
If no argument is given a single Python float is returned.
dtype : {str, dtype}, optional
Desired dtype of the result, either 'd' (or 'float64') or 'f'
Expand Down Expand Up @@ -1442,7 +1442,7 @@ cdef class RandomGenerator:
Samples are drawn from an F distribution with specified parameters,
`dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
freedom in denominator), where both parameters should be greater than
freedom in denominator), where both parameters must be greater than
zero.
The random variate of the F distribution (also known as the
Expand All @@ -1453,9 +1453,9 @@ cdef class RandomGenerator:
Parameters
----------
dfnum : float or array_like of floats
Degrees of freedom in numerator, should be > 0.
Degrees of freedom in numerator, must be > 0.
dfden : float or array_like of float
Degrees of freedom in denominator, should be > 0.
Degrees of freedom in denominator, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1536,15 +1536,15 @@ cdef class RandomGenerator:
Parameters
----------
dfnum : float or array_like of floats
Numerator degrees of freedom, should be > 0.
Numerator degrees of freedom, must be > 0.
.. versionchanged:: 1.14.0
Earlier NumPy versions required dfnum > 1.
dfden : float or array_like of floats
Denominator degrees of freedom, should be > 0.
Denominator degrees of freedom, must be > 0.
nonc : float or array_like of floats
Non-centrality parameter, the sum of the squares of the numerator
means, should be >= 0.
means, must be >= 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1613,7 +1613,7 @@ cdef class RandomGenerator:
Parameters
----------
df : float or array_like of floats
Number of degrees of freedom, should be > 0.
Number of degrees of freedom, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1679,12 +1679,12 @@ cdef class RandomGenerator:
Parameters
----------
df : float or array_like of floats
Degrees of freedom, should be > 0.
Degrees of freedom, must be > 0.
.. versionchanged:: 1.10.0
Earlier NumPy versions required dfnum > 1.
nonc : float or array_like of floats
Non-centrality, should be non-negative.
Non-centrality, must be non-negative.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1825,7 +1825,7 @@ cdef class RandomGenerator:
Parameters
----------
df : float or array_like of floats
Degrees of freedom, should be > 0.
Degrees of freedom, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -2015,7 +2015,7 @@ cdef class RandomGenerator:
Parameters
----------
a : float or array_like of floats
Shape of the distribution. Must all be positive.
Shape of the distribution. Must be positive.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -2831,9 +2831,9 @@ cdef class RandomGenerator:
Lower limit.
mode : float or array_like of floats
The value where the peak of the distribution occurs.
The value should fulfill the condition ``left <= mode <= right``.
The value must fulfill the condition ``left <= mode <= right``.
right : float or array_like of floats
Upper limit, should be larger than `left`.
Upper limit, must be larger than `left`.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -3128,7 +3128,7 @@ cdef class RandomGenerator:
"""
return disc(&random_negative_binomial, self._brng, size, self.lock, 2, 0,
n, 'n', CONS_POSITIVE,
n, 'n', CONS_POSITIVE_NOT_NAN,
p, 'p', CONS_BOUNDED_0_1,
0.0, '', CONS_NONE)

Expand All @@ -3144,7 +3144,7 @@ cdef class RandomGenerator:
Parameters
----------
lam : float or array_like of floats
Expectation of interval, should be >= 0. A sequence of expectation
Expectation of interval, must be >= 0. A sequence of expectation
intervals must be broadcastable over the requested size.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
Expand Down Expand Up @@ -3483,7 +3483,7 @@ cdef class RandomGenerator:
Notes
-----
The probability density for the Log Series distribution is
The probability mass function for the Log Series distribution is
.. math:: P(k) = \\frac{-p^k}{k \\ln(1-p)},
Expand Down Expand Up @@ -3717,7 +3717,7 @@ cdef class RandomGenerator:
Number of experiments.
pvals : sequence of floats, length p
Probabilities of each of the ``p`` different outcomes. These
should sum to 1 (however, the last element is always assumed to
must sum to 1 (however, the last element is always assumed to
account for the remaining probability, as long as
``sum(pvals[:-1]) <= 1)``.
size : int or tuple of ints, optional
Expand Down
38 changes: 19 additions & 19 deletions numpy/random/randomgen/mtrand.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3
import operator
import warnings
from collections.abc import Mapping

from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
from cpython cimport (Py_INCREF, PyFloat_AsDouble)
from libc cimport string
Expand Down Expand Up @@ -250,7 +250,7 @@ cdef class RandomState:
Vol. 8, No. 1, pp. 3-30, Jan. 1998.
"""
if isinstance(state, Mapping):
if isinstance(state, dict):
if 'brng' not in state or 'state' not in state:
raise ValueError('state dictionary is not valid.')
st = state
Expand Down Expand Up @@ -955,7 +955,7 @@ cdef class RandomState:
Parameters
----------
d0, d1, ..., dn : int, optional
The dimensions of the returned array, should all be positive.
The dimensions of the returned array, must be non-negative.
If no argument is given a single Python float is returned.
Returns
Expand Down Expand Up @@ -1001,7 +1001,7 @@ cdef class RandomState:
Parameters
----------
d0, d1, ..., dn : int, optional
The dimensions of the returned array, should be all positive.
The dimensions of the returned array, must be non-negative.
If no argument is given a single Python float is returned.
Returns
Expand Down Expand Up @@ -1458,7 +1458,7 @@ cdef class RandomState:
Samples are drawn from an F distribution with specified parameters,
`dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
freedom in denominator), where both parameters should be greater than
freedom in denominator), where both parameters must be greater than
zero.
The random variate of the F distribution (also known as the
Expand All @@ -1469,9 +1469,9 @@ cdef class RandomState:
Parameters
----------
dfnum : float or array_like of floats
Degrees of freedom in numerator, should be > 0.
Degrees of freedom in numerator, must be > 0.
dfden : float or array_like of float
Degrees of freedom in denominator, should be > 0.
Degrees of freedom in denominator, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1552,15 +1552,15 @@ cdef class RandomState:
Parameters
----------
dfnum : float or array_like of floats
Numerator degrees of freedom, should be > 0.
Numerator degrees of freedom, must be > 0.
.. versionchanged:: 1.14.0
Earlier NumPy versions required dfnum > 1.
dfden : float or array_like of floats
Denominator degrees of freedom, should be > 0.
Denominator degrees of freedom, must be > 0.
nonc : float or array_like of floats
Non-centrality parameter, the sum of the squares of the numerator
means, should be >= 0.
means, must be >= 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1629,7 +1629,7 @@ cdef class RandomState:
Parameters
----------
df : float or array_like of floats
Number of degrees of freedom, should be > 0.
Number of degrees of freedom, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1695,12 +1695,12 @@ cdef class RandomState:
Parameters
----------
df : float or array_like of floats
Degrees of freedom, should be > 0.
Degrees of freedom, must be > 0.
.. versionchanged:: 1.10.0
Earlier NumPy versions required dfnum > 1.
nonc : float or array_like of floats
Non-centrality, should be non-negative.
Non-centrality, must be non-negative.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -1841,7 +1841,7 @@ cdef class RandomState:
Parameters
----------
df : float or array_like of floats
Degrees of freedom, should be > 0.
Degrees of freedom, must be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -2031,7 +2031,7 @@ cdef class RandomState:
Parameters
----------
a : float or array_like of floats
Shape of the distribution. Must all be positive.
Shape of the distribution. Must be positive.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -2847,9 +2847,9 @@ cdef class RandomState:
Lower limit.
mode : float or array_like of floats
The value where the peak of the distribution occurs.
The value should fulfill the condition ``left <= mode <= right``.
The value must fulfill the condition ``left <= mode <= right``.
right : float or array_like of floats
Upper limit, should be larger than `left`.
Upper limit, must be larger than `left`.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
Expand Down Expand Up @@ -3160,7 +3160,7 @@ cdef class RandomState:
Parameters
----------
lam : float or array_like of floats
Expectation of interval, should be >= 0. A sequence of expectation
Expectation of interval, must be >= 0. A sequence of expectation
intervals must be broadcastable over the requested size.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
Expand Down Expand Up @@ -3735,7 +3735,7 @@ cdef class RandomState:
Number of experiments.
pvals : sequence of floats, length p
Probabilities of each of the ``p`` different outcomes. These
should sum to 1 (however, the last element is always assumed to
must sum to 1 (however, the last element is always assumed to
account for the remaining probability, as long as
``sum(pvals[:-1]) <= 1)``.
size : int or tuple of ints, optional
Expand Down
7 changes: 6 additions & 1 deletion numpy/random/randomgen/src/distributions/distributions.c
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,9 @@ int64_t random_binomial(brng_t *brng_state, double p, int64_t n,
}

double random_noncentral_chisquare(brng_t *brng_state, double df, double nonc) {
if (npy_isnan(nonc)){
return NPY_NAN;
}
if (nonc == 0) {
return random_chisquare(brng_state, df);
}
Expand Down Expand Up @@ -939,7 +942,9 @@ double random_vonmises(brng_t *brng_state, double mu, double kappa) {
double U, V, W, Y, Z;
double result, mod;
int neg;

if (npy_isnan(kappa)){
return NPY_NAN;
}
if (kappa < 1e-8) {
return M_PI * (2 * next_double(brng_state) - 1);
} else {
Expand Down
Loading

0 comments on commit 31f942d

Please sign in to comment.