Permalink
8119 lines (6772 sloc) 250 KB
"""
numpy.ma : a package to handle missing or invalid values.
This package was initially written for numarray by Paul F. Dubois
at Lawrence Livermore National Laboratory.
In 2006, the package was completely rewritten by Pierre Gerard-Marchant
(University of Georgia) to make the MaskedArray class a subclass of ndarray,
and to improve support of structured arrays.
Copyright 1999, 2000, 2001 Regents of the University of California.
Released for unlimited redistribution.
* Adapted for numpy_core 2005 by Travis Oliphant and (mainly) Paul Dubois.
* Subclassing of the base `ndarray` 2006 by Pierre Gerard-Marchant
(pgmdevlist_AT_gmail_DOT_com)
* Improvements suggested by Reggie Dugard (reggie_AT_merfinllc_DOT_com)
.. moduleauthor:: Pierre Gerard-Marchant
"""
# pylint: disable-msg=E1002
from __future__ import division, absolute_import, print_function
import sys
import operator
import warnings
import textwrap
from functools import reduce
if sys.version_info[0] >= 3:
import builtins
else:
import __builtin__ as builtins
import numpy as np
import numpy.core.umath as umath
import numpy.core.numerictypes as ntypes
from numpy import ndarray, amax, amin, iscomplexobj, bool_, _NoValue
from numpy import array as narray
from numpy.lib.function_base import angle
from numpy.compat import (
getargspec, formatargspec, long, basestring, unicode, bytes
)
from numpy import expand_dims as n_expand_dims
from numpy.core.multiarray import normalize_axis_index
from numpy.core.numeric import normalize_axis_tuple
if sys.version_info[0] >= 3:
import pickle
else:
import cPickle as pickle
__all__ = [
'MAError', 'MaskError', 'MaskType', 'MaskedArray', 'abs', 'absolute',
'add', 'all', 'allclose', 'allequal', 'alltrue', 'amax', 'amin',
'angle', 'anom', 'anomalies', 'any', 'append', 'arange', 'arccos',
'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh',
'argmax', 'argmin', 'argsort', 'around', 'array', 'asanyarray',
'asarray', 'bitwise_and', 'bitwise_or', 'bitwise_xor', 'bool_', 'ceil',
'choose', 'clip', 'common_fill_value', 'compress', 'compressed',
'concatenate', 'conjugate', 'convolve', 'copy', 'correlate', 'cos', 'cosh',
'count', 'cumprod', 'cumsum', 'default_fill_value', 'diag', 'diagonal',
'diff', 'divide', 'dump', 'dumps', 'empty', 'empty_like', 'equal', 'exp',
'expand_dims', 'fabs', 'filled', 'fix_invalid', 'flatten_mask',
'flatten_structured_array', 'floor', 'floor_divide', 'fmod',
'frombuffer', 'fromflex', 'fromfunction', 'getdata', 'getmask',
'getmaskarray', 'greater', 'greater_equal', 'harden_mask', 'hypot',
'identity', 'ids', 'indices', 'inner', 'innerproduct', 'isMA',
'isMaskedArray', 'is_mask', 'is_masked', 'isarray', 'left_shift',
'less', 'less_equal', 'load', 'loads', 'log', 'log10', 'log2',
'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'make_mask',
'make_mask_descr', 'make_mask_none', 'mask_or', 'masked',
'masked_array', 'masked_equal', 'masked_greater',
'masked_greater_equal', 'masked_inside', 'masked_invalid',
'masked_less', 'masked_less_equal', 'masked_not_equal',
'masked_object', 'masked_outside', 'masked_print_option',
'masked_singleton', 'masked_values', 'masked_where', 'max', 'maximum',
'maximum_fill_value', 'mean', 'min', 'minimum', 'minimum_fill_value',
'mod', 'multiply', 'mvoid', 'ndim', 'negative', 'nomask', 'nonzero',
'not_equal', 'ones', 'outer', 'outerproduct', 'power', 'prod',
'product', 'ptp', 'put', 'putmask', 'rank', 'ravel', 'remainder',
'repeat', 'reshape', 'resize', 'right_shift', 'round', 'round_',
'set_fill_value', 'shape', 'sin', 'sinh', 'size', 'soften_mask',
'sometrue', 'sort', 'sqrt', 'squeeze', 'std', 'subtract', 'sum',
'swapaxes', 'take', 'tan', 'tanh', 'trace', 'transpose', 'true_divide',
'var', 'where', 'zeros',
]
MaskType = np.bool_
nomask = MaskType(0)
class MaskedArrayFutureWarning(FutureWarning):
pass
def _deprecate_argsort_axis(arr):
"""
Adjust the axis passed to argsort, warning if necessary
Parameters
----------
arr
The array which argsort was called on
np.ma.argsort has a long-term bug where the default of the axis argument
is wrong (gh-8701), which now must be kept for backwards compatibiity.
Thankfully, this only makes a difference when arrays are 2- or more-
dimensional, so we only need a warning then.
"""
if arr.ndim <= 1:
# no warning needed - but switch to -1 anyway, to avoid surprising
# subclasses, which are more likely to implement scalar axes.
return -1
else:
# 2017-04-11, Numpy 1.13.0, gh-8701: warn on axis default
warnings.warn(
"In the future the default for argsort will be axis=-1, not the "
"current None, to match its documentation and np.argsort. "
"Explicitly pass -1 or None to silence this warning.",
MaskedArrayFutureWarning, stacklevel=3)
return None
def doc_note(initialdoc, note):
"""
Adds a Notes section to an existing docstring.
"""
if initialdoc is None:
return
if note is None:
return initialdoc
# FIXME: disable this function for the moment until we figure out what to
# do with it. Currently it may result in duplicate Notes sections or Notes
# sections in the wrong place
return initialdoc
newdoc = """
%s
Notes
-----
%s
"""
return newdoc % (initialdoc, note)
def get_object_signature(obj):
"""
Get the signature from obj
"""
try:
sig = formatargspec(*getargspec(obj))
except TypeError:
sig = ''
return sig
###############################################################################
# Exceptions #
###############################################################################
class MAError(Exception):
"""
Class for masked array related errors.
"""
pass
class MaskError(MAError):
"""
Class for mask related errors.
"""
pass
###############################################################################
# Filling options #
###############################################################################
# b: boolean - c: complex - f: floats - i: integer - O: object - S: string
default_filler = {'b': True,
'c': 1.e20 + 0.0j,
'f': 1.e20,
'i': 999999,
'O': '?',
'S': b'N/A',
'u': 999999,
'V': b'???',
'U': u'N/A'
}
# Add datetime64 and timedelta64 types
for v in ["Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps",
"fs", "as"]:
default_filler["M8[" + v + "]"] = np.datetime64("NaT", v)
default_filler["m8[" + v + "]"] = np.timedelta64("NaT", v)
max_filler = ntypes._minvals
max_filler.update([(k, -np.inf) for k in [np.float32, np.float64]])
min_filler = ntypes._maxvals
min_filler.update([(k, +np.inf) for k in [np.float32, np.float64]])
if 'float128' in ntypes.typeDict:
max_filler.update([(np.float128, -np.inf)])
min_filler.update([(np.float128, +np.inf)])
def _recursive_fill_value(dtype, f):
"""
Recursively produce a fill value for `dtype`, calling f on scalar dtypes
"""
if dtype.names:
vals = tuple(_recursive_fill_value(dtype[name], f) for name in dtype.names)
return np.array(vals, dtype=dtype)[()] # decay to void scalar from 0d
elif dtype.subdtype:
subtype, shape = dtype.subdtype
subval = _recursive_fill_value(subtype, f)
return np.full(shape, subval)
else:
return f(dtype)
def _get_dtype_of(obj):
""" Convert the argument for *_fill_value into a dtype """
if isinstance(obj, np.dtype):
return obj
elif hasattr(obj, 'dtype'):
return obj.dtype
else:
return np.asanyarray(obj).dtype
def default_fill_value(obj):
"""
Return the default fill value for the argument object.
The default filling value depends on the datatype of the input
array or the type of the input scalar:
======== ========
datatype default
======== ========
bool True
int 999999
float 1.e20
complex 1.e20+0j
object '?'
string 'N/A'
======== ========
For structured types, a structured scalar is returned, with each field the
default fill value for its type.
For subarray types, the fill value is an array of the same size containing
the default scalar fill value.
Parameters
----------
obj : ndarray, dtype or scalar
The array data-type or scalar for which the default fill value
is returned.
Returns
-------
fill_value : scalar
The default fill value.
Examples
--------
>>> np.ma.default_fill_value(1)
999999
>>> np.ma.default_fill_value(np.array([1.1, 2., np.pi]))
1e+20
>>> np.ma.default_fill_value(np.dtype(complex))
(1e+20+0j)
"""
def _scalar_fill_value(dtype):
if dtype.kind in 'Mm':
return default_filler.get(dtype.str[1:], '?')
else:
return default_filler.get(dtype.kind, '?')
dtype = _get_dtype_of(obj)
return _recursive_fill_value(dtype, _scalar_fill_value)
def _extremum_fill_value(obj, extremum, extremum_name):
def _scalar_fill_value(dtype):
try:
return extremum[dtype]
except KeyError:
raise TypeError(
"Unsuitable type {} for calculating {}."
.format(dtype, extremum_name)
)
dtype = _get_dtype_of(obj)
return _recursive_fill_value(dtype, _scalar_fill_value)
def minimum_fill_value(obj):
"""
Return the maximum value that can be represented by the dtype of an object.
This function is useful for calculating a fill value suitable for
taking the minimum of an array with a given dtype.
Parameters
----------
obj : ndarray, dtype or scalar
An object that can be queried for it's numeric type.
Returns
-------
val : scalar
The maximum representable value.
Raises
------
TypeError
If `obj` isn't a suitable numeric type.
See Also
--------
maximum_fill_value : The inverse function.
set_fill_value : Set the filling value of a masked array.
MaskedArray.fill_value : Return current fill value.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.int8()
>>> ma.minimum_fill_value(a)
127
>>> a = np.int32()
>>> ma.minimum_fill_value(a)
2147483647
An array of numeric data can also be passed.
>>> a = np.array([1, 2, 3], dtype=np.int8)
>>> ma.minimum_fill_value(a)
127
>>> a = np.array([1, 2, 3], dtype=np.float32)
>>> ma.minimum_fill_value(a)
inf
"""
return _extremum_fill_value(obj, min_filler, "minimum")
def maximum_fill_value(obj):
"""
Return the minimum value that can be represented by the dtype of an object.
This function is useful for calculating a fill value suitable for
taking the maximum of an array with a given dtype.
Parameters
----------
obj : ndarray, dtype or scalar
An object that can be queried for it's numeric type.
Returns
-------
val : scalar
The minimum representable value.
Raises
------
TypeError
If `obj` isn't a suitable numeric type.
See Also
--------
minimum_fill_value : The inverse function.
set_fill_value : Set the filling value of a masked array.
MaskedArray.fill_value : Return current fill value.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.int8()
>>> ma.maximum_fill_value(a)
-128
>>> a = np.int32()
>>> ma.maximum_fill_value(a)
-2147483648
An array of numeric data can also be passed.
>>> a = np.array([1, 2, 3], dtype=np.int8)
>>> ma.maximum_fill_value(a)
-128
>>> a = np.array([1, 2, 3], dtype=np.float32)
>>> ma.maximum_fill_value(a)
-inf
"""
return _extremum_fill_value(obj, max_filler, "maximum")
def _recursive_set_fill_value(fillvalue, dt):
"""
Create a fill value for a structured dtype.
Parameters
----------
fillvalue: scalar or array_like
Scalar or array representing the fill value. If it is of shorter
length than the number of fields in dt, it will be resized.
dt: dtype
The structured dtype for which to create the fill value.
Returns
-------
val: tuple
A tuple of values corresponding to the structured fill value.
"""
fillvalue = np.resize(fillvalue, len(dt.names))
output_value = []
for (fval, name) in zip(fillvalue, dt.names):
cdtype = dt[name]
if cdtype.subdtype:
cdtype = cdtype.subdtype[0]
if cdtype.names:
output_value.append(tuple(_recursive_set_fill_value(fval, cdtype)))
else:
output_value.append(np.array(fval, dtype=cdtype).item())
return tuple(output_value)
def _check_fill_value(fill_value, ndtype):
"""
Private function validating the given `fill_value` for the given dtype.
If fill_value is None, it is set to the default corresponding to the dtype.
If fill_value is not None, its value is forced to the given dtype.
The result is always a 0d array.
"""
ndtype = np.dtype(ndtype)
fields = ndtype.fields
if fill_value is None:
fill_value = default_fill_value(ndtype)
elif fields:
fdtype = [(_[0], _[1]) for _ in ndtype.descr]
if isinstance(fill_value, (ndarray, np.void)):
try:
fill_value = np.array(fill_value, copy=False, dtype=fdtype)
except ValueError:
err_msg = "Unable to transform %s to dtype %s"
raise ValueError(err_msg % (fill_value, fdtype))
else:
fill_value = np.asarray(fill_value, dtype=object)
fill_value = np.array(_recursive_set_fill_value(fill_value, ndtype),
dtype=ndtype)
else:
if isinstance(fill_value, basestring) and (ndtype.char not in 'OSVU'):
err_msg = "Cannot set fill value of string with array of dtype %s"
raise TypeError(err_msg % ndtype)
else:
# In case we want to convert 1e20 to int.
try:
fill_value = np.array(fill_value, copy=False, dtype=ndtype)
except OverflowError:
# Raise TypeError instead of OverflowError. OverflowError
# is seldom used, and the real problem here is that the
# passed fill_value is not compatible with the ndtype.
err_msg = "Fill value %s overflows dtype %s"
raise TypeError(err_msg % (fill_value, ndtype))
return np.array(fill_value)
def set_fill_value(a, fill_value):
"""
Set the filling value of a, if a is a masked array.
This function changes the fill value of the masked array `a` in place.
If `a` is not a masked array, the function returns silently, without
doing anything.
Parameters
----------
a : array_like
Input array.
fill_value : dtype
Filling value. A consistency test is performed to make sure
the value is compatible with the dtype of `a`.
Returns
-------
None
Nothing returned by this function.
See Also
--------
maximum_fill_value : Return the default fill value for a dtype.
MaskedArray.fill_value : Return current fill value.
MaskedArray.set_fill_value : Equivalent method.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(5)
>>> a
array([0, 1, 2, 3, 4])
>>> a = ma.masked_where(a < 3, a)
>>> a
masked_array(data = [-- -- -- 3 4],
mask = [ True True True False False],
fill_value=999999)
>>> ma.set_fill_value(a, -999)
>>> a
masked_array(data = [-- -- -- 3 4],
mask = [ True True True False False],
fill_value=-999)
Nothing happens if `a` is not a masked array.
>>> a = range(5)
>>> a
[0, 1, 2, 3, 4]
>>> ma.set_fill_value(a, 100)
>>> a
[0, 1, 2, 3, 4]
>>> a = np.arange(5)
>>> a
array([0, 1, 2, 3, 4])
>>> ma.set_fill_value(a, 100)
>>> a
array([0, 1, 2, 3, 4])
"""
if isinstance(a, MaskedArray):
a.set_fill_value(fill_value)
return
def get_fill_value(a):
"""
Return the filling value of a, if any. Otherwise, returns the
default filling value for that type.
"""
if isinstance(a, MaskedArray):
result = a.fill_value
else:
result = default_fill_value(a)
return result
def common_fill_value(a, b):
"""
Return the common filling value of two masked arrays, if any.
If ``a.fill_value == b.fill_value``, return the fill value,
otherwise return None.
Parameters
----------
a, b : MaskedArray
The masked arrays for which to compare fill values.
Returns
-------
fill_value : scalar or None
The common fill value, or None.
Examples
--------
>>> x = np.ma.array([0, 1.], fill_value=3)
>>> y = np.ma.array([0, 1.], fill_value=3)
>>> np.ma.common_fill_value(x, y)
3.0
"""
t1 = get_fill_value(a)
t2 = get_fill_value(b)
if t1 == t2:
return t1
return None
def filled(a, fill_value=None):
"""
Return input as an array with masked data replaced by a fill value.
If `a` is not a `MaskedArray`, `a` itself is returned.
If `a` is a `MaskedArray` and `fill_value` is None, `fill_value` is set to
``a.fill_value``.
Parameters
----------
a : MaskedArray or array_like
An input object.
fill_value : scalar, optional
Filling value. Default is None.
Returns
-------
a : ndarray
The filled array.
See Also
--------
compressed
Examples
--------
>>> x = np.ma.array(np.arange(9).reshape(3, 3), mask=[[1, 0, 0],
... [1, 0, 0],
... [0, 0, 0]])
>>> x.filled()
array([[999999, 1, 2],
[999999, 4, 5],
[ 6, 7, 8]])
"""
if hasattr(a, 'filled'):
return a.filled(fill_value)
elif isinstance(a, ndarray):
# Should we check for contiguity ? and a.flags['CONTIGUOUS']:
return a
elif isinstance(a, dict):
return np.array(a, 'O')
else:
return np.array(a)
def get_masked_subclass(*arrays):
"""
Return the youngest subclass of MaskedArray from a list of (masked) arrays.
In case of siblings, the first listed takes over.
"""
if len(arrays) == 1:
arr = arrays[0]
if isinstance(arr, MaskedArray):
rcls = type(arr)
else:
rcls = MaskedArray
else:
arrcls = [type(a) for a in arrays]
rcls = arrcls[0]
if not issubclass(rcls, MaskedArray):
rcls = MaskedArray
for cls in arrcls[1:]:
if issubclass(cls, rcls):
rcls = cls
# Don't return MaskedConstant as result: revert to MaskedArray
if rcls.__name__ == 'MaskedConstant':
return MaskedArray
return rcls
def getdata(a, subok=True):
"""
Return the data of a masked array as an ndarray.
Return the data of `a` (if any) as an ndarray if `a` is a ``MaskedArray``,
else return `a` as a ndarray or subclass (depending on `subok`) if not.
Parameters
----------
a : array_like
Input ``MaskedArray``, alternatively a ndarray or a subclass thereof.
subok : bool
Whether to force the output to be a `pure` ndarray (False) or to
return a subclass of ndarray if appropriate (True, default).
See Also
--------
getmask : Return the mask of a masked array, or nomask.
getmaskarray : Return the mask of a masked array, or full array of False.
Examples
--------
>>> import numpy.ma as ma
>>> a = ma.masked_equal([[1,2],[3,4]], 2)
>>> a
masked_array(data =
[[1 --]
[3 4]],
mask =
[[False True]
[False False]],
fill_value=999999)
>>> ma.getdata(a)
array([[1, 2],
[3, 4]])
Equivalently use the ``MaskedArray`` `data` attribute.
>>> a.data
array([[1, 2],
[3, 4]])
"""
try:
data = a._data
except AttributeError:
data = np.array(a, copy=False, subok=subok)
if not subok:
return data.view(ndarray)
return data
get_data = getdata
def fix_invalid(a, mask=nomask, copy=True, fill_value=None):
"""
Return input with invalid data masked and replaced by a fill value.
Invalid data means values of `nan`, `inf`, etc.
Parameters
----------
a : array_like
Input array, a (subclass of) ndarray.
mask : sequence, optional
Mask. Must be convertible to an array of booleans with the same
shape as `data`. True indicates a masked (i.e. invalid) data.
copy : bool, optional
Whether to use a copy of `a` (True) or to fix `a` in place (False).
Default is True.
fill_value : scalar, optional
Value used for fixing invalid data. Default is None, in which case
the ``a.fill_value`` is used.
Returns
-------
b : MaskedArray
The input array with invalid entries fixed.
Notes
-----
A copy is performed by default.
Examples
--------
>>> x = np.ma.array([1., -1, np.nan, np.inf], mask=[1] + [0]*3)
>>> x
masked_array(data = [-- -1.0 nan inf],
mask = [ True False False False],
fill_value = 1e+20)
>>> np.ma.fix_invalid(x)
masked_array(data = [-- -1.0 -- --],
mask = [ True False True True],
fill_value = 1e+20)
>>> fixed = np.ma.fix_invalid(x)
>>> fixed.data
array([ 1.00000000e+00, -1.00000000e+00, 1.00000000e+20,
1.00000000e+20])
>>> x.data
array([ 1., -1., NaN, Inf])
"""
a = masked_array(a, copy=copy, mask=mask, subok=True)
invalid = np.logical_not(np.isfinite(a._data))
if not invalid.any():
return a
a._mask |= invalid
if fill_value is None:
fill_value = a.fill_value
a._data[invalid] = fill_value
return a
###############################################################################
# Ufuncs #
###############################################################################
ufunc_domain = {}
ufunc_fills = {}
class _DomainCheckInterval(object):
"""
Define a valid interval, so that :
``domain_check_interval(a,b)(x) == True`` where
``x < a`` or ``x > b``.
"""
def __init__(self, a, b):
"domain_check_interval(a,b)(x) = true where x < a or y > b"
if (a > b):
(a, b) = (b, a)
self.a = a
self.b = b
def __call__(self, x):
"Execute the call behavior."
# nans at masked positions cause RuntimeWarnings, even though
# they are masked. To avoid this we suppress warnings.
with np.errstate(invalid='ignore'):
return umath.logical_or(umath.greater(x, self.b),
umath.less(x, self.a))
class _DomainTan(object):
"""
Define a valid interval for the `tan` function, so that:
``domain_tan(eps) = True`` where ``abs(cos(x)) < eps``
"""
def __init__(self, eps):
"domain_tan(eps) = true where abs(cos(x)) < eps)"
self.eps = eps
def __call__(self, x):
"Executes the call behavior."
with np.errstate(invalid='ignore'):
return umath.less(umath.absolute(umath.cos(x)), self.eps)
class _DomainSafeDivide(object):
"""
Define a domain for safe division.
"""
def __init__(self, tolerance=None):
self.tolerance = tolerance
def __call__(self, a, b):
# Delay the selection of the tolerance to here in order to reduce numpy
# import times. The calculation of these parameters is a substantial
# component of numpy's import time.
if self.tolerance is None:
self.tolerance = np.finfo(float).tiny
# don't call ma ufuncs from __array_wrap__ which would fail for scalars
a, b = np.asarray(a), np.asarray(b)
with np.errstate(invalid='ignore'):
return umath.absolute(a) * self.tolerance >= umath.absolute(b)
class _DomainGreater(object):
"""
DomainGreater(v)(x) is True where x <= v.
"""
def __init__(self, critical_value):
"DomainGreater(v)(x) = true where x <= v"
self.critical_value = critical_value
def __call__(self, x):
"Executes the call behavior."
with np.errstate(invalid='ignore'):
return umath.less_equal(x, self.critical_value)
class _DomainGreaterEqual(object):
"""
DomainGreaterEqual(v)(x) is True where x < v.
"""
def __init__(self, critical_value):
"DomainGreaterEqual(v)(x) = true where x < v"
self.critical_value = critical_value
def __call__(self, x):
"Executes the call behavior."
with np.errstate(invalid='ignore'):
return umath.less(x, self.critical_value)
class _MaskedUFunc(object):
def __init__(self, ufunc):
self.f = ufunc
self.__doc__ = ufunc.__doc__
self.__name__ = ufunc.__name__
def __str__(self):
return "Masked version of {}".format(self.f)
class _MaskedUnaryOperation(_MaskedUFunc):
"""
Defines masked version of unary operations, where invalid values are
pre-masked.
Parameters
----------
mufunc : callable
The function for which to define a masked version. Made available
as ``_MaskedUnaryOperation.f``.
fill : scalar, optional
Filling value, default is 0.
domain : class instance
Domain for the function. Should be one of the ``_Domain*``
classes. Default is None.
"""
def __init__(self, mufunc, fill=0, domain=None):
super(_MaskedUnaryOperation, self).__init__(mufunc)
self.fill = fill
self.domain = domain
ufunc_domain[mufunc] = domain
ufunc_fills[mufunc] = fill
def __call__(self, a, *args, **kwargs):
"""
Execute the call behavior.
"""
d = getdata(a)
# Deal with domain
if self.domain is not None:
# Case 1.1. : Domained function
# nans at masked positions cause RuntimeWarnings, even though
# they are masked. To avoid this we suppress warnings.
with np.errstate(divide='ignore', invalid='ignore'):
result = self.f(d, *args, **kwargs)
# Make a mask
m = ~umath.isfinite(result)
m |= self.domain(d)
m |= getmask(a)
else:
# Case 1.2. : Function without a domain
# Get the result and the mask
with np.errstate(divide='ignore', invalid='ignore'):
result = self.f(d, *args, **kwargs)
m = getmask(a)
if not result.ndim:
# Case 2.1. : The result is scalarscalar
if m:
return masked
return result
if m is not nomask:
# Case 2.2. The result is an array
# We need to fill the invalid data back w/ the input Now,
# that's plain silly: in C, we would just skip the element and
# keep the original, but we do have to do it that way in Python
# In case result has a lower dtype than the inputs (as in
# equal)
try:
np.copyto(result, d, where=m)
except TypeError:
pass
# Transform to
masked_result = result.view(get_masked_subclass(a))
masked_result._mask = m
masked_result._update_from(a)
return masked_result
class _MaskedBinaryOperation(_MaskedUFunc):
"""
Define masked version of binary operations, where invalid
values are pre-masked.
Parameters
----------
mbfunc : function
The function for which to define a masked version. Made available
as ``_MaskedBinaryOperation.f``.
domain : class instance
Default domain for the function. Should be one of the ``_Domain*``
classes. Default is None.
fillx : scalar, optional
Filling value for the first argument, default is 0.
filly : scalar, optional
Filling value for the second argument, default is 0.
"""
def __init__(self, mbfunc, fillx=0, filly=0):
"""
abfunc(fillx, filly) must be defined.
abfunc(x, filly) = x for all x to enable reduce.
"""
super(_MaskedBinaryOperation, self).__init__(mbfunc)
self.fillx = fillx
self.filly = filly
ufunc_domain[mbfunc] = None
ufunc_fills[mbfunc] = (fillx, filly)
def __call__(self, a, b, *args, **kwargs):
"""
Execute the call behavior.
"""
# Get the data, as ndarray
(da, db) = (getdata(a), getdata(b))
# Get the result
with np.errstate():
np.seterr(divide='ignore', invalid='ignore')
result = self.f(da, db, *args, **kwargs)
# Get the mask for the result
(ma, mb) = (getmask(a), getmask(b))
if ma is nomask:
if mb is nomask:
m = nomask
else:
m = umath.logical_or(getmaskarray(a), mb)
elif mb is nomask:
m = umath.logical_or(ma, getmaskarray(b))
else:
m = umath.logical_or(ma, mb)
# Case 1. : scalar
if not result.ndim:
if m:
return masked
return result
# Case 2. : array
# Revert result to da where masked
if m is not nomask and m.any():
# any errors, just abort; impossible to guarantee masked values
try:
np.copyto(result, da, casting='unsafe', where=m)
except Exception:
pass
# Transforms to a (subclass of) MaskedArray
masked_result = result.view(get_masked_subclass(a, b))
masked_result._mask = m
if isinstance(a, MaskedArray):
masked_result._update_from(a)
elif isinstance(b, MaskedArray):
masked_result._update_from(b)
return masked_result
def reduce(self, target, axis=0, dtype=None):
"""
Reduce `target` along the given `axis`.
"""
tclass = get_masked_subclass(target)
m = getmask(target)
t = filled(target, self.filly)
if t.shape == ():
t = t.reshape(1)
if m is not nomask:
m = make_mask(m, copy=1)
m.shape = (1,)
if m is nomask:
tr = self.f.reduce(t, axis)
mr = nomask
else:
tr = self.f.reduce(t, axis, dtype=dtype or t.dtype)
mr = umath.logical_and.reduce(m, axis)
if not tr.shape:
if mr:
return masked
else:
return tr
masked_tr = tr.view(tclass)
masked_tr._mask = mr
return masked_tr
def outer(self, a, b):
"""
Return the function applied to the outer product of a and b.
"""
(da, db) = (getdata(a), getdata(b))
d = self.f.outer(da, db)
ma = getmask(a)
mb = getmask(b)
if ma is nomask and mb is nomask:
m = nomask
else:
ma = getmaskarray(a)
mb = getmaskarray(b)
m = umath.logical_or.outer(ma, mb)
if (not m.ndim) and m:
return masked
if m is not nomask:
np.copyto(d, da, where=m)
if not d.shape:
return d
masked_d = d.view(get_masked_subclass(a, b))
masked_d._mask = m
return masked_d
def accumulate(self, target, axis=0):
"""Accumulate `target` along `axis` after filling with y fill
value.
"""
tclass = get_masked_subclass(target)
t = filled(target, self.filly)
result = self.f.accumulate(t, axis)
masked_result = result.view(tclass)
return masked_result
class _DomainedBinaryOperation(_MaskedUFunc):
"""
Define binary operations that have a domain, like divide.
They have no reduce, outer or accumulate.
Parameters
----------
mbfunc : function
The function for which to define a masked version. Made available
as ``_DomainedBinaryOperation.f``.
domain : class instance
Default domain for the function. Should be one of the ``_Domain*``
classes.
fillx : scalar, optional
Filling value for the first argument, default is 0.
filly : scalar, optional
Filling value for the second argument, default is 0.
"""
def __init__(self, dbfunc, domain, fillx=0, filly=0):
"""abfunc(fillx, filly) must be defined.
abfunc(x, filly) = x for all x to enable reduce.
"""
super(_DomainedBinaryOperation, self).__init__(dbfunc)
self.domain = domain
self.fillx = fillx
self.filly = filly
ufunc_domain[dbfunc] = domain
ufunc_fills[dbfunc] = (fillx, filly)
def __call__(self, a, b, *args, **kwargs):
"Execute the call behavior."
# Get the data
(da, db) = (getdata(a), getdata(b))
# Get the result
with np.errstate(divide='ignore', invalid='ignore'):
result = self.f(da, db, *args, **kwargs)
# Get the mask as a combination of the source masks and invalid
m = ~umath.isfinite(result)
m |= getmask(a)
m |= getmask(b)
# Apply the domain
domain = ufunc_domain.get(self.f, None)
if domain is not None:
m |= domain(da, db)
# Take care of the scalar case first
if (not m.ndim):
if m:
return masked
else:
return result
# When the mask is True, put back da if possible
# any errors, just abort; impossible to guarantee masked values
try:
np.copyto(result, 0, casting='unsafe', where=m)
# avoid using "*" since this may be overlaid
masked_da = umath.multiply(m, da)
# only add back if it can be cast safely
if np.can_cast(masked_da.dtype, result.dtype, casting='safe'):
result += masked_da
except Exception:
pass
# Transforms to a (subclass of) MaskedArray
masked_result = result.view(get_masked_subclass(a, b))
masked_result._mask = m
if isinstance(a, MaskedArray):
masked_result._update_from(a)
elif isinstance(b, MaskedArray):
masked_result._update_from(b)
return masked_result
# Unary ufuncs
exp = _MaskedUnaryOperation(umath.exp)
conjugate = _MaskedUnaryOperation(umath.conjugate)
sin = _MaskedUnaryOperation(umath.sin)
cos = _MaskedUnaryOperation(umath.cos)
tan = _MaskedUnaryOperation(umath.tan)
arctan = _MaskedUnaryOperation(umath.arctan)
arcsinh = _MaskedUnaryOperation(umath.arcsinh)
sinh = _MaskedUnaryOperation(umath.sinh)
cosh = _MaskedUnaryOperation(umath.cosh)
tanh = _MaskedUnaryOperation(umath.tanh)
abs = absolute = _MaskedUnaryOperation(umath.absolute)
angle = _MaskedUnaryOperation(angle) # from numpy.lib.function_base
fabs = _MaskedUnaryOperation(umath.fabs)
negative = _MaskedUnaryOperation(umath.negative)
floor = _MaskedUnaryOperation(umath.floor)
ceil = _MaskedUnaryOperation(umath.ceil)
around = _MaskedUnaryOperation(np.round_)
logical_not = _MaskedUnaryOperation(umath.logical_not)
# Domained unary ufuncs
sqrt = _MaskedUnaryOperation(umath.sqrt, 0.0,
_DomainGreaterEqual(0.0))
log = _MaskedUnaryOperation(umath.log, 1.0,
_DomainGreater(0.0))
log2 = _MaskedUnaryOperation(umath.log2, 1.0,
_DomainGreater(0.0))
log10 = _MaskedUnaryOperation(umath.log10, 1.0,
_DomainGreater(0.0))
tan = _MaskedUnaryOperation(umath.tan, 0.0,
_DomainTan(1e-35))
arcsin = _MaskedUnaryOperation(umath.arcsin, 0.0,
_DomainCheckInterval(-1.0, 1.0))
arccos = _MaskedUnaryOperation(umath.arccos, 0.0,
_DomainCheckInterval(-1.0, 1.0))
arccosh = _MaskedUnaryOperation(umath.arccosh, 1.0,
_DomainGreaterEqual(1.0))
arctanh = _MaskedUnaryOperation(umath.arctanh, 0.0,
_DomainCheckInterval(-1.0 + 1e-15, 1.0 - 1e-15))
# Binary ufuncs
add = _MaskedBinaryOperation(umath.add)
subtract = _MaskedBinaryOperation(umath.subtract)
multiply = _MaskedBinaryOperation(umath.multiply, 1, 1)
arctan2 = _MaskedBinaryOperation(umath.arctan2, 0.0, 1.0)
equal = _MaskedBinaryOperation(umath.equal)
equal.reduce = None
not_equal = _MaskedBinaryOperation(umath.not_equal)
not_equal.reduce = None
less_equal = _MaskedBinaryOperation(umath.less_equal)
less_equal.reduce = None
greater_equal = _MaskedBinaryOperation(umath.greater_equal)
greater_equal.reduce = None
less = _MaskedBinaryOperation(umath.less)
less.reduce = None
greater = _MaskedBinaryOperation(umath.greater)
greater.reduce = None
logical_and = _MaskedBinaryOperation(umath.logical_and)
alltrue = _MaskedBinaryOperation(umath.logical_and, 1, 1).reduce
logical_or = _MaskedBinaryOperation(umath.logical_or)
sometrue = logical_or.reduce
logical_xor = _MaskedBinaryOperation(umath.logical_xor)
bitwise_and = _MaskedBinaryOperation(umath.bitwise_and)
bitwise_or = _MaskedBinaryOperation(umath.bitwise_or)
bitwise_xor = _MaskedBinaryOperation(umath.bitwise_xor)
hypot = _MaskedBinaryOperation(umath.hypot)
# Domained binary ufuncs
divide = _DomainedBinaryOperation(umath.divide, _DomainSafeDivide(), 0, 1)
true_divide = _DomainedBinaryOperation(umath.true_divide,
_DomainSafeDivide(), 0, 1)
floor_divide = _DomainedBinaryOperation(umath.floor_divide,
_DomainSafeDivide(), 0, 1)
remainder = _DomainedBinaryOperation(umath.remainder,
_DomainSafeDivide(), 0, 1)
fmod = _DomainedBinaryOperation(umath.fmod, _DomainSafeDivide(), 0, 1)
mod = _DomainedBinaryOperation(umath.mod, _DomainSafeDivide(), 0, 1)
###############################################################################
# Mask creation functions #
###############################################################################
def _replace_dtype_fields_recursive(dtype, primitive_dtype):
"Private function allowing recursion in _replace_dtype_fields."
_recurse = _replace_dtype_fields_recursive
# Do we have some name fields ?
if dtype.names:
descr = []
for name in dtype.names:
field = dtype.fields[name]
if len(field) == 3:
# Prepend the title to the name
name = (field[-1], name)
descr.append((name, _recurse(field[0], primitive_dtype)))
new_dtype = np.dtype(descr)
# Is this some kind of composite a la (float,2)
elif dtype.subdtype:
descr = list(dtype.subdtype)
descr[0] = _recurse(dtype.subdtype[0], primitive_dtype)
new_dtype = np.dtype(tuple(descr))
# this is a primitive type, so do a direct replacement
else:
new_dtype = primitive_dtype
# preserve identity of dtypes
if new_dtype == dtype:
new_dtype = dtype
return new_dtype
def _replace_dtype_fields(dtype, primitive_dtype):
"""
Construct a dtype description list from a given dtype.
Returns a new dtype object, with all fields and subtypes in the given type
recursively replaced with `primitive_dtype`.
Arguments are coerced to dtypes first.
"""
dtype = np.dtype(dtype)
primitive_dtype = np.dtype(primitive_dtype)
return _replace_dtype_fields_recursive(dtype, primitive_dtype)
def make_mask_descr(ndtype):
"""
Construct a dtype description list from a given dtype.
Returns a new dtype object, with the type of all fields in `ndtype` to a
boolean type. Field names are not altered.
Parameters
----------
ndtype : dtype
The dtype to convert.
Returns
-------
result : dtype
A dtype that looks like `ndtype`, the type of all fields is boolean.
Examples
--------
>>> import numpy.ma as ma
>>> dtype = np.dtype({'names':['foo', 'bar'],
'formats':[np.float32, int]})
>>> dtype
dtype([('foo', '<f4'), ('bar', '<i4')])
>>> ma.make_mask_descr(dtype)
dtype([('foo', '|b1'), ('bar', '|b1')])
>>> ma.make_mask_descr(np.float32)
dtype('bool')
"""
return _replace_dtype_fields(ndtype, MaskType)
def getmask(a):
"""
Return the mask of a masked array, or nomask.
Return the mask of `a` as an ndarray if `a` is a `MaskedArray` and the
mask is not `nomask`, else return `nomask`. To guarantee a full array
of booleans of the same shape as a, use `getmaskarray`.
Parameters
----------
a : array_like
Input `MaskedArray` for which the mask is required.
See Also
--------
getdata : Return the data of a masked array as an ndarray.
getmaskarray : Return the mask of a masked array, or full array of False.
Examples
--------
>>> import numpy.ma as ma
>>> a = ma.masked_equal([[1,2],[3,4]], 2)
>>> a
masked_array(data =
[[1 --]
[3 4]],
mask =
[[False True]
[False False]],
fill_value=999999)
>>> ma.getmask(a)
array([[False, True],
[False, False]])
Equivalently use the `MaskedArray` `mask` attribute.
>>> a.mask
array([[False, True],
[False, False]])
Result when mask == `nomask`
>>> b = ma.masked_array([[1,2],[3,4]])
>>> b
masked_array(data =
[[1 2]
[3 4]],
mask =
False,
fill_value=999999)
>>> ma.nomask
False
>>> ma.getmask(b) == ma.nomask
True
>>> b.mask == ma.nomask
True
"""
return getattr(a, '_mask', nomask)
get_mask = getmask
def getmaskarray(arr):
"""
Return the mask of a masked array, or full boolean array of False.
Return the mask of `arr` as an ndarray if `arr` is a `MaskedArray` and
the mask is not `nomask`, else return a full boolean array of False of
the same shape as `arr`.
Parameters
----------
arr : array_like
Input `MaskedArray` for which the mask is required.
See Also
--------
getmask : Return the mask of a masked array, or nomask.
getdata : Return the data of a masked array as an ndarray.
Examples
--------
>>> import numpy.ma as ma
>>> a = ma.masked_equal([[1,2],[3,4]], 2)
>>> a
masked_array(data =
[[1 --]
[3 4]],
mask =
[[False True]
[False False]],
fill_value=999999)
>>> ma.getmaskarray(a)
array([[False, True],
[False, False]])
Result when mask == ``nomask``
>>> b = ma.masked_array([[1,2],[3,4]])
>>> b
masked_array(data =
[[1 2]
[3 4]],
mask =
False,
fill_value=999999)
>>> >ma.getmaskarray(b)
array([[False, False],
[False, False]])
"""
mask = getmask(arr)
if mask is nomask:
mask = make_mask_none(np.shape(arr), getattr(arr, 'dtype', None))
return mask
def is_mask(m):
"""
Return True if m is a valid, standard mask.
This function does not check the contents of the input, only that the
type is MaskType. In particular, this function returns False if the
mask has a flexible dtype.
Parameters
----------
m : array_like
Array to test.
Returns
-------
result : bool
True if `m.dtype.type` is MaskType, False otherwise.
See Also
--------
isMaskedArray : Test whether input is an instance of MaskedArray.
Examples
--------
>>> import numpy.ma as ma
>>> m = ma.masked_equal([0, 1, 0, 2, 3], 0)
>>> m
masked_array(data = [-- 1 -- 2 3],
mask = [ True False True False False],
fill_value=999999)
>>> ma.is_mask(m)
False
>>> ma.is_mask(m.mask)
True
Input must be an ndarray (or have similar attributes)
for it to be considered a valid mask.
>>> m = [False, True, False]
>>> ma.is_mask(m)
False
>>> m = np.array([False, True, False])
>>> m
array([False, True, False])
>>> ma.is_mask(m)
True
Arrays with complex dtypes don't return True.
>>> dtype = np.dtype({'names':['monty', 'pithon'],
'formats':[bool, bool]})
>>> dtype
dtype([('monty', '|b1'), ('pithon', '|b1')])
>>> m = np.array([(True, False), (False, True), (True, False)],
dtype=dtype)
>>> m
array([(True, False), (False, True), (True, False)],
dtype=[('monty', '|b1'), ('pithon', '|b1')])
>>> ma.is_mask(m)
False
"""
try:
return m.dtype.type is MaskType
except AttributeError:
return False
def _shrink_mask(m):
"""
Shrink a mask to nomask if possible
"""
if not m.dtype.names and not m.any():
return nomask
else:
return m
def make_mask(m, copy=False, shrink=True, dtype=MaskType):
"""
Create a boolean mask from an array.
Return `m` as a boolean mask, creating a copy if necessary or requested.
The function can accept any sequence that is convertible to integers,
or ``nomask``. Does not require that contents must be 0s and 1s, values
of 0 are interepreted as False, everything else as True.
Parameters
----------
m : array_like
Potential mask.
copy : bool, optional
Whether to return a copy of `m` (True) or `m` itself (False).
shrink : bool, optional
Whether to shrink `m` to ``nomask`` if all its values are False.
dtype : dtype, optional
Data-type of the output mask. By default, the output mask has a
dtype of MaskType (bool). If the dtype is flexible, each field has
a boolean dtype. This is ignored when `m` is ``nomask``, in which
case ``nomask`` is always returned.
Returns
-------
result : ndarray
A boolean mask derived from `m`.
Examples
--------
>>> import numpy.ma as ma
>>> m = [True, False, True, True]
>>> ma.make_mask(m)
array([ True, False, True, True])
>>> m = [1, 0, 1, 1]
>>> ma.make_mask(m)
array([ True, False, True, True])
>>> m = [1, 0, 2, -3]
>>> ma.make_mask(m)
array([ True, False, True, True])
Effect of the `shrink` parameter.
>>> m = np.zeros(4)
>>> m
array([ 0., 0., 0., 0.])
>>> ma.make_mask(m)
False
>>> ma.make_mask(m, shrink=False)
array([False, False, False, False])
Using a flexible `dtype`.
>>> m = [1, 0, 1, 1]
>>> n = [0, 1, 0, 0]
>>> arr = []
>>> for man, mouse in zip(m, n):
... arr.append((man, mouse))
>>> arr
[(1, 0), (0, 1), (1, 0), (1, 0)]
>>> dtype = np.dtype({'names':['man', 'mouse'],
'formats':[int, int]})
>>> arr = np.array(arr, dtype=dtype)
>>> arr
array([(1, 0), (0, 1), (1, 0), (1, 0)],
dtype=[('man', '<i4'), ('mouse', '<i4')])
>>> ma.make_mask(arr, dtype=dtype)
array([(True, False), (False, True), (True, False), (True, False)],
dtype=[('man', '|b1'), ('mouse', '|b1')])
"""
if m is nomask:
return nomask
# Make sure the input dtype is valid.
dtype = make_mask_descr(dtype)
# legacy boolean special case: "existence of fields implies true"
if isinstance(m, ndarray) and m.dtype.fields and dtype == np.bool_:
return np.ones(m.shape, dtype=dtype)
# Fill the mask in case there are missing data; turn it into an ndarray.
result = np.array(filled(m, True), copy=copy, dtype=dtype, subok=True)
# Bas les masques !
if shrink:
result = _shrink_mask(result)
return result
def make_mask_none(newshape, dtype=None):
"""
Return a boolean mask of the given shape, filled with False.
This function returns a boolean ndarray with all entries False, that can
be used in common mask manipulations. If a complex dtype is specified, the
type of each field is converted to a boolean type.
Parameters
----------
newshape : tuple
A tuple indicating the shape of the mask.
dtype : {None, dtype}, optional
If None, use a MaskType instance. Otherwise, use a new datatype with
the same fields as `dtype`, converted to boolean types.
Returns
-------
result : ndarray
An ndarray of appropriate shape and dtype, filled with False.
See Also
--------
make_mask : Create a boolean mask from an array.
make_mask_descr : Construct a dtype description list from a given dtype.
Examples
--------
>>> import numpy.ma as ma
>>> ma.make_mask_none((3,))
array([False, False, False])
Defining a more complex dtype.
>>> dtype = np.dtype({'names':['foo', 'bar'],
'formats':[np.float32, int]})
>>> dtype
dtype([('foo', '<f4'), ('bar', '<i4')])
>>> ma.make_mask_none((3,), dtype=dtype)
array([(False, False), (False, False), (False, False)],
dtype=[('foo', '|b1'), ('bar', '|b1')])
"""
if dtype is None:
result = np.zeros(newshape, dtype=MaskType)
else:
result = np.zeros(newshape, dtype=make_mask_descr(dtype))
return result
def mask_or(m1, m2, copy=False, shrink=True):
"""
Combine two masks with the ``logical_or`` operator.
The result may be a view on `m1` or `m2` if the other is `nomask`
(i.e. False).
Parameters
----------
m1, m2 : array_like
Input masks.
copy : bool, optional
If copy is False and one of the inputs is `nomask`, return a view
of the other input mask. Defaults to False.
shrink : bool, optional
Whether to shrink the output to `nomask` if all its values are
False. Defaults to True.
Returns
-------
mask : output mask
The result masks values that are masked in either `m1` or `m2`.
Raises
------
ValueError
If `m1` and `m2` have different flexible dtypes.
Examples
--------
>>> m1 = np.ma.make_mask([0, 1, 1, 0])
>>> m2 = np.ma.make_mask([1, 0, 0, 0])
>>> np.ma.mask_or(m1, m2)
array([ True, True, True, False])
"""
def _recursive_mask_or(m1, m2, newmask):
names = m1.dtype.names
for name in names:
current1 = m1[name]
if current1.dtype.names:
_recursive_mask_or(current1, m2[name], newmask[name])
else:
umath.logical_or(current1, m2[name], newmask[name])
return
if (m1 is nomask) or (m1 is False):
dtype = getattr(m2, 'dtype', MaskType)
return make_mask(m2, copy=copy, shrink=shrink, dtype=dtype)
if (m2 is nomask) or (m2 is False):
dtype = getattr(m1, 'dtype', MaskType)
return make_mask(m1, copy=copy, shrink=shrink, dtype=dtype)
if m1 is m2 and is_mask(m1):
return m1
(dtype1, dtype2) = (getattr(m1, 'dtype', None), getattr(m2, 'dtype', None))
if (dtype1 != dtype2):
raise ValueError("Incompatible dtypes '%s'<>'%s'" % (dtype1, dtype2))
if dtype1.names:
# Allocate an output mask array with the properly broadcast shape.
newmask = np.empty(np.broadcast(m1, m2).shape, dtype1)
_recursive_mask_or(m1, m2, newmask)
return newmask
return make_mask(umath.logical_or(m1, m2), copy=copy, shrink=shrink)
def flatten_mask(mask):
"""
Returns a completely flattened version of the mask, where nested fields
are collapsed.
Parameters
----------
mask : array_like
Input array, which will be interpreted as booleans.
Returns
-------
flattened_mask : ndarray of bools
The flattened input.
Examples
--------
>>> mask = np.array([0, 0, 1])
>>> flatten_mask(mask)
array([False, False, True])
>>> mask = np.array([(0, 0), (0, 1)], dtype=[('a', bool), ('b', bool)])
>>> flatten_mask(mask)
array([False, False, False, True])
>>> mdtype = [('a', bool), ('b', [('ba', bool), ('bb', bool)])]
>>> mask = np.array([(0, (0, 0)), (0, (0, 1))], dtype=mdtype)
>>> flatten_mask(mask)
array([False, False, False, False, False, True])
"""
def _flatmask(mask):
"Flatten the mask and returns a (maybe nested) sequence of booleans."
mnames = mask.dtype.names
if mnames:
return [flatten_mask(mask[name]) for name in mnames]
else:
return mask
def _flatsequence(sequence):
"Generates a flattened version of the sequence."
try:
for element in sequence:
if hasattr(element, '__iter__'):
for f in _flatsequence(element):
yield f
else:
yield element
except TypeError:
yield sequence
mask = np.asarray(mask)
flattened = _flatsequence(_flatmask(mask))
return np.array([_ for _ in flattened], dtype=bool)
def _check_mask_axis(mask, axis, keepdims=np._NoValue):
"Check whether there are masked values along the given axis"
kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
if mask is not nomask:
return mask.all(axis=axis, **kwargs)
return nomask
###############################################################################
# Masking functions #
###############################################################################
def masked_where(condition, a, copy=True):
"""
Mask an array where a condition is met.
Return `a` as an array masked where `condition` is True.
Any masked values of `a` or `condition` are also masked in the output.
Parameters
----------
condition : array_like
Masking condition. When `condition` tests floating point values for
equality, consider using ``masked_values`` instead.
a : array_like
Array to mask.
copy : bool
If True (default) make a copy of `a` in the result. If False modify
`a` in place and return a view.
Returns
-------
result : MaskedArray
The result of masking `a` where `condition` is True.
See Also
--------
masked_values : Mask using floating point equality.
masked_equal : Mask where equal to a given value.
masked_not_equal : Mask where `not` equal to a given value.
masked_less_equal : Mask where less than or equal to a given value.
masked_greater_equal : Mask where greater than or equal to a given value.
masked_less : Mask where less than a given value.
masked_greater : Mask where greater than a given value.
masked_inside : Mask inside a given interval.
masked_outside : Mask outside a given interval.
masked_invalid : Mask invalid values (NaNs or infs).
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> ma.masked_where(a <= 2, a)
masked_array(data = [-- -- -- 3],
mask = [ True True True False],
fill_value=999999)
Mask array `b` conditional on `a`.
>>> b = ['a', 'b', 'c', 'd']
>>> ma.masked_where(a == 2, b)
masked_array(data = [a b -- d],
mask = [False False True False],
fill_value=N/A)
Effect of the `copy` argument.
>>> c = ma.masked_where(a <= 2, a)
>>> c
masked_array(data = [-- -- -- 3],
mask = [ True True True False],
fill_value=999999)
>>> c[0] = 99
>>> c
masked_array(data = [99 -- -- 3],
mask = [False True True False],
fill_value=999999)
>>> a
array([0, 1, 2, 3])
>>> c = ma.masked_where(a <= 2, a, copy=False)
>>> c[0] = 99
>>> c
masked_array(data = [99 -- -- 3],
mask = [False True True False],
fill_value=999999)
>>> a
array([99, 1, 2, 3])
When `condition` or `a` contain masked values.
>>> a = np.arange(4)
>>> a = ma.masked_where(a == 2, a)
>>> a
masked_array(data = [0 1 -- 3],
mask = [False False True False],
fill_value=999999)
>>> b = np.arange(4)
>>> b = ma.masked_where(b == 0, b)
>>> b
masked_array(data = [-- 1 2 3],
mask = [ True False False False],
fill_value=999999)
>>> ma.masked_where(a == 3, b)
masked_array(data = [-- 1 -- --],
mask = [ True False True True],
fill_value=999999)
"""
# Make sure that condition is a valid standard-type mask.
cond = make_mask(condition, shrink=False)
a = np.array(a, copy=copy, subok=True)
(cshape, ashape) = (cond.shape, a.shape)
if cshape and cshape != ashape:
raise IndexError("Inconsistent shape between the condition and the input"
" (got %s and %s)" % (cshape, ashape))
if hasattr(a, '_mask'):
cond = mask_or(cond, a._mask)
cls = type(a)
else:
cls = MaskedArray
result = a.view(cls)
# Assign to *.mask so that structured masks are handled correctly.
result.mask = _shrink_mask(cond)
return result
def masked_greater(x, value, copy=True):
"""
Mask an array where greater than a given value.
This function is a shortcut to ``masked_where``, with
`condition` = (x > value).
See Also
--------
masked_where : Mask where a condition is met.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> ma.masked_greater(a, 2)
masked_array(data = [0 1 2 --],
mask = [False False False True],
fill_value=999999)
"""
return masked_where(greater(x, value), x, copy=copy)
def masked_greater_equal(x, value, copy=True):
"""
Mask an array where greater than or equal to a given value.
This function is a shortcut to ``masked_where``, with
`condition` = (x >= value).
See Also
--------
masked_where : Mask where a condition is met.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> ma.masked_greater_equal(a, 2)
masked_array(data = [0 1 -- --],
mask = [False False True True],
fill_value=999999)
"""
return masked_where(greater_equal(x, value), x, copy=copy)
def masked_less(x, value, copy=True):
"""
Mask an array where less than a given value.
This function is a shortcut to ``masked_where``, with
`condition` = (x < value).
See Also
--------
masked_where : Mask where a condition is met.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> ma.masked_less(a, 2)
masked_array(data = [-- -- 2 3],
mask = [ True True False False],
fill_value=999999)
"""
return masked_where(less(x, value), x, copy=copy)
def masked_less_equal(x, value, copy=True):
"""
Mask an array where less than or equal to a given value.
This function is a shortcut to ``masked_where``, with
`condition` = (x <= value).
See Also
--------
masked_where : Mask where a condition is met.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> ma.masked_less_equal(a, 2)
masked_array(data = [-- -- -- 3],
mask = [ True True True False],
fill_value=999999)
"""
return masked_where(less_equal(x, value), x, copy=copy)
def masked_not_equal(x, value, copy=True):
"""
Mask an array where `not` equal to a given value.
This function is a shortcut to ``masked_where``, with
`condition` = (x != value).
See Also
--------
masked_where : Mask where a condition is met.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> ma.masked_not_equal(a, 2)
masked_array(data = [-- -- 2 --],
mask = [ True True False True],
fill_value=999999)
"""
return masked_where(not_equal(x, value), x, copy=copy)
def masked_equal(x, value, copy=True):
"""
Mask an array where equal to a given value.
This function is a shortcut to ``masked_where``, with
`condition` = (x == value). For floating point arrays,
consider using ``masked_values(x, value)``.
See Also
--------
masked_where : Mask where a condition is met.
masked_values : Mask using floating point equality.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> ma.masked_equal(a, 2)
masked_array(data = [0 1 -- 3],
mask = [False False True False],
fill_value=999999)
"""
output = masked_where(equal(x, value), x, copy=copy)
output.fill_value = value
return output
def masked_inside(x, v1, v2, copy=True):
"""
Mask an array inside a given interval.
Shortcut to ``masked_where``, where `condition` is True for `x` inside
the interval [v1,v2] (v1 <= x <= v2). The boundaries `v1` and `v2`
can be given in either order.
See Also
--------
masked_where : Mask where a condition is met.
Notes
-----
The array `x` is prefilled with its filling value.
Examples
--------
>>> import numpy.ma as ma
>>> x = [0.31, 1.2, 0.01, 0.2, -0.4, -1.1]
>>> ma.masked_inside(x, -0.3, 0.3)
masked_array(data = [0.31 1.2 -- -- -0.4 -1.1],
mask = [False False True True False False],
fill_value=1e+20)
The order of `v1` and `v2` doesn't matter.
>>> ma.masked_inside(x, 0.3, -0.3)
masked_array(data = [0.31 1.2 -- -- -0.4 -1.1],
mask = [False False True True False False],
fill_value=1e+20)
"""
if v2 < v1:
(v1, v2) = (v2, v1)
xf = filled(x)
condition = (xf >= v1) & (xf <= v2)
return masked_where(condition, x, copy=copy)
def masked_outside(x, v1, v2, copy=True):
"""
Mask an array outside a given interval.
Shortcut to ``masked_where``, where `condition` is True for `x` outside
the interval [v1,v2] (x < v1)|(x > v2).
The boundaries `v1` and `v2` can be given in either order.
See Also
--------
masked_where : Mask where a condition is met.
Notes
-----
The array `x` is prefilled with its filling value.
Examples
--------
>>> import numpy.ma as ma
>>> x = [0.31, 1.2, 0.01, 0.2, -0.4, -1.1]
>>> ma.masked_outside(x, -0.3, 0.3)
masked_array(data = [-- -- 0.01 0.2 -- --],
mask = [ True True False False True True],
fill_value=1e+20)
The order of `v1` and `v2` doesn't matter.
>>> ma.masked_outside(x, 0.3, -0.3)
masked_array(data = [-- -- 0.01 0.2 -- --],
mask = [ True True False False True True],
fill_value=1e+20)
"""
if v2 < v1:
(v1, v2) = (v2, v1)
xf = filled(x)
condition = (xf < v1) | (xf > v2)
return masked_where(condition, x, copy=copy)
def masked_object(x, value, copy=True, shrink=True):
"""
Mask the array `x` where the data are exactly equal to value.
This function is similar to `masked_values`, but only suitable
for object arrays: for floating point, use `masked_values` instead.
Parameters
----------
x : array_like
Array to mask
value : object
Comparison value
copy : {True, False}, optional
Whether to return a copy of `x`.
shrink : {True, False}, optional
Whether to collapse a mask full of False to nomask
Returns
-------
result : MaskedArray
The result of masking `x` where equal to `value`.
See Also
--------
masked_where : Mask where a condition is met.
masked_equal : Mask where equal to a given value (integers).
masked_values : Mask using floating point equality.
Examples
--------
>>> import numpy.ma as ma
>>> food = np.array(['green_eggs', 'ham'], dtype=object)
>>> # don't eat spoiled food
>>> eat = ma.masked_object(food, 'green_eggs')
>>> print(eat)
[-- ham]
>>> # plain ol` ham is boring
>>> fresh_food = np.array(['cheese', 'ham', 'pineapple'], dtype=object)
>>> eat = ma.masked_object(fresh_food, 'green_eggs')
>>> print(eat)
[cheese ham pineapple]
Note that `mask` is set to ``nomask`` if possible.
>>> eat
masked_array(data = [cheese ham pineapple],
mask = False,
fill_value=?)
"""
if isMaskedArray(x):
condition = umath.equal(x._data, value)
mask = x._mask
else:
condition = umath.equal(np.asarray(x), value)
mask = nomask
mask = mask_or(mask, make_mask(condition, shrink=shrink))
return masked_array(x, mask=mask, copy=copy, fill_value=value)
def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True):
"""
Mask using floating point equality.
Return a MaskedArray, masked where the data in array `x` are approximately
equal to `value`, determined using `isclose`. The default tolerances for
`masked_values` are the same as those for `isclose`.
For integer types, exact equality is used, in the same way as
`masked_equal`.
The fill_value is set to `value` and the mask is set to ``nomask`` if
possible.
Parameters
----------
x : array_like
Array to mask.
value : float
Masking value.
rtol, atol : float, optional
Tolerance parameters passed on to `isclose`
copy : bool, optional
Whether to return a copy of `x`.
shrink : bool, optional
Whether to collapse a mask full of False to ``nomask``.
Returns
-------
result : MaskedArray
The result of masking `x` where approximately equal to `value`.
See Also
--------
masked_where : Mask where a condition is met.
masked_equal : Mask where equal to a given value (integers).
Examples
--------
>>> import numpy.ma as ma
>>> x = np.array([1, 1.1, 2, 1.1, 3])
>>> ma.masked_values(x, 1.1)
masked_array(data = [1.0 -- 2.0 -- 3.0],
mask = [False True False True False],
fill_value=1.1)
Note that `mask` is set to ``nomask`` if possible.
>>> ma.masked_values(x, 1.5)
masked_array(data = [ 1. 1.1 2. 1.1 3. ],
mask = False,
fill_value=1.5)
For integers, the fill value will be different in general to the
result of ``masked_equal``.
>>> x = np.arange(5)
>>> x
array([0, 1, 2, 3, 4])
>>> ma.masked_values(x, 2)
masked_array(data = [0 1 -- 3 4],
mask = [False False True False False],
fill_value=2)
>>> ma.masked_equal(x, 2)
masked_array(data = [0 1 -- 3 4],
mask = [False False True False False],
fill_value=999999)
"""
xnew = filled(x, value)
if np.issubdtype(xnew.dtype, np.floating):
mask = np.isclose(xnew, value, atol=atol, rtol=rtol)
else:
mask = umath.equal(xnew, value)
return masked_array(
xnew, mask=mask, copy=copy, fill_value=value, shrink=shrink)
def masked_invalid(a, copy=True):
"""
Mask an array where invalid values occur (NaNs or infs).
This function is a shortcut to ``masked_where``, with
`condition` = ~(np.isfinite(a)). Any pre-existing mask is conserved.
Only applies to arrays with a dtype where NaNs or infs make sense
(i.e. floating point types), but accepts any array_like object.
See Also
--------
masked_where : Mask where a condition is met.
Examples
--------
>>> import numpy.ma as ma
>>> a = np.arange(5, dtype=float)
>>> a[2] = np.NaN
>>> a[3] = np.PINF
>>> a
array([ 0., 1., NaN, Inf, 4.])
>>> ma.masked_invalid(a)
masked_array(data = [0.0 1.0 -- -- 4.0],
mask = [False False True True False],
fill_value=1e+20)
"""
a = np.array(a, copy=copy, subok=True)
mask = getattr(a, '_mask', None)
if mask is not None:
condition = ~(np.isfinite(getdata(a)))
if mask is not nomask:
condition |= mask
cls = type(a)
else:
condition = ~(np.isfinite(a))
cls = MaskedArray
result = a.view(cls)
result._mask = condition
return result
###############################################################################
# Printing options #
###############################################################################
class _MaskedPrintOption(object):
"""
Handle the string used to represent missing data in a masked array.
"""
def __init__(self, display):
"""
Create the masked_print_option object.
"""
self._display = display
self._enabled = True
def display(self):
"""
Display the string to print for masked values.
"""
return self._display
def set_display(self, s):
"""
Set the string to print for masked values.
"""
self._display = s
def enabled(self):
"""
Is the use of the display value enabled?
"""
return self._enabled
def enable(self, shrink=1):
"""
Set the enabling shrink to `shrink`.
"""
self._enabled = shrink
def __str__(self):
return str(self._display)
__repr__ = __str__
# if you single index into a masked location you get this object.
masked_print_option = _MaskedPrintOption('--')
def _recursive_printoption(result, mask, printopt):
"""
Puts printoptions in result where mask is True.
Private function allowing for recursion
"""
names = result.dtype.names
if names:
for name in names:
curdata = result[name]
curmask = mask[name]
_recursive_printoption(curdata, curmask, printopt)
else:
np.copyto(result, printopt, where=mask)
return
# For better or worse, these end in a newline
_legacy_print_templates = dict(
long_std=textwrap.dedent("""\
masked_%(name)s(data =
%(data)s,
%(nlen)s mask =
%(mask)s,
%(nlen)s fill_value = %(fill)s)
"""),
long_flx=textwrap.dedent("""\
masked_%(name)s(data =
%(data)s,
%(nlen)s mask =
%(mask)s,
%(nlen)s fill_value = %(fill)s,
%(nlen)s dtype = %(dtype)s)
"""),
short_std=textwrap.dedent("""\
masked_%(name)s(data = %(data)s,
%(nlen)s mask = %(mask)s,
%(nlen)s fill_value = %(fill)s)
"""),
short_flx=textwrap.dedent("""\
masked_%(name)s(data = %(data)s,
%(nlen)s mask = %(mask)s,
%(nlen)s fill_value = %(fill)s,
%(nlen)s dtype = %(dtype)s)
""")
)
###############################################################################
# MaskedArray class #
###############################################################################
def _recursive_filled(a, mask, fill_value):
"""
Recursively fill `a` with `fill_value`.
"""
names = a.dtype.names
for name in names:
current = a[name]
if current.dtype.names:
_recursive_filled(current, mask[name], fill_value[name])
else:
np.copyto(current, fill_value[name], where=mask[name])
def flatten_structured_array(a):
"""
Flatten a structured array.
The data type of the output is chosen such that it can represent all of the
(nested) fields.
Parameters
----------
a : structured array
Returns
-------
output : masked array or ndarray
A flattened masked array if the input is a masked array, otherwise a
standard ndarray.
Examples
--------
>>> ndtype = [('a', int), ('b', float)]
>>> a = np.array([(1, 1), (2, 2)], dtype=ndtype)
>>> flatten_structured_array(a)
array([[1., 1.],
[2., 2.]])
"""
def flatten_sequence(iterable):
"""
Flattens a compound of nested iterables.
"""
for elm in iter(iterable):
if hasattr(elm, '__iter__'):
for f in flatten_sequence(elm):
yield f
else:
yield elm
a = np.asanyarray(a)
inishape = a.shape
a = a.ravel()
if isinstance(a, MaskedArray):
out = np.array([tuple(flatten_sequence(d.item())) for d in a._data])
out = out.view(MaskedArray)
out._mask = np.array([tuple(flatten_sequence(d.item()))
for d in getmaskarray(a)])
else:
out = np.array([tuple(flatten_sequence(d.item())) for d in a])
if len(inishape) > 1:
newshape = list(out.shape)
newshape[0] = inishape
out.shape = tuple(flatten_sequence(newshape))
return out
def _arraymethod(funcname, onmask=True):
"""
Return a class method wrapper around a basic array method.
Creates a class method which returns a masked array, where the new
``_data`` array is the output of the corresponding basic method called
on the original ``_data``.
If `onmask` is True, the new mask is the output of the method called
on the initial mask. Otherwise, the new mask is just a reference
to the initial mask.
Parameters
----------
funcname : str
Name of the function to apply on data.
onmask : bool
Whether the mask must be processed also (True) or left
alone (False). Default is True. Make available as `_onmask`
attribute.
Returns
-------
method : instancemethod
Class method wrapper of the specified basic array method.
"""
def wrapped_method(self, *args, **params):
result = getattr(self._data, funcname)(*args, **params)
result = result.view(type(self))
result._update_from(self)
mask = self._mask
if not onmask:
result.__setmask__(mask)
elif mask is not nomask:
# __setmask__ makes a copy, which we don't want
result._mask = getattr(mask, funcname)(*args, **params)
return result
methdoc = getattr(ndarray, funcname, None) or getattr(np, funcname, None)
if methdoc is not None:
wrapped_method.__doc__ = methdoc.__doc__
wrapped_method.__name__ = funcname
return wrapped_method
class MaskedIterator(object):
"""
Flat iterator object to iterate over masked arrays.
A `MaskedIterator` iterator is returned by ``x.flat`` for any masked array
`x`. It allows iterating over the array as if it were a 1-D array,
either in a for-loop or by calling its `next` method.
Iteration is done in C-contiguous style, with the last index varying the
fastest. The iterator can also be indexed using basic slicing or
advanced indexing.
See Also
--------
MaskedArray.flat : Return a flat iterator over an array.
MaskedArray.flatten : Returns a flattened copy of an array.
Notes
-----
`MaskedIterator` is not exported by the `ma` module. Instead of
instantiating a `MaskedIterator` directly, use `MaskedArray.flat`.
Examples
--------
>>> x = np.ma.array(arange(6).reshape(2, 3))
>>> fl = x.flat
>>> type(fl)
<class 'numpy.ma.core.MaskedIterator'>
>>> for item in fl:
... print(item)
...
0
1
2
3
4
5
Extracting more than a single element b indexing the `MaskedIterator`
returns a masked array:
>>> fl[2:4]
masked_array(data = [2 3],
mask = False,
fill_value = 999999)
"""
def __init__(self, ma):
self.ma = ma
self.dataiter = ma._data.flat
if ma._mask is nomask:
self.maskiter = None
else:
self.maskiter = ma._mask.flat
def __iter__(self):
return self
def __getitem__(self, indx):
result = self.dataiter.__getitem__(indx).view(type(self.ma))
if self.maskiter is not None:
_mask = self.maskiter.__getitem__(indx)
if isinstance(_mask, ndarray):
# set shape to match that of data; this is needed for matrices
_mask.shape = result.shape
result._mask = _mask
elif isinstance(_mask, np.void):
return mvoid(result, mask=_mask, hardmask=self.ma._hardmask)
elif _mask: # Just a scalar, masked
return masked
return result
# This won't work if ravel makes a copy
def __setitem__(self, index, value):
self.dataiter[index] = getdata(value)
if self.maskiter is not None:
self.maskiter[index] = getmaskarray(value)
def __next__(self):
"""
Return the next value, or raise StopIteration.
Examples
--------
>>> x = np.ma.array([3, 2], mask=[0, 1])
>>> fl = x.flat
>>> fl.next()
3
>>> fl.next()
masked_array(data = --,
mask = True,
fill_value = 1e+20)
>>> fl.next()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/ralf/python/numpy/numpy/ma/core.py", line 2243, in next
d = self.dataiter.next()
StopIteration
"""
d = next(self.dataiter)
if self.maskiter is not None:
m = next(self.maskiter)
if isinstance(m, np.void):
return mvoid(d, mask=m, hardmask=self.ma._hardmask)
elif m: # Just a scalar, masked
return masked
return d
next = __next__
class MaskedArray(ndarray):
"""
An array class with possibly masked values.
Masked values of True exclude the corresponding element from any
computation.
Construction::
x = MaskedArray(data, mask=nomask, dtype=None, copy=False, subok=True,
ndmin=0, fill_value=None, keep_mask=True, hard_mask=None,
shrink=True, order=None)
Parameters
----------
data : array_like
Input data.
mask : sequence, optional
Mask. Must be convertible to an array of booleans with the same
shape as `data`. True indicates a masked (i.e. invalid) data.
dtype : dtype, optional
Data type of the output.
If `dtype` is None, the type of the data argument (``data.dtype``)
is used. If `dtype` is not None and different from ``data.dtype``,
a copy is performed.
copy : bool, optional
Whether to copy the input data (True), or to use a reference instead.
Default is False.
subok : bool, optional
Whether to return a subclass of `MaskedArray` if possible (True) or a
plain `MaskedArray`. Default is True.
ndmin : int, optional
Minimum number of dimensions. Default is 0.
fill_value : scalar, optional
Value used to fill in the masked values when necessary.
If None, a default based on the data-type is used.
keep_mask : bool, optional
Whether to combine `mask` with the mask of the input data, if any
(True), or to use only `mask` for the output (False). Default is True.
hard_mask : bool, optional
Whether to use a hard mask or not. With a hard mask, masked values
cannot be unmasked. Default is False.
shrink : bool, optional
Whether to force compression of an empty mask. Default is True.
order : {'C', 'F', 'A'}, optional
Specify the order of the array. If order is 'C', then the array
will be in C-contiguous order (last-index varies the fastest).
If order is 'F', then the returned array will be in
Fortran-contiguous order (first-index varies the fastest).
If order is 'A' (default), then the returned array may be
in any order (either C-, Fortran-contiguous, or even discontiguous),
unless a copy is required, in which case it will be C-contiguous.
"""
__array_priority__ = 15
_defaultmask = nomask
_defaulthardmask = False
_baseclass = ndarray
# Maximum number of elements per axis used when printing an array. The
# 1d case is handled separately because we need more values in this case.
_print_width = 100
_print_width_1d = 1500
def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
subok=True, ndmin=0, fill_value=None, keep_mask=True,
hard_mask=None, shrink=True, order=None, **options):
"""
Create a new masked array from scratch.
Notes
-----
A masked array can also be created by taking a .view(MaskedArray).
"""
# Process data.
_data = np.array(data, dtype=dtype, copy=copy,
order=order, subok=True, ndmin=ndmin)
_baseclass = getattr(data, '_baseclass', type(_data))
# Check that we're not erasing the mask.
if isinstance(data, MaskedArray) and (data.shape != _data.shape):
copy = True
# Here, we copy the _view_, so that we can attach new properties to it
# we must never do .view(MaskedConstant), as that would create a new
# instance of np.ma.masked, which make identity comparison fail
if isinstance(data, cls) and subok and not isinstance(data, MaskedConstant):
_data = ndarray.view(_data, type(data))
else:
_data = ndarray.view(_data, cls)
# Backwards compatibility w/ numpy.core.ma.
if hasattr(data, '_mask') and not isinstance(data, ndarray):
_data._mask = data._mask
# FIXME _sharedmask is never used.
_sharedmask = True
# Process mask.
# Number of named fields (or zero if none)
names_ = _data.dtype.names or ()
# Type of the mask
if names_:
mdtype = make_mask_descr(_data.dtype)
else:
mdtype = MaskType
if mask is nomask:
# Case 1. : no mask in input.
# Erase the current mask ?
if not keep_mask:
# With a reduced version
if shrink:
_data._mask = nomask
# With full version
else:
_data._mask = np.zeros(_data.shape, dtype=mdtype)
# Check whether we missed something
elif isinstance(data, (tuple, list)):
try:
# If data is a sequence of masked array
mask = np.array([getmaskarray(m) for m in data],
dtype=mdtype)
except ValueError:
# If data is nested
mask = nomask
# Force shrinking of the mask if needed (and possible)
if (mdtype == MaskType) and mask.any():
_data._mask = mask
_data._sharedmask = False
else:
if copy:
_data._mask = _data._mask.copy()
_data._sharedmask = False
# Reset the shape of the original mask
if getmask(data) is not nomask:
data._mask.shape = data.shape
else:
_data._sharedmask = True
else:
# Case 2. : With a mask in input.
# If mask is boolean, create an array of True or False
if mask is True and mdtype == MaskType:
mask = np.ones(_data.shape, dtype=mdtype)
elif mask is False and mdtype == MaskType:
mask = np.zeros(_data.shape, dtype=mdtype)
else:
# Read the mask with the current mdtype
try:
mask = np.array(mask, copy=copy, dtype=mdtype)
# Or assume it's a sequence of bool/int
except TypeError:
mask = np.array([tuple([m] * len(mdtype)) for m in mask],
dtype=mdtype)
# Make sure the mask and the data have the same shape
if mask.shape != _data.shape:
(nd, nm) = (_data.size, mask.size)
if nm == 1:
mask = np.resize(mask, _data.shape)
elif nm == nd:
mask = np.reshape(mask, _data.shape)
else:
msg = "Mask and data not compatible: data size is %i, " + \
"mask size is %i."
raise MaskError(msg % (nd, nm))
copy = True
# Set the mask to the new value
if _data._mask is nomask:
_data._mask = mask
_data._sharedmask = not copy
else:
if not keep_mask:
_data._mask = mask
_data._sharedmask = not copy
else:
if names_:
def _recursive_or(a, b):
"do a|=b on each field of a, recursively"
for name in a.dtype.names:
(af, bf) = (a[name], b[name])
if af.dtype.names:
_recursive_or(af, bf)
else:
af |= bf
return
_recursive_or(_data._mask, mask)
else:
_data._mask = np.logical_or(mask, _data._mask)
_data._sharedmask = False
# Update fill_value.
if fill_value is None:
fill_value = getattr(data, '_fill_value', None)
# But don't run the check unless we have something to check.
if fill_value is not None:
_data._fill_value = _check_fill_value(fill_value, _data.dtype)
# Process extra options ..
if hard_mask is None:
_data._hardmask = getattr(data, '_hardmask', False)
else:
_data._hardmask = hard_mask
_data._baseclass = _baseclass
return _data
def _update_from(self, obj):
"""
Copies some attributes of obj to self.
"""
if isinstance(obj, ndarray):
_baseclass = type(obj)
else:
_baseclass = ndarray
# We need to copy the _basedict to avoid backward propagation
_optinfo = {}
_optinfo.update(getattr(obj, '_optinfo', {}))
_optinfo.update(getattr(obj, '_basedict', {}))
if not isinstance(obj, MaskedArray):
_optinfo.update(getattr(obj, '__dict__', {}))
_dict = dict(_fill_value=getattr(obj, '_fill_value', None),
_hardmask=getattr(obj, '_hardmask', False),
_sharedmask=getattr(obj, '_sharedmask', False),
_isfield=getattr(obj, '_isfield', False),
_baseclass=getattr(obj, '_baseclass', _baseclass),
_optinfo=_optinfo,
_basedict=_optinfo)
self.__dict__.update(_dict)
self.__dict__.update(_optinfo)
return
def __array_finalize__(self, obj):
"""
Finalizes the masked array.
"""
# Get main attributes.
self._update_from(obj)
# We have to decide how to initialize self.mask, based on
# obj.mask. This is very difficult. There might be some
# correspondence between the elements in the array we are being
# created from (= obj) and us. Or there might not. This method can
# be called in all kinds of places for all kinds of reasons -- could
# be empty_like, could be slicing, could be a ufunc, could be a view.
# The numpy subclassing interface simply doesn't give us any way
# to know, which means that at best this method will be based on
# guesswork and heuristics. To make things worse, there isn't even any
# clear consensus about what the desired behavior is. For instance,
# most users think that np.empty_like(marr) -- which goes via this
# method -- should return a masked array with an empty mask (see
# gh-3404 and linked discussions), but others disagree, and they have
# existing code which depends on empty_like returning an array that
# matches the input mask.
#
# Historically our algorithm was: if the template object mask had the
# same *number of elements* as us, then we used *it's mask object
# itself* as our mask, so that writes to us would also write to the
# original array. This is horribly broken in multiple ways.
#
# Now what we do instead is, if the template object mask has the same
# number of elements as us, and we do not have the same base pointer
# as the template object (b/c views like arr[...] should keep the same
# mask), then we make a copy of the template object mask and use
# that. This is also horribly broken but somewhat less so. Maybe.
if isinstance(obj, ndarray):
# XX: This looks like a bug -- shouldn't it check self.dtype
# instead?
if obj.dtype.names:
_mask = getmaskarray(obj)
else:
_mask = getmask(obj)
# If self and obj point to exactly the same data, then probably
# self is a simple view of obj (e.g., self = obj[...]), so they
# should share the same mask. (This isn't 100% reliable, e.g. self
# could be the first row of obj, or have strange strides, but as a
# heuristic it's not bad.) In all other cases, we make a copy of
# the mask, so that future modifications to 'self' do not end up
# side-effecting 'obj' as well.
if (_mask is not nomask and obj.__array_interface__["data"][0]
!= self.__array_interface__["data"][0]):
# We should make a copy. But we could get here via astype,
# in which case the mask might need a new dtype as well
# (e.g., changing to or from a structured dtype), and the
# order could have changed. So, change the mask type if
# needed and use astype instead of copy.
if self.dtype == obj.dtype:
_mask_dtype = _mask.dtype
else:
_mask_dtype = make_mask_descr(self.dtype)
if self.flags.c_contiguous:
order = "C"
elif self.flags.f_contiguous:
order = "F"
else:
order = "K"
_mask = _mask.astype(_mask_dtype, order)
else:
_mask = nomask
self._mask = _mask
# Finalize the mask
if self._mask is not nomask:
try:
self._mask.shape = self.shape
except ValueError:
self._mask = nomask
except (TypeError, AttributeError):
# When _mask.shape is not writable (because it's a void)
pass
# Finalize the fill_value for structured arrays
if self.dtype.names:
if self._fill_value is None:
self._fill_value = _check_fill_value(None, self.dtype)
return
def __array_wrap__(self, obj, context=None):
"""
Special hook for ufuncs.
Wraps the numpy array and sets the mask according to context.
"""
if obj is self: # for in-place operations
result = obj
else:
result = obj.view(type(self))
result._update_from(self)
if context is not None:
result._mask = result._mask.copy()
func, args, out_i = context
# args sometimes contains outputs (gh-10459), which we don't want
input_args = args[:func.nin]
m = reduce(mask_or, [getmaskarray(arg) for arg in input_args])
# Get the domain mask
domain = ufunc_domain.get(func, None)
if domain is not None:
# Take the domain, and make sure it's a ndarray
with np.errstate(divide='ignore', invalid='ignore'):
d = filled(domain(*input_args), True)
if d.any():
# Fill the result where the domain is wrong
try:
# Binary domain: take the last value
fill_value = ufunc_fills[func][-1]
except TypeError:
# Unary domain: just use this one
fill_value = ufunc_fills[func]
except KeyError:
# Domain not recognized, use fill_value instead
fill_value = self.fill_value
np.copyto(result, fill_value, where=d)
# Update the mask
if m is nomask:
m = d
else:
# Don't modify inplace, we risk back-propagation
m = (m | d)
# Make sure the mask has the proper size
if result is not self and result.shape == () and m:
return masked
else:
result._mask = m
result._sharedmask = False
return result
def view(self, dtype=None, type=None, fill_value=None):
"""
Return a view of the MaskedArray data
Parameters
----------
dtype : data-type or ndarray sub-class, optional
Data-type descriptor of the returned view, e.g., float32 or int16.
The default, None, results in the view having the same data-type
as `a`. As with ``ndarray.view``, dtype can also be specified as
an ndarray sub-class, which then specifies the type of the
returned object (this is equivalent to setting the ``type``
parameter).
type : Python type, optional
Type of the returned view, e.g., ndarray or matrix. Again, the
default None results in type preservation.
Notes
-----
``a.view()`` is used two different ways:
``a.view(some_dtype)`` or ``a.view(dtype=some_dtype)`` constructs a view
of the array's memory with a different data-type. This can cause a
reinterpretation of the bytes of memory.
``a.view(ndarray_subclass)`` or ``a.view(type=ndarray_subclass)`` just
returns an instance of `ndarray_subclass` that looks at the same array
(same shape, dtype, etc.) This does not cause a reinterpretation of the
memory.
If `fill_value` is not specified, but `dtype` is specified (and is not
an ndarray sub-class), the `fill_value` of the MaskedArray will be
reset. If neither `fill_value` nor `dtype` are specified (or if
`dtype` is an ndarray sub-class), then the fill value is preserved.
Finally, if `fill_value` is specified, but `dtype` is not, the fill
value is set to the specified value.
For ``a.view(some_dtype)``, if ``some_dtype`` has a different number of
bytes per entry than the previous dtype (for example, converting a
regular array to a structured array), then the behavior of the view
cannot be predicted just from the superficial appearance of ``a`` (shown
by ``print(a)``). It also depends on exactly how ``a`` is stored in
memory. Therefore if ``a`` is C-ordered versus fortran-ordered, versus
defined as a slice or transpose, etc., the view may give different
results.
"""
if dtype is None:
if type is None:
output = ndarray.view(self)
else:
output = ndarray.view(self, type)
elif type is None:
try:
if issubclass(dtype, ndarray):
output = ndarray.view(self, dtype)
dtype = None
else:
output = ndarray.view(self, dtype)
except TypeError:
output = ndarray.view(self, dtype)
else:
output = ndarray.view(self, dtype, type)
# also make the mask be a view (so attr changes to the view's
# mask do no affect original object's mask)
# (especially important to avoid affecting np.masked singleton)
if (getmask(output) is not nomask):
output._mask = output._mask.view()
# Make sure to reset the _fill_value if needed
if getattr(output, '_fill_value', None) is not None:
if fill_value is None:
if dtype is None:
pass # leave _fill_value as is
else:
output._fill_value = None
else:
output.fill_value = fill_value
return output
view.__doc__ = ndarray.view.__doc__
def __getitem__(self, indx):
"""
x.__getitem__(y) <==> x[y]
Return the item described by i, as a masked array.
"""
# We could directly use ndarray.__getitem__ on self.
# But then we would have to modify __array_finalize__ to prevent the
# mask of being reshaped if it hasn't been set up properly yet
# So it's easier to stick to the current version
dout = self.data[indx]
_mask = self._mask
def _is_scalar(m):
return not isinstance(m, np.ndarray)
def _scalar_heuristic(arr, elem):
"""
Return whether `elem` is a scalar result of indexing `arr`, or None
if undecidable without promoting nomask to a full mask
"""
# obviously a scalar
if not isinstance(elem, np.ndarray):
return True
# object array scalar indexing can return anything
elif arr.dtype.type is np.object_:
if arr.dtype is not elem.dtype:
# elem is an array, but dtypes do not match, so must be
# an element
return True
# well-behaved subclass that only returns 0d arrays when
# expected - this is not a scalar
elif type(arr).__getitem__ == ndarray.__getitem__:
return False
return None
if _mask is not nomask:
# _mask cannot be a subclass, so it tells us whether we should
# expect a scalar. It also cannot be of dtype object.
mout = _mask[indx]
scalar_expected = _is_scalar(mout)
else:
# attempt to apply the heuristic to avoid constructing a full mask
mout = nomask
scalar_expected = _scalar_heuristic(self.data, dout)
if scalar_expected is None:
# heuristics have failed
# construct a full array, so we can be certain. This is costly.
# we could also fall back on ndarray.__getitem__(self.data, indx)
scalar_expected = _is_scalar(getmaskarray(self)[indx])
# Did we extract a single item?
if scalar_expected:
# A record
if isinstance(dout, np.void):
# We should always re-cast to mvoid, otherwise users can
# change masks on rows that already have masked values, but not
# on rows that have no masked values, which is inconsistent.
return mvoid(dout, mask=mout, hardmask=self._hardmask)
# special case introduced in gh-5962
elif (self.dtype.type is np.object_ and
isinstance(dout, np.ndarray) and
dout is not masked):
# If masked, turn into a MaskedArray, with everything masked.
if mout:
return MaskedArray(dout, mask=True)
else:
return dout
# Just a scalar
else:
if mout:
return masked
else:
return dout
else:
# Force dout to MA
dout = dout.view(type(self))
# Inherit attributes from self
dout._update_from(self)
# Check the fill_value
if isinstance(indx, basestring):
if self._fill_value is not None:
dout._fill_value = self._fill_value[indx]
# If we're indexing a multidimensional field in a
# structured array (such as dtype("(2,)i2,(2,)i1")),
# dimensionality goes up (M[field].ndim == M.ndim +
# M.dtype[field].ndim). That's fine for
# M[field] but problematic for M[field].fill_value
# which should have shape () to avoid breaking several
# methods. There is no great way out, so set to
# first element. See issue #6723.
if dout._fill_value.ndim > 0:
if not (dout._fill_value ==
dout._fill_value.flat[0]).all():
warnings.warn(
"Upon accessing multidimensional field "
"{indx:s}, need to keep dimensionality "
"of fill_value at 0. Discarding "
"heterogeneous fill_value and setting "
"all to {fv!s}.".format(indx=indx,
fv=dout._fill_value[0]),
stacklevel=2)
dout._fill_value = dout._fill_value.flat[0]
dout._isfield = True
# Update the mask if needed
if mout is not nomask:
# set shape to match that of data; this is needed for matrices
dout._mask = reshape(mout, dout.shape)
dout._sharedmask = True
# Note: Don't try to check for m.any(), that'll take too long
return dout
def __setitem__(self, indx, value):
"""
x.__setitem__(i, y) <==> x[i]=y
Set item described by index. If value is masked, masks those
locations.
"""
if self is masked:
raise MaskError('Cannot alter the masked element.')
_data = self._data
_mask = self._mask
if isinstance(indx, basestring):
_data[indx] = value
if _mask is nomask:
self._mask = _mask = make_mask_none(self.shape, self.dtype)
_mask[indx] = getmask(value)
return
_dtype = _data.dtype
nbfields = len(_dtype.names or ())
if value is masked:
# The mask wasn't set: create a full version.
if _mask is nomask:
_mask = self._mask = make_mask_none(self.shape, _dtype)
# Now, set the mask to its value.
if nbfields:
_mask[indx] = tuple([True] * nbfields)
else:
_mask[indx] = True
return
# Get the _data part of the new value
dval = getattr(value, '_data', value)
# Get the _mask part of the new value
mval = getmask(value)
if nbfields and mval is nomask:
mval = tuple([False] * nbfields)
if _mask is nomask:
# Set the data, then the mask
_data[indx] = dval
if mval is not nomask:
_mask = self._mask = make_mask_none(self.shape, _dtype)
_mask[indx] = mval
elif not self._hardmask:
# Set the data, then the mask
_data[indx] = dval
_mask[indx] = mval
elif hasattr(indx, 'dtype') and (indx.dtype == MaskType):
indx = indx * umath.logical_not(_mask)
_data[indx] = dval
else:
if nbfields:
err_msg = "Flexible 'hard' masks are not yet supported."
raise NotImplementedError(err_msg)
mindx = mask_or(_mask[indx], mval, copy=True)
dindx = self._data[indx]
if dindx.size > 1:
np.copyto(dindx, dval, where=~mindx)
elif mindx is nomask:
dindx = dval
_data[indx] = dindx
_mask[indx] = mindx
return
def __setattr__(self, attr, value):
super(MaskedArray, self).__setattr__(attr, value)
if attr == 'dtype' and self._mask is not nomask:
self._mask = self._mask.view(make_mask_descr(value), ndarray)
# Try to reset the shape of the mask (if we don't have a void)
# This raises a ValueError if the dtype change won't work
try:
self._mask.shape = self.shape
except (AttributeError, TypeError):
pass
def __setmask__(self, mask, copy=False):
"""
Set the mask.
"""
idtype = self.dtype
current_mask = self._mask
if mask is masked:
mask = True
if (current_mask is nomask):
# Make sure the mask is set
# Just don't do anything if there's nothing to do.
if mask is nomask:
return
current_mask = self._mask = make_mask_none(self.shape, idtype)
if idtype.names is None:
# No named fields.
# Hardmask: don't unmask the data
if self._hardmask:
current_mask |= mask
# Softmask: set everything to False
# If it's obviously a compatible scalar, use a quick update
# method.
elif isinstance(mask, (int, float, np.bool_, np.number)):
current_mask[...] = mask
# Otherwise fall back to the slower, general purpose way.
else:
current_mask.flat = mask
else:
# Named fields w/
mdtype = current_mask.dtype
mask = np.array(mask, copy=False)
# Mask is a singleton
if not mask.ndim:
# It's a boolean : make a record
if mask.dtype.kind == 'b':
mask = np.array(tuple([mask.item()] * len(mdtype)),
dtype=mdtype)
# It's a record: make sure the dtype is correct
else:
mask = mask.astype(mdtype)
# Mask is a sequence
else:
# Make sure the new mask is a ndarray with the proper dtype
try:
mask = np.array(mask, copy=copy, dtype=mdtype)
# Or assume it's a sequence of bool/int
except TypeError:
mask = np.array([tuple([m] * len(mdtype)) for m in mask],
dtype=mdtype)
# Hardmask: don't unmask the data
if self._hardmask:
for n in idtype.names:
current_mask[n] |= mask[n]
# Softmask: set everything to False
# If it's obviously a compatible scalar, use a quick update
# method.
elif isinstance(mask, (int, float, np.bool_, np.number)):
current_mask[...] = mask
# Otherwise fall back to the slower, general purpose way.
else:
current_mask.flat = mask
# Reshape if needed
if current_mask.shape:
current_mask.shape = self.shape
return
_set_mask = __setmask__
def _get_mask(self):
"""Return the current mask.
"""
# We could try to force a reshape, but that wouldn't work in some
# cases.
return self._mask
mask = property(fget=_get_mask, fset=__setmask__, doc="Mask")
def _get_recordmask(self):
"""
Return the mask of the records.
A record is masked when all the fields are masked.
"""
_mask = self._mask.view(ndarray)
if _mask.dtype.names is None:
return _mask
return np.all(flatten_structured_array(_mask), axis=-1)
def _set_recordmask(self):
"""
Return the mask of the records.
A record is masked when all the fields are masked.
"""
raise NotImplementedError("Coming soon: setting the mask per records!")
recordmask = property(fget=_get_recordmask)
def harden_mask(self):
"""
Force the mask to hard.
Whether the mask of a masked array is hard or soft is determined by
its `hardmask` property. `harden_mask` sets `hardmask` to True.
See Also
--------
hardmask
"""
self._hardmask = True
return self
def soften_mask(self):
"""
Force the mask to soft.
Whether the mask of a masked array is hard or soft is determined by
its `hardmask` property. `soften_mask` sets `hardmask` to False.
See Also
--------
hardmask
"""
self._hardmask = False
return self
hardmask = property(fget=lambda self: self._hardmask,
doc="Hardness of the mask")
def unshare_mask(self):
"""
Copy the mask and set the sharedmask flag to False.