Permalink
Browse files

ENH: missingdata: Rewrite PyArray_Concatenate to work with NA masks

It should also have less memory usage for heterogeneous inputs,
because it no longer makes extra copies in that case.
  • Loading branch information...
mwiebe authored and charris committed Aug 16, 2011
1 parent 99a21ef commit 9194b3af704df71aa9b1ff2f53f169848d0f9dc7
@@ -29,9 +29,12 @@ What works with NA:
* Array methods:
+ ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod,
ndarray.conjugate, ndarray.diagonal
+ + numpy.concatenate
What doesn't work with NA:
* Fancy indexing, such as with lists and partial boolean masks.
+ * ndarray.flat and any other methods that use the old iterator
+ mechanism instead of the newer nditer.
* UFunc.reduce of multi-dimensional arrays, with skipna=True and a ufunc
that doesn't have an identity.
* UFunc.accumulate, UFunc.reduceat.
@@ -1667,18 +1667,20 @@ Conversion
copied into every location. A -1 is returned if an error occurs,
otherwise 0 is returned.
-.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype)
-
- Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new view of
- the array *self* as possibly a different data-type, *dtype*. If
- *dtype* is ``NULL``, then the returned array will have the same
- data type as *self*. The new data-type must be consistent with
- the size of *self*. Either the itemsizes must be identical, or
- *self* must be single-segment and the total number of bytes must
- be the same. In the latter case the dimensions of the returned
- array will be altered in the last (or first for Fortran-style
- contiguous arrays) dimension. The data area of the returned array
- and self is exactly the same.
+.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype, PyTypeObject *ptype)
+
+ Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new
+ view of the array *self*, possibly with a different data-type, *dtype*,
+ and a different array subclass, *ptype*.
+
+ If *dtype* is ``NULL``, then the returned array will have the same
+ data type as *self*. The new data-type must be consistent with the
+ size of *self*. Either the itemsizes must be identical, or *self* must
+ be single-segment and the total number of bytes must be the same.
+ In the latter case the dimensions of the returned array will be
+ altered in the last (or first for Fortran-style contiguous arrays)
+ dimension. The returned array and *self* share exactly the same
+ data area.
Shape Manipulation
View
@@ -3702,7 +3702,7 @@ def luf(lamdaexpr, *args, **kwargs):
add_newdoc('numpy.core.multiarray', 'copyto',
"""
- copyto(dst, src, casting='same_kind', where=None)
+ copyto(dst, src, casting='same_kind', where=None, preservena=False)
Copies values from `src` into `dst`, broadcasting as necessary.
Raises a TypeError if the casting rule is violated, and if
@@ -3725,10 +3725,13 @@ def luf(lamdaexpr, *args, **kwargs):
* 'same_kind' means only safe casts or casts within a kind,
like float64 to float32, are allowed.
* 'unsafe' means any data conversions may be done.
- where : array_like of bool
+ where : array_like of bool, optional
A boolean array which is broadcast to match the dimensions
of `dst`, and selects elements to copy from `src` to `dst`
wherever it contains the value True.
+ preservena : bool, optional
+ If set to True, leaves any NA values in `dst` untouched. This
+ is similar to the "hard mask" feature in numpy.ma.
""")
View
@@ -267,5 +267,10 @@ def hstack(tup):
[3, 4]])
"""
- return _nx.concatenate(map(atleast_1d,tup),1)
+ arrs = map(atleast_1d,tup)
+ # As a special case, dimension 0 of 1-dimensional arrays is "horizontal"
+ if arrs[0].ndim == 1:
+ return _nx.concatenate(arrs, 0)
+ else:
+ return _nx.concatenate(arrs, 1)
@@ -1331,7 +1331,13 @@ NPY_NO_EXPORT PyArray_Descr *
PyArray_MinScalarType(PyArrayObject *arr)
{
PyArray_Descr *dtype = PyArray_DESCR(arr);
- if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num)) {
+ /*
+ * If the array isn't a numeric scalar or is a scalar but with
+ * its value masked out, just return the array's dtype.
+ */
+ if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num) ||
+ (PyArray_HASMASKNA(arr) && !NpyMaskValue_IsExposed(
+ (npy_mask)*PyArray_MASKNA_DATA(arr)))) {
Py_INCREF(dtype);
return dtype;
}
@@ -1175,6 +1175,7 @@ PyArray_NewLikeArray(PyArrayObject *prototype, NPY_ORDER order,
int idim;
PyArray_CreateSortedStridePerm(PyArray_NDIM(prototype),
+ PyArray_SHAPE(prototype),
PyArray_STRIDES(prototype),
strideperm);
@@ -3922,7 +3922,7 @@ PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, strides, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, strides, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
@@ -4052,7 +4052,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
@@ -4186,7 +4186,7 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
@@ -4324,7 +4324,7 @@ PyArray_PrepareFourRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
Oops, something went wrong.

0 comments on commit 9194b3a

Please sign in to comment.