Skip to content

Commit

Permalink
API: Fix structured dtype cast-safety, promotion, and comparison
Browse files Browse the repository at this point in the history
This PR replaces the old numpygh-15509 implementing proper type promotion
for structured voids.  It further fixes the casting safety to consider
casts with equivalent field number and matching order as "safe"
and if the names, titles, and offsets match as "equiv".

The change perculates into the void comparison, and since it fixes
the order, it removes the current FutureWarning there as well.

This addresses liberfa/pyerfa#77
and replaces numpygh-15509 (the implementation has changed too much).

Fixes numpygh-15494  (and probably a few more)

Co-authored-by: Allan Haldane <allan.haldane@gmail.com>
  • Loading branch information
seberg and ahaldane committed Jun 11, 2021
1 parent 341736a commit 3b86a32
Show file tree
Hide file tree
Showing 10 changed files with 293 additions and 137 deletions.
33 changes: 32 additions & 1 deletion numpy/core/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import platform
import warnings

from .multiarray import dtype, array, ndarray
from .multiarray import dtype, array, ndarray, promote_types
try:
import ctypes
except ImportError:
Expand Down Expand Up @@ -433,6 +433,37 @@ def _copy_fields(ary):
'formats': [dt.fields[name][0] for name in dt.names]}
return array(ary, dtype=copy_dtype, copy=True)

def _promote_fields(dt1, dt2):
""" Perform type promotion for two structured dtypes.
Parameters
----------
dt1 : structured dtype
First dtype.
dt2 : structured dtype
Second dtype.
Returns
-------
out : dtype
The promoted dtype
Notes
-----
This function ignores the "titles"
"""
if (dt1.names is None or dt2.names is None) or dt1.names != dt2.names:
raise TypeError("invalid type promotion")

# If the lengths all match, assume the titles (if existing) match the
# names.
if not len(dt1.names) == len(dt1.fields) == len(dt2.fields):
raise TypeError(
"NumPy currently does not support promotion with field titles.")

return dtype([(name, promote_types(dt1[name], dt2[name]))
for name in dt1.names])

def _getfield_is_safe(oldtype, newtype, offset):
""" Checks safety of getfield for object arrays.
Expand Down
101 changes: 70 additions & 31 deletions numpy/core/src/multiarray/arrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1046,31 +1046,71 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
"Void-arrays can only be compared for equality.");
return NULL;
}
if (PyArray_HASFIELDS(self)) {
PyObject *res = NULL, *temp, *a, *b;
PyObject *key, *value, *temp2;
PyObject *op;
Py_ssize_t pos = 0;
if (PyArray_TYPE(other) != NPY_VOID) {
PyErr_SetString(PyExc_ValueError,
"Cannot compare structured or void to non-void arrays. "
"(This may return array of False in the future.)");
return NULL;
}

if (PyArray_HASFIELDS(self) && PyArray_HASFIELDS(other)) {
PyArray_Descr *self_descr = PyArray_DESCR(self);
PyArray_Descr *other_descr = PyArray_DESCR(other);

npy_intp result_ndim = PyArray_NDIM(self) > PyArray_NDIM(other) ?
PyArray_NDIM(self) : PyArray_NDIM(other);

op = (cmp_op == Py_EQ ? n_ops.logical_and : n_ops.logical_or);
while (PyDict_Next(PyArray_DESCR(self)->fields, &pos, &key, &value)) {
if (NPY_TITLE_KEY(key, value)) {
continue;
}
a = array_subscript_asarray(self, key);
int field_count = PyTuple_GET_SIZE(self_descr->names);
if (field_count != PyTuple_GET_SIZE(other_descr->names)) {
PyErr_SetString(PyExc_TypeError,
"Cannot compare structured dtypes with different number of "
"fields. (This may return array of False in the future.)");
return NULL;
}

PyObject *op = (cmp_op == Py_EQ ? n_ops.logical_and : n_ops.logical_or);
PyObject *res = NULL;
for (int i = 0; i < field_count; ++i) {
PyObject *fieldname, *temp, *temp2;

fieldname = PyTuple_GET_ITEM(self_descr->names, i);
PyArrayObject *a = (PyArrayObject *)array_subscript_asarray(
self, fieldname);
if (a == NULL) {
Py_XDECREF(res);
return NULL;
}
b = array_subscript_asarray(other, key);
fieldname = PyTuple_GET_ITEM(other_descr->names, i);
PyArrayObject *b = (PyArrayObject *)array_subscript_asarray(
other, fieldname);
if (b == NULL) {
Py_XDECREF(res);
Py_DECREF(a);
return NULL;
}
temp = array_richcompare((PyArrayObject *)a,b,cmp_op);
/*
* If the fields were subarrays, the dimensions may have changed.
* In that case, the new shape (subarray part) must match exactly.
* (If this is 0, there is no subarray.)
*/
int field_dims_a = PyArray_NDIM(a) - PyArray_NDIM(self);
int field_dims_b = PyArray_NDIM(b) - PyArray_NDIM(other);
if (field_dims_a != field_dims_b || (
field_dims_a != 0 && /* neither is subarray */
/* Compare only the added (subarray) dimensions: */
!PyArray_CompareLists(
PyArray_DIMS(a) + PyArray_NDIM(self),
PyArray_DIMS(b) + PyArray_NDIM(other),
field_dims_a))) {
PyErr_SetString(PyExc_TypeError,
"Cannot compare subarrays with different shapes.");
Py_DECREF(a);
Py_DECREF(b);
Py_XDECREF(res);
return NULL;
}

temp = array_richcompare(a, (PyObject *)b,cmp_op);
Py_DECREF(a);
Py_DECREF(b);
if (temp == NULL) {
Expand Down Expand Up @@ -1155,7 +1195,23 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
}
return res;
}
else if (PyArray_HASFIELDS(self) || PyArray_HASFIELDS(other)) {
PyErr_SetString(PyExc_TypeError,
"Cannot compare structured with unstructured void. "
"(This may return array of False in the future.)");
return NULL;
}
else {
/* Cannot have subarray, since subarray is absorbed into array. */
assert(PyArray_DESCR(self)->subarray == NULL);
assert(PyArray_DESCR(other)->subarray == NULL);
if (PyArray_ITEMSIZE(self) != PyArray_ITEMSIZE(other)) {
PyErr_SetString(PyExc_TypeError,
"cannot compare unstructured voids of different length. "
"Use bytes to compare. "
"(This may return array of False in the future.)");
return NULL;
}
/* compare as a string. Assumes self and other have same descr->type */
return _strings_richcompare(self, other, cmp_op, 0);
}
Expand Down Expand Up @@ -1358,24 +1414,7 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
return Py_NotImplemented;
}

_res = PyArray_CanCastTypeTo(PyArray_DESCR(self),
PyArray_DESCR(array_other),
NPY_EQUIV_CASTING);
if (_res == 0) {
/* 2015-05-07, 1.10 */
Py_DECREF(array_other);
if (DEPRECATE_FUTUREWARNING(
"elementwise == comparison failed and returning scalar "
"instead; this will raise an error or perform "
"elementwise comparison in the future.") < 0) {
return NULL;
}
Py_INCREF(Py_False);
return Py_False;
}
else {
result = _void_compare(self, array_other, cmp_op);
}
result = _void_compare(self, array_other, cmp_op);
Py_DECREF(array_other);
return result;
}
Expand Down
104 changes: 66 additions & 38 deletions numpy/core/src/multiarray/convert_datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -3162,10 +3162,25 @@ can_cast_fields_safety(PyArray_Descr *from, PyArray_Descr *to)
{
NPY_CASTING casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW;

/*
* If the itemsize (includes padding at the end) does not match, consider
* this a SAFE cast only. A view may be OK if we shrink.
*/
if (from->elsize > to->elsize) {
/*
* The itemsize may mismatch even if all fields and formats match
* (due to additional padding).
*/
casting = PyArray_MinCastSafety(
casting, NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW);
}
else if (from->elsize < to->elsize) {
casting = PyArray_MinCastSafety(casting, NPY_SAFE_CASTING);
}

Py_ssize_t field_count = PyTuple_Size(from->names);
if (field_count != PyTuple_Size(to->names)) {
/* TODO: This should be rejected! */
return NPY_UNSAFE_CASTING;
return -1;
}
for (Py_ssize_t i = 0; i < field_count; i++) {
PyObject *from_key = PyTuple_GET_ITEM(from->names, i);
Expand All @@ -3175,56 +3190,69 @@ can_cast_fields_safety(PyArray_Descr *from, PyArray_Descr *to)
}
PyArray_Descr *from_base = (PyArray_Descr*)PyTuple_GET_ITEM(from_tup, 0);

/*
* TODO: This should use to_key (order), compare gh-15509 by
* by Allan Haldane. And raise an error on failure.
* (Fixing that may also requires fixing/changing promotion.)
*/
PyObject *to_tup = PyDict_GetItem(to->fields, from_key);
PyObject *to_key = PyTuple_GET_ITEM(to->names, i);
PyObject *to_tup = PyDict_GetItem(to->fields, to_key);
if (to_tup == NULL) {
return NPY_UNSAFE_CASTING;
return give_bad_field_error(from_key);
}
PyArray_Descr *to_base = (PyArray_Descr*)PyTuple_GET_ITEM(to_tup, 0);

int cmp = PyUnicode_Compare(from_key, to_key);
if (error_converting(cmp)) {
return -1;
}
if (cmp != 0) {
/*
* Field name mismatch, consider this at most SAFE.
*/
casting = PyArray_MinCastSafety(
casting, NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW);
}
NPY_CASTING field_casting = PyArray_GetCastSafety(from_base, to_base, NULL);
if (field_casting < 0) {
return -1;
}
casting = PyArray_MinCastSafety(casting, field_casting);
if (casting & _NPY_CAST_IS_VIEW) {
/*
* Unset cast-is-view (and use at most equivlanet casting) if the
* field offsets do not match. (not no-casting)
*/
PyObject *from_offset = PyTuple_GET_ITEM(from_tup, 1);
PyObject *to_offset = PyTuple_GET_ITEM(to_tup, 1);
cmp = PyObject_RichCompareBool(from_offset, to_offset, Py_EQ);
if (error_converting(cmp)) {
assert(0); /* Both are longs, this should never fail */
return -1;
}
if (!cmp) {
casting = PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING);
}
}

/* Also check the title (denote mismatch as SAFE only) */
PyObject *from_title = from_key;
PyObject *to_title = to_key;
if (PyTuple_GET_SIZE(from_tup) > 2) {
from_title = PyTuple_GET_ITEM(from_tup, 2);
}
if (PyTuple_GET_SIZE(to_tup) > 2) {
to_title = PyTuple_GET_ITEM(to_tup, 2);
}
cmp = PyObject_RichCompareBool(from_title, to_title, Py_EQ);
if (error_converting(cmp)) {
return -1;
}
if (!cmp) {
casting = PyArray_MinCastSafety(
casting, NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW);
}

}
if (!(casting & _NPY_CAST_IS_VIEW)) {
assert((casting & ~_NPY_CAST_IS_VIEW) != NPY_NO_CASTING);
return casting;
}

/*
* If the itemsize (includes padding at the end), fields, or names
* do not match, this cannot be a view and also not a "no" cast
* (identical dtypes).
* It may be possible that this can be relaxed in some cases.
*/
if (from->elsize != to->elsize) {
/*
* The itemsize may mismatch even if all fields and formats match
* (due to additional padding).
*/
return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING);
}

int cmp = PyObject_RichCompareBool(from->fields, to->fields, Py_EQ);
if (cmp != 1) {
if (cmp == -1) {
PyErr_Clear();
}
return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING);
}
cmp = PyObject_RichCompareBool(from->names, to->names, Py_EQ);
if (cmp != 1) {
if (cmp == -1) {
PyErr_Clear();
}
return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING);
}
return casting;
}

Expand Down
68 changes: 56 additions & 12 deletions numpy/core/src/multiarray/dtypemeta.c
Original file line number Diff line number Diff line change
Expand Up @@ -267,26 +267,70 @@ string_unicode_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
static PyArray_Descr *
void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
{
/*
* We currently do not support promotion of void types unless they
* are equivalent.
*/
if (!PyArray_CanCastTypeTo(descr1, descr2, NPY_EQUIV_CASTING)) {
if (descr1->subarray == NULL && descr1->names == NULL &&
descr2->subarray == NULL && descr2->names == NULL) {
if (descr1->subarray == NULL && descr1->names == NULL &&
descr2->subarray == NULL && descr2->names == NULL) {
if (descr1->elsize != descr2->elsize) {
PyErr_SetString(PyExc_TypeError,
"Invalid type promotion with void datatypes of different "
"lengths. Use the `np.bytes_` datatype instead to pad the "
"shorter value with trailing zero bytes.");
return NULL;
}
else {
Py_INCREF(descr1);
return descr1;
}

if (descr1->names != NULL && descr2->names != NULL) {
/* If both have fields promoting individual fields may be possible */
static PyObject *promote_fields_func = NULL;
npy_cache_import("numpy.core._internal", "_promote_fields",
&promote_fields_func);
if (promote_fields_func == NULL) {
return NULL;
}
PyObject *result = PyObject_CallFunctionObjArgs(promote_fields_func,
descr1, descr2, NULL);
if (result == NULL) {
return NULL;
}
if (!PyObject_TypeCheck(result, Py_TYPE(descr1))) {
PyErr_SetString(PyExc_RuntimeError,
"Internal NumPy error: `_promote_fields` did not return "
"a valid descriptor object.");
Py_DECREF(result);
return NULL;
}
return (PyArray_Descr *)result;
}
else if (descr1->subarray != NULL && descr2->subarray != NULL) {
int cmp = PyObject_RichCompareBool(
descr1->subarray->shape, descr2->subarray->shape, Py_EQ);
if (error_converting(cmp)) {
return NULL;
}
if (!cmp) {
PyErr_SetString(PyExc_TypeError,
"invalid type promotion with structured datatype(s).");
"invalid type promotion with subarray datatypes "
"(shape mismatch).");
}
return NULL;
PyArray_Descr *new_base = PyArray_PromoteTypes(
descr1->subarray->base, descr2->subarray->base);
if (new_base == NULL) {
return NULL;
}

PyArray_Descr *new_descr = PyArray_DescrNew(descr1);
if (new_descr == NULL) {
Py_DECREF(new_base);
return NULL;
}
Py_SETREF(new_descr->subarray->base, new_base);
return new_descr;
}
Py_INCREF(descr1);
return descr1;

PyErr_SetString(PyExc_TypeError,
"invalid type promotion with structured datatype(s).");
return NULL;
}

static int
Expand Down

0 comments on commit 3b86a32

Please sign in to comment.