Skip to content

Commit

Permalink
DEP: Finalize ragged array creation deprecation
Browse files Browse the repository at this point in the history
This finalizes all DeprecationWarnings related to the creation of object
arrays from nested lists, enforcing that users pass `dtype=object` if this
is intentional.
  • Loading branch information
seberg committed Jul 17, 2022
1 parent 4156ae2 commit 189eb6e
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 140 deletions.
2 changes: 2 additions & 0 deletions doc/release/upcoming_changes/22004.expired.rst
@@ -0,0 +1,2 @@
* Ragged array creation will now always raise a ``ValueError`` unless
``dtype=object`` is passed. This includes very deeply nested sequences.
92 changes: 3 additions & 89 deletions numpy/core/src/multiarray/array_coercion.c
Expand Up @@ -1307,95 +1307,10 @@ PyArray_DiscoverDTypeAndShape(
/* Handle reaching the maximum depth differently: */
int too_deep = ndim == max_dims;

if (fixed_DType == NULL) {
/* This is discovered as object, but deprecated */
static PyObject *visibleDeprecationWarning = NULL;
npy_cache_import(
"numpy", "VisibleDeprecationWarning",
&visibleDeprecationWarning);
if (visibleDeprecationWarning == NULL) {
goto fail;
}
if (!too_deep) {
/* NumPy 1.19, 2019-11-01 */
if (PyErr_WarnEx(visibleDeprecationWarning,
"Creating an ndarray from ragged nested sequences (which "
"is a list-or-tuple of lists-or-tuples-or ndarrays with "
"different lengths or shapes) is deprecated. If you "
"meant to do this, you must specify 'dtype=object' "
"when creating the ndarray.", 1) < 0) {
goto fail;
}
}
else {
/* NumPy 1.20, 2020-05-08 */
/* Note, max_dims should normally always be NPY_MAXDIMS here */
if (PyErr_WarnFormat(visibleDeprecationWarning, 1,
"Creating an ndarray from nested sequences exceeding "
"the maximum number of dimensions of %d is deprecated. "
"If you mean to do this, you must specify "
"'dtype=object' when creating the ndarray.",
max_dims) < 0) {
goto fail;
}
}
/* Ensure that ragged arrays always return object dtype */
Py_XSETREF(*out_descr, PyArray_DescrFromType(NPY_OBJECT));
}
else if (fixed_DType->type_num != NPY_OBJECT) {
if (fixed_DType == NULL || fixed_DType->type_num != NPY_OBJECT) {
/* Only object DType supports ragged cases unify error */

/*
* We used to let certain ragged arrays pass if they also
* support e.g. conversion using `float(arr)`, which currently
* works for arrays with only one element.
* Thus we catch at least most of such cases here and give a
* DeprecationWarning instead of an error.
* Note that some of these will actually error later on when
* attempting to do the actual assign.
*/
int deprecate_single_element_ragged = 0;
coercion_cache_obj *current = *coercion_cache_head;
while (current != NULL) {
if (current->sequence) {
if (current->depth == ndim) {
/*
* Assume that only array-likes will allow the deprecated
* behaviour
*/
deprecate_single_element_ragged = 0;
break;
}
/* check next converted sequence/array-like */
current = current->next;
continue;
}
PyArrayObject *arr = (PyArrayObject *)(current->arr_or_sequence);
assert(PyArray_NDIM(arr) + current->depth >= ndim);
if (PyArray_NDIM(arr) != ndim - current->depth) {
/* This array is not compatible with the final shape */
if (PyArray_SIZE(arr) != 1) {
deprecate_single_element_ragged = 0;
break;
}
deprecate_single_element_ragged = 1;
}
current = current->next;
}

if (deprecate_single_element_ragged) {
/* Deprecated 2020-07-24, NumPy 1.20 */
if (DEPRECATE(
"setting an array element with a sequence. "
"This was supported in some cases where the elements "
"are arrays with a single element. For example "
"`np.array([1, np.array([2])], dtype=int)`. "
"In the future this will raise the same ValueError as "
"`np.array([1, [2]], dtype=int)`.") < 0) {
goto fail;
}
}
else if (!too_deep) {
if (!too_deep) {
PyObject *shape = PyArray_IntTupleFromIntp(ndim, out_shape);
PyErr_Format(PyExc_ValueError,
"setting an array element with a sequence. The "
Expand All @@ -1404,16 +1319,15 @@ PyArray_DiscoverDTypeAndShape(
"%R + inhomogeneous part.",
ndim, shape);
Py_DECREF(shape);
goto fail;
}
else {
PyErr_Format(PyExc_ValueError,
"setting an array element with a sequence. The "
"requested array would exceed the maximum number of "
"dimension of %d.",
max_dims);
goto fail;
}
goto fail;
}

/*
Expand Down
9 changes: 5 additions & 4 deletions numpy/core/tests/test_array_coercion.py
Expand Up @@ -490,9 +490,10 @@ def test_nested_simple(self):
with pytest.raises(ValueError):
np.array([nested], dtype="float64")

# We discover object automatically at this time:
with assert_warns(np.VisibleDeprecationWarning):
arr = np.array([nested])
with pytest.raises(ValueError, match=".*would exceed the maximum"):
np.array([nested]) # user must ask for `object` explicitly

arr = np.array([nested], dtype=object)
assert arr.dtype == np.dtype("O")
assert arr.shape == (1,) * np.MAXDIMS
assert arr.item() is initial
Expand Down Expand Up @@ -523,7 +524,7 @@ def test_nested_arraylikes(self, arraylike):
for i in range(np.MAXDIMS - 1):
nested = [nested]

with pytest.warns(DeprecationWarning):
with pytest.raises(ValueError, match=".*would exceed the maximum"):
# It will refuse to assign the array into
np.array(nested, dtype="float64")

Expand Down
42 changes: 0 additions & 42 deletions numpy/core/tests/test_deprecations.py
Expand Up @@ -475,34 +475,6 @@ def test_zerod(self):
self.assert_deprecated(lambda: np.nonzero(np.array(1)))


def test_deprecate_ragged_arrays():
# 2019-11-29 1.19.0
#
# NEP 34 deprecated automatic object dtype when creating ragged
# arrays. Also see the "ragged" tests in `test_multiarray`
#
# emits a VisibleDeprecationWarning
arg = [1, [2, 3]]
with assert_warns(np.VisibleDeprecationWarning):
np.array(arg)


class TestTooDeepDeprecation(_VisibleDeprecationTestCase):
# NumPy 1.20, 2020-05-08
# This is a bit similar to the above ragged array deprecation case.
message = re.escape("Creating an ndarray from nested sequences exceeding")

def test_deprecation(self):
nested = [1]
for i in range(np.MAXDIMS - 1):
nested = [nested]
self.assert_not_deprecated(np.array, args=(nested,))
self.assert_not_deprecated(np.array,
args=(nested,), kwargs=dict(dtype=object))

self.assert_deprecated(np.array, args=([nested],))


class TestToString(_DeprecationTestCase):
# 2020-03-06 1.19.0
message = re.escape("tostring() is deprecated. Use tobytes() instead.")
Expand Down Expand Up @@ -644,20 +616,6 @@ def test_deprecated(self):
self.assert_not_deprecated(np.add.outer, args=(arr, arr))


class TestRaggedArray(_DeprecationTestCase):
# 2020-07-24, NumPy 1.20.0
message = "setting an array element with a sequence"

def test_deprecated(self):
arr = np.ones((1, 1))
# Deprecated if the array is a leaf node:
self.assert_deprecated(lambda: np.array([arr, 0], dtype=np.float64))
self.assert_deprecated(lambda: np.array([0, arr], dtype=np.float64))
# And when it is an assignment into a lower dimensional subarray:
self.assert_deprecated(lambda: np.array([arr, [0]], dtype=np.float64))
self.assert_deprecated(lambda: np.array([[0], arr], dtype=np.float64))


class FlatteningConcatenateUnsafeCast(_DeprecationTestCase):
# NumPy 1.20, 2020-09-03
message = "concatenate with `axis=None` will use same-kind casting"
Expand Down
9 changes: 4 additions & 5 deletions numpy/core/tests/test_multiarray.py
Expand Up @@ -1118,12 +1118,11 @@ def test_array_too_big(self):
shape=(max_bytes//itemsize + 1,), dtype=dtype)

def _ragged_creation(self, seq):
# without dtype=object, the ragged object should raise
with assert_warns(np.VisibleDeprecationWarning):
# without dtype=object, the ragged object raises
with pytest.raises(ValueError, match=".*detected shape was"):
a = np.array(seq)
b = np.array(seq, dtype=object)
assert_equal(a, b)
return b

return np.array(seq, dtype=object)

def test_ragged_ndim_object(self):
# Lists of mismatching depths are treated as object arrays
Expand Down

0 comments on commit 189eb6e

Please sign in to comment.