Skip to content

Commit

Permalink
ENH: Allow creating structured void scalars by passing dtype
Browse files Browse the repository at this point in the history
Adds an optional `dtype=` kwarg to `np.void`.  If given (and not None),
this kwarg effectively turns it into:

     res = np.array(data, dtype=dtype)[()]

Thanks for Marten's review and Bas' help with the typing.

Reviewed-by: Marten van Kerkwijk <mhvk@astro.utoronto.ca>
Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>
  • Loading branch information
seberg and BvB93 committed Sep 21, 2022
1 parent 2524a53 commit 519ce63
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 20 deletions.
4 changes: 4 additions & 0 deletions doc/release/upcoming_changes/22316.new_feature.rst
@@ -0,0 +1,4 @@
``np.void`` now has a ``dtype`` argument
----------------------------------------
NumPy now allows constructing structured void scalars directly by
passing the ``dtype`` argument to ``np.void``.
6 changes: 5 additions & 1 deletion numpy/__init__.pyi
Expand Up @@ -40,6 +40,7 @@ from numpy._typing import (
# DTypes
DTypeLike,
_DTypeLike,
_DTypeLikeVoid,
_SupportsDType,
_VoidDTypeLike,

Expand Down Expand Up @@ -3058,7 +3059,10 @@ class flexible(generic): ... # type: ignore
# depending on whether or not it's used as an opaque bytes sequence
# or a structure
class void(flexible):
def __init__(self, value: _IntLike_co | bytes, /) -> None: ...
@overload
def __init__(self, value: _IntLike_co | bytes, /, dtype : None = ...) -> None: ...
@overload
def __init__(self, value: Any, /, dtype: _DTypeLikeVoid) -> None: ...
@property
def real(self: _ArraySelf) -> _ArraySelf: ...
@property
Expand Down
48 changes: 42 additions & 6 deletions numpy/core/_add_newdocs_scalars.py
Expand Up @@ -225,16 +225,52 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc):

add_newdoc_for_scalar_type('void', [],
r"""
Either an opaque sequence of bytes, or a structure.
np.void(length_or_data, /, dtype=None)
Create a new structured or unstructured void scalar.
Parameters
----------
length_or_data : int, array-like, bytes-like, object
One of multiple meanings (see notes). The length or
bytes data of an unstructured void. Or alternatively,
the data to be stored in the new scalar when `dtype`
is provided.
This can be an array-like, in which case an array may
be returned.
dtype : dtype, optional
If provided, the dtype of the new scalar. This dtype must
be a "void" dtype (i.e. a structured or unstructured
void).
.. versionadded:: 1.24
Notes
-----
For historical reasons and because void scalars can represent both
arbitrary byte data and structured dtypes, the void constructor
has three calling conventions:
1. ``np.void(5)`` creates a ``dtype="V5"`` scalar filled with
``\0`` bytes. The 5 can be a Python or NumPy integer.
2. ``np.void(b"bytes-like")`` creates a void scalar from
the byte string. The dtype is chosen based on its length.
3. When a ``dtype=`` is passed the call is roughly the same as an
array creation. However, a void scalar is returned when possible.
Please see the examples which show all three different conventions.
Examples
--------
>>> np.void(5)
void(b'\x00\x00\x00\x00\x00')
>>> np.void(b'abcd')
void(b'\x61\x62\x63\x64')
>>> np.void((5, 3.2, "eggs"), dtype="i,d,S5")
(5, 3.2, b'eggs') # looks like a tuple, but is `np.void`
>>> np.void(3, dtype=[('x', np.int8), ('y', np.int8)])
(3, 3) # looks like a tuple, but is `np.void`
Structured `void` scalars can only be constructed via extraction from :ref:`structured_arrays`:
>>> arr = np.array((1, 2), dtype=[('x', np.int8), ('y', np.int8)])
>>> arr[()]
(1, 2) # looks like a tuple, but is `np.void`
""")

add_newdoc_for_scalar_type('datetime64', [],
Expand Down
43 changes: 32 additions & 11 deletions numpy/core/src/multiarray/scalartypes.c.src
Expand Up @@ -3170,28 +3170,33 @@ static PyObject *
void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *obj, *arr;
PyObject *new = NULL;
PyArray_Descr *descr = NULL;

static char *kwnames[] = {"", NULL}; /* positional-only */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:void", kwnames, &obj)) {
static char *kwnames[] = {"", "dtype", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&:void", kwnames,
&obj, &PyArray_DescrConverter2, &descr)) {
return NULL;
}
/*
* For a VOID scalar first see if obj is an integer or long
* and create new memory of that size (filled with 0) for the scalar
*/
if (PyLong_Check(obj) ||
if (descr == NULL && (
PyLong_Check(obj) ||
PyArray_IsScalar(obj, Integer) ||
(PyArray_Check(obj) &&
PyArray_NDIM((PyArrayObject *)obj)==0 &&
PyArray_ISINTEGER((PyArrayObject *)obj))) {
new = Py_TYPE(obj)->tp_as_number->nb_int(obj);
}
if (new && PyLong_Check(new)) {
PyArray_ISINTEGER((PyArrayObject *)obj)))) {

PyObject *length = Py_TYPE(obj)->tp_as_number->nb_int(obj);
if (length == NULL) {
return NULL;
}

PyObject *ret;
char *destptr;
npy_ulonglong memu = PyLong_AsUnsignedLongLong(new);
Py_DECREF(new);
npy_ulonglong memu = PyLong_AsUnsignedLongLong(length);
Py_DECREF(length);
if (PyErr_Occurred() || (memu > NPY_MAX_INT)) {
PyErr_Clear();
PyErr_Format(PyExc_OverflowError,
Expand Down Expand Up @@ -3226,7 +3231,23 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return ret;
}

arr = PyArray_FROM_OTF(obj, NPY_VOID, NPY_ARRAY_FORCECAST);
if (descr == NULL) {
/* Use the "size-less" void dtype to discover the size. */
descr = PyArray_DescrNewFromType(NPY_VOID);
}
else if (descr->type_num != NPY_VOID || PyDataType_HASSUBARRAY(descr)) {
/* we reject subarrays, since subarray scalars do not exist. */
PyErr_Format(PyExc_TypeError,
"void: descr must be a `void` dtype that is not "
"a subarray dtype (structured or unstructured). "
"Got '%.100R'.", descr);
return NULL;
}
else {
Py_INCREF(descr);
}

arr = PyArray_FromAny(obj, descr, 0, 0, NPY_ARRAY_FORCECAST, NULL);
return PyArray_Return((PyArrayObject *)arr);
}

Expand Down
70 changes: 70 additions & 0 deletions numpy/core/tests/test_scalar_ctors.py
Expand Up @@ -113,3 +113,73 @@ def test_reals(self, t1, t2):
@pytest.mark.parametrize('t2', cfloat_types + [None])
def test_complex(self, t1, t2):
return self._do_test(t1, t2)


@pytest.mark.parametrize("length",
        [5, np.int8(5), np.array(5, dtype=np.uint16)])
def test_void_via_length(length):
    """An integer-like sole argument is a byte count, not data."""
    scalar = np.void(length)
    assert type(scalar) is np.void
    # Five bytes were requested, so the dtype is "V5" and the
    # payload consists of NUL bytes only.
    assert scalar.dtype == "V5"
    assert scalar.item() == b"\0" * 5

@pytest.mark.parametrize("bytes_",
        [b"spam", np.array(567.)])
def test_void_from_byteslike(bytes_):
    """Bytes-like input is stored as-is; ``dtype`` may pad or truncate it."""
    raw = bytes(bytes_)
    n_raw = len(raw)

    plain = np.void(bytes_)
    assert type(plain) is np.void
    assert plain.item() == raw

    # A longer void dtype keeps the data at the front and fills the
    # remainder with NUL bytes (this is how filling works).
    padded = np.void(bytes_, dtype="V100")
    assert type(padded) is np.void
    head = padded.item()[:n_raw]
    tail = padded.item()[n_raw:]
    assert head == raw
    assert tail == b"\0" * (padded.nbytes - n_raw)

    # A shorter void dtype keeps only the leading bytes.
    clipped = np.void(bytes_, dtype="V4")
    assert type(clipped) is np.void
    assert clipped.item() == raw[:4]

def test_void_arraylike_trumps_byteslike():
    """A memoryview is handled as an array-like, not as one bytes-like."""
    # The 18-byte buffer is expected to become an array of shape (18,)
    # with one-byte void elements rather than a single 18-byte scalar.
    view = memoryview(b"just one mintleaf?")
    converted = np.void(view)
    assert type(converted) is np.ndarray
    assert converted.shape == (18,)
    assert converted.dtype == "V1"

def test_void_dtype_arg():
# Basic test for the dtype argument (positional and keyword)
res = np.void((1, 2), dtype="i,i")
assert res.item() == (1, 2)
res = np.void((2, 3), "i,i")
assert res.item() == (2, 3)

@pytest.mark.parametrize("data",
        [5, np.int8(5), np.array(5, dtype=np.uint16)])
def test_void_from_integer_with_dtype(data):
    """With an explicit ``dtype`` an integer is data, not a length."""
    filled = np.void(data, dtype="i,i")
    assert type(filled) is np.void
    assert filled.dtype == "i,i"
    # The single integer ends up in both fields of the structure.
    assert filled["f0"] == 5
    assert filled["f1"] == 5

def test_void_from_structure():
dtype = np.dtype([('s', [('f', 'f8'), ('u', 'U1')]), ('i', 'i2')])
data = np.array(((1., 'a'), 2), dtype=dtype)
res = np.void(data[()], dtype=dtype)
assert type(res) is np.void
assert res.dtype == dtype
assert res == data[()]

def test_void_bad_dtype():
    """Non-void and subarray dtypes must be refused with ``TypeError``."""
    # The expected message mentions the offending dtype in each case.
    non_void_msg = "void: descr must be a `void.*int64"
    subarray_msg = r"void: descr must be a `void.*\(4,\)"

    # A plain integer dtype is not a void dtype:
    with pytest.raises(TypeError, match=non_void_msg):
        np.void(4, dtype="i8")

    # A subarray dtype (here with shape `(4,)`) is rejected as well:
    with pytest.raises(TypeError, match=subarray_msg):
        np.void(4, dtype="4i")
5 changes: 3 additions & 2 deletions numpy/typing/tests/data/fail/scalars.pyi
Expand Up @@ -47,7 +47,8 @@ np.uint16(A()) # E: incompatible type
np.uint32(A()) # E: incompatible type
np.uint64(A()) # E: incompatible type

np.void("test") # E: incompatible type
np.void("test") # E: No overload variant
np.void("test", dtype=None) # E: No overload variant

np.generic(1) # E: Cannot instantiate abstract class
np.number(1) # E: Cannot instantiate abstract class
Expand All @@ -62,7 +63,7 @@ np.uint64(value=0) # E: Unexpected keyword argument
np.complex128(value=0.0j) # E: Unexpected keyword argument
np.str_(value='bob') # E: No overload variant
np.bytes_(value=b'test') # E: No overload variant
np.void(value=b'test') # E: Unexpected keyword argument
np.void(value=b'test') # E: No overload variant
np.bool_(value=True) # E: Unexpected keyword argument
np.datetime64(value="2019") # E: No overload variant
np.timedelta64(value=0) # E: Unexpected keyword argument
Expand Down
2 changes: 2 additions & 0 deletions numpy/typing/tests/data/pass/scalars.py
Expand Up @@ -113,6 +113,8 @@ def __float__(self) -> float:
np.void(np.bool_(True))
np.void(b"test")
np.void(np.bytes_("test"))
np.void(object(), [("a", "O"), ("b", "O")])
np.void(object(), dtype=[("a", "O"), ("b", "O")])

# Protocols
i8 = np.int64()
Expand Down

0 comments on commit 519ce63

Please sign in to comment.