API: Add numpy.types module and fill it with DType classes

numpy · Apr 12, 2023 · 03e5cf0 · 03e5cf0
1 parent a026b7f
commit 03e5cf0
Show file tree

Hide file tree

Showing 10 changed files with 237 additions and 51 deletions.
diff --git a/doc/source/reference/routines.other.rst b/doc/source/reference/routines.other.rst
@@ -59,3 +59,5 @@ Matlab-like Functions
    disp
 
 .. automodule:: numpy.exceptions
+
+.. automodule:: numpy.types
diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py
@@ -142,13 +142,14 @@ def _DType_reconstruct(scalar_type):
 
 
 def _DType_reduce(DType):
-    # To pickle a DType without having to add top-level names, pickle the
-    # scalar type for now (and assume that reconstruction will be possible).
-    if not DType._legacy:
-        # If we don't have a legacy DType, we should have a valid top level
-        # name available, so use it (i.e. `np.dtype` itself!)
+    # As types/classes, most DTypes can simply be pickled by their name:
+    if not DType._legacy or DType.__module__ == "numpy.types":
         return DType.__name__
-    scalar_type = DType.type  # pickle the scalar type for reconstruction
+
+    # However, user defined legacy dtypes (like rational) do not end up in
+    # `numpy.types` as module and do not have a public class at all.
+    # For these, we pickle them by reconstructing them from the scalar type:
+    scalar_type = DType.type
     return _DType_reconstruct, (scalar_type,)
 
 

diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
@@ -4646,12 +4646,30 @@ set_typeinfo(PyObject *dict)
      * should be defined on the class and inherited to the scalar.
      * (NPY_HALF is the largest builtin one.)
      */
-    for (i = 0; i <= NPY_HALF; i++) {
-        if (dtypemeta_wrap_legacy_descriptor(_builtin_descrs[i]) < 0) {
-            return -1;
-        }
+    /**begin repeat
+     *
+     * #NAME = BOOL,
+     *         BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+     *         LONG, ULONG, LONGLONG, ULONGLONG,
+     *         HALF, FLOAT, DOUBLE, LONGDOUBLE,
+     *         CFLOAT, CDOUBLE, CLONGDOUBLE,
+     *         OBJECT, STRING, UNICODE, VOID,
+     *         DATETIME, TIMEDELTA#
+     */
+    if (dtypemeta_wrap_legacy_descriptor(
+            _builtin_descrs[NPY_@NAME@],
+            "numpy.types." NPY_@NAME@_Name "DType",
+#ifdef NPY_@NAME@_alias
+            "numpy.types." NPY_@NAME@_Alias "DType"
+#else
+            NULL
+#endif
+            ) < 0) {
+        return -1;
     }
 
+    /**end repeat**/
+
     /*
      * Add cast functions for the new types
      */

diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
@@ -9,7 +9,9 @@
 #include <numpy/ndarraytypes.h>
 #include <numpy/arrayscalars.h>
 #include "npy_pycompat.h"
+#include "npy_import.h"
 
+#include "arraytypes.h"
 #include "common.h"
 #include "dtypemeta.h"
 #include "descriptor.h"
@@ -723,12 +725,17 @@ object_common_dtype(
  * be a HeapType and its instances should be exact PyArray_Descr structs.
  *
  * @param descr The descriptor that should be wrapped.
- * @param name The name for the DType, if NULL the type character is used.
+ * @param name The name for the DType.
+ * @param alias A second name which is also set to the new class for builtins
+ *              (i.e. `np.types.LongDType` for `np.types.Int64DType`).
+ *              Some may have more aliases (e.g. `intp` is not its own
+ *              thing); as of writing this, these are not added here.
  *
  * @returns 0 on success, -1 on failure.
  */
 NPY_NO_EXPORT int
-dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
+dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr,
+        const char *name, const char *alias)
 {
     int has_type_set = Py_TYPE(descr) == &PyArrayDescr_Type;
 
@@ -755,47 +762,14 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
         return -1;
     }
 
-    /*
-     * Note: we have no intention of freeing the memory again since this
-     * behaves identically to static type definition (see comment above).
-     * This is seems cleaner for the legacy API, in the new API both static
-     * and heap types are possible (some difficulty arises from the fact that
-     * these are instances of DTypeMeta and not type).
-     * In particular our own DTypes can be true static declarations.
-     * However, this function remains necessary for legacy user dtypes.
-     */
-
-    const char *scalar_name = descr->typeobj->tp_name;
-    /*
-     * We have to take only the name, and ignore the module to get
-     * a reasonable __name__, since static types are limited in this regard
-     * (this is not ideal, but not a big issue in practice).
-     * This is what Python does to print __name__ for static types.
-     */
-    const char *dot = strrchr(scalar_name, '.');
-    if (dot) {
-        scalar_name = dot + 1;
-    }
-    Py_ssize_t name_length = strlen(scalar_name) + 14;
-
-    char *tp_name = PyMem_Malloc(name_length);
-    if (tp_name == NULL) {
-        PyErr_NoMemory();
-        return -1;
-    }
-
-    snprintf(tp_name, name_length, "numpy.dtype[%s]", scalar_name);
-
     NPY_DType_Slots *dt_slots = PyMem_Malloc(sizeof(NPY_DType_Slots));
     if (dt_slots == NULL) {
-        PyMem_Free(tp_name);
         return -1;
     }
     memset(dt_slots, '\0', sizeof(NPY_DType_Slots));
 
     PyArray_DTypeMeta *dtype_class = PyMem_Malloc(sizeof(PyArray_DTypeMeta));
     if (dtype_class == NULL) {
-        PyMem_Free(tp_name);
         PyMem_Free(dt_slots);
         return -1;
     }
@@ -817,13 +791,19 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
             .tp_flags = Py_TPFLAGS_DEFAULT,
             .tp_base = &PyArrayDescr_Type,
             .tp_new = (newfunc)legacy_dtype_default_new,
+            .tp_doc = ( 
+                "DType class corresponding to the scalar type and dtype of "
+                "the same name.\n\n"
+                "Please see `numpy.dtype` for the typical way to create\n"
+                "dtype instances and :ref:`arrays.dtypes` for additional\n"
+                "information."),
         },},
         .flags = NPY_DT_LEGACY,
         /* Further fields are not common between DTypes */
     };
     memcpy(dtype_class, &prototype, sizeof(PyArray_DTypeMeta));
     /* Fix name of the Type*/
-    ((PyTypeObject *)dtype_class)->tp_name = tp_name;
+    ((PyTypeObject *)dtype_class)->tp_name = name;
     dtype_class->dt_slots = dt_slots;
 
     /* Let python finish the initialization (probably unnecessary) */
@@ -912,6 +892,21 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     /* Finally, replace the current class of the descr */
     Py_SET_TYPE(descr, (PyTypeObject *)dtype_class);
 
+    /* Add it to the types submodule if it is a builtin dtype */
+    if (!PyTypeNum_ISUSERDEF(descr->type_num)) {
+        static PyObject *add_dtype_helper = NULL;
+        npy_cache_import("numpy.types", "_add_dtype_helper", &add_dtype_helper);
+        if (add_dtype_helper == NULL) {
+            return -1;
+        }
+
+        if (PyObject_CallFunction(
+                add_dtype_helper,
+                "Os", (PyObject *)dtype_class, alias) == NULL) {
+            return -1;
+        }
+    }
+
     return 0;
 }
 
@@ -949,7 +944,8 @@ static PyGetSetDef dtypemeta_getset[] = {
 
 static PyMemberDef dtypemeta_members[] = {
     {"type",
-        T_OBJECT, offsetof(PyArray_DTypeMeta, scalar_type), READONLY, NULL},
+        T_OBJECT, offsetof(PyArray_DTypeMeta, scalar_type), READONLY,
+        "scalar type corresponding to the DType."},
     {NULL, 0, 0, 0, NULL},
 };
 

diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
@@ -123,7 +123,8 @@ python_builtins_are_known_scalar_types(
         PyArray_DTypeMeta *cls, PyTypeObject *pytype);
 
 NPY_NO_EXPORT int
-dtypemeta_wrap_legacy_descriptor(PyArray_Descr *dtypem);
+dtypemeta_wrap_legacy_descriptor(
+        PyArray_Descr *dtypem, const char *name, const char *alias);
 
 #ifdef __cplusplus
 }

diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
@@ -261,12 +261,43 @@ PyArray_RegisterDataType(PyArray_Descr *descr)
         return -1;
     }
 
+    /*
+     * Legacy user DType classes cannot have a name, since the user never
+     * defined one.  So we create a name for them here; these DTypes are
+     * effectively static types.
+     *
+     * Note: we have no intention of freeing the memory again since this
+     * behaves identically to static type definition.
+     */
+
+    const char *scalar_name = descr->typeobj->tp_name;
+    /*
+     * We have to take only the name, and ignore the module to get
+     * a reasonable __name__, since static types are limited in this regard
+     * (this is not ideal, but not a big issue in practice).
+     * This is what Python does to print __name__ for static types.
+     */
+    const char *dot = strrchr(scalar_name, '.');
+    if (dot) {
+        scalar_name = dot + 1;
+    }
+    Py_ssize_t name_length = strlen(scalar_name) + 14;
+
+    char *name = PyMem_Malloc(name_length);
+    if (name == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    snprintf(name, name_length, "numpy.dtype[%s]", scalar_name);
+
     userdescrs[NPY_NUMUSERTYPES++] = descr;
 
     descr->type_num = typenum;
-    if (dtypemeta_wrap_legacy_descriptor(descr) < 0) {
+    if (dtypemeta_wrap_legacy_descriptor(descr, name, NULL) < 0) {
         descr->type_num = -1;
         NPY_NUMUSERTYPES--;
+        PyMem_Free(name);  /* free the name on failure, but only then */
         return -1;
     }
     if (use_void_clearimpl) {

diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
@@ -7,6 +7,7 @@
 from typing import Any
 
 import numpy as np
+import numpy.types
 from numpy.core._rational_tests import rational
 from numpy.core._multiarray_tests import create_custom_field_dtype
 from numpy.testing import (
@@ -1563,8 +1564,17 @@ def test_basic_dtypes_subclass_properties(self, dtype):
         dtype = np.dtype(dtype)
         assert isinstance(dtype, np.dtype)
         assert type(dtype) is not np.dtype
-        assert type(dtype).__name__ == f"dtype[{dtype.type.__name__}]"
-        assert type(dtype).__module__ == "numpy"
+        if dtype.type.__name__ != "rational":
+            dt_name = type(dtype).__name__
+            sc_name = dtype.type.__name__
+            assert dt_name.lower().removesuffix("dtype") == sc_name.strip("_")
+            assert type(dtype).__module__ == "numpy.types"
+
+            assert getattr(numpy.types, type(dtype).__name__) is type(dtype)
+        else:
+            assert type(dtype).__name__ == "dtype[rational]"
+            assert type(dtype).__module__ == "numpy"
+
         assert not type(dtype)._abstract
 
         # the flexible dtypes and datetime/timedelta have additional parameters
@@ -1599,6 +1609,20 @@ def test_is_numeric(self):
         for code in non_numeric_codes:
             assert not type(np.dtype(code))._is_numeric
 
+    @pytest.mark.parametrize("int_", ["UInt", "Int"])
+    @pytest.mark.parametrize("size", [8, 16, 32, 64])
+    def test_integer_alias_names(self, int_, size):
+        DType = getattr(numpy.types, f"{int_}{size}DType")  # e.g. Int8DType
+        sctype = getattr(numpy, f"{int_.lower()}{size}")  # e.g. numpy.int8
+        assert DType.type is sctype  # bit-sized name maps to matching scalar
+        assert DType.__name__.lower().removesuffix("dtype") == sctype.__name__
+
+    @pytest.mark.parametrize("name",
+            ["Half", "Float", "Double", "CFloat", "CDouble"])
+    def test_float_alias_names(self, name):  # C-style float names are absent
+        with pytest.raises(AttributeError):  # getattr raises; `is` never runs
+            getattr(numpy.types, name + "DType") is numpy.types.Float16DType
+
 
 class TestFromCTypes:
 

diff --git a/numpy/tests/test_public_api.py b/numpy/tests/test_public_api.py
@@ -160,6 +160,7 @@ def test_NPY_NO_EXPORT():
     "random",
     "testing",
     "testing.overrides",
+    "types",
     "typing",
     "typing.mypy_plugin",
     "version",

diff --git a/numpy/types.py b/numpy/types.py
@@ -0,0 +1,73 @@
+"""
+Names of builtin NumPy Types (:mod:`numpy.types`)
+==================================================
+
+Similar to the builtin ``types`` module, this submodule defines types (classes)
+that are not widely used directly.
+
+.. versionadded:: NumPy 1.25
+
+    The types module is new in NumPy 1.25.  Previously DType classes were
+    only accessible indirectly.
+
+
+DType classes
+-------------
+
+The following are the classes of the corresponding NumPy dtype instances and
+NumPy scalar types.  The classes can be used for ``isinstance`` checks but are
+otherwise not typically useful as of now.
+
+For general information see `numpy.dtype` and :ref:`arrays.dtypes`.
+
+.. list-table::
+    :header-rows: 1
+
+    * - Group
+      - DType class
+
+    * - Boolean
+      - ``BoolDType``
+
+    * - Bit-sized integers
+      - ``Int8DType``, ``UInt8DType``, ``Int16DType``, ``UInt16DType``,
+        ``Int32DType``, ``UInt32DType``, ``Int64DType``, ``UInt64DType``
+
+    * - C-named integers (may be aliases)
+      - ``ByteDType``, ``UByteDType``, ``ShortDType``, ``UShortDType``,
+        ``IntDType``, ``UIntDType``, ``LongDType``, ``ULongDType``,
+        ``LongLongDType``, ``ULongLongDType``
+
+    * - Floating point
+      - ``Float16DType``, ``Float32DType``, ``Float64DType``,
+        ``LongDoubleDType``
+
+    * - Complex
+      - ``Complex64DType``, ``Complex128DType``, ``CLongDoubleDType``
+
+    * - Strings
+      - ``BytesDType``, ``StrDType``
+
+    * - Times
+      - ``DateTime64DType``, ``TimeDelta64DType``
+
+    * - Others
+      - ``ObjectDType``, ``VoidDType``
+
+"""
+
+__all__ = []
+
+
+def _add_dtype_helper(DType, alias):
+    # Called from C for each builtin DType: exposes it (and an optional alias)
+    # here without channeling it through `numpy.core._multiarray_umath`.
+    from numpy import types  # i.e. this module, so names can be set on it
+
+    setattr(types, DType.__name__, DType)
+    __all__.append(DType.__name__)  # export the name as well
+
+    if alias:  # skipped when no alias was passed
+        alias = alias.removeprefix("numpy.types.")  # keep bare class name
+        setattr(types, alias, DType)
+        __all__.append(alias)