Allow ufunc operand flags to be set #359

Merged
merged 12 commits into from May 10, 2013
+250 −23
Split
@@ -159,3 +159,21 @@ General
Use of non-integer indices has been deprecated. Previously float indices
were truncated to integers without warning.
+C-API
+~~~~~
+
+New Features
+============
+
+When creating a ufunc, the default ufunc operand flags can be overridden
+via the new op_flags attribute of the ufunc object. For example, to set
+the operand flag for the first input to read/write:
+
+PyUFuncObject \*ufunc = (PyUFuncObject \*)PyUFunc_FromFuncAndData(...);
+ufunc->op_flags[0] = NPY_ITER_READWRITE;
+
+This allows a ufunc to perform an operation in place. Also, global nditer flags
+can be overridden via the new iter_flags attribute of the ufunc object.
+For example, to set the reduce flag for a ufunc:
+
+ufunc->iter_flags = NPY_ITER_REDUCE_OK;
@@ -652,6 +652,8 @@ PyUFunc_Type
void *ptr;
PyObject *obj;
PyObject *userloops;
+ npy_uint32 *op_flags;
+ npy_uint32 iter_flags;
} PyUFuncObject;
.. cmacro:: PyUFuncObject.PyObject_HEAD
@@ -755,6 +757,14 @@ PyUFunc_Type
numbers are always larger than :cdata:`NPY_USERDEF`.
+ .. cmember:: npy_uint32 *PyUFuncObject.op_flags
+
+ Override the default operand flags for each ufunc operand.
+
+ .. cmember:: npy_uint32 PyUFuncObject.iter_flags
+
+ Override the default nditer flags for the ufunc.
+
PyArrayIter_Type
----------------
@@ -212,6 +212,20 @@ typedef struct _tagPyUFuncObject {
* A function which returns a masked inner loop for the ufunc.
*/
PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector;
+
+ /*
+ * Array of per-operand flags (one entry per ufunc operand) used when
+ * the ufunc creates its nditer object. These flags are OR'ed with the
+ * default flags the ufunc sets for each operand.
+ */
+ npy_uint32 *op_flags;
+
+ /*
+ * Bitmask of global flags used when the ufunc creates its nditer object.
+ * These flags are OR'ed with the default global flags
+ * that the ufunc passes to the nditer object.
+ */
+ npy_uint32 iter_flags;
} PyUFuncObject;
#include "arrayobject.h"
View
@@ -935,6 +935,13 @@ def get_dotblas_sources(ext, build_dir):
config.add_extension('multiarray_tests',
sources = [join('src', 'multiarray', 'multiarray_tests.c.src')])
+ #######################################################################
+ # operand_flag_tests module #
+ #######################################################################
+
+ config.add_extension('operand_flag_tests',
+ sources = [join('src','umath', 'operand_flag_tests.c.src')])
+
config.add_data_dir('tests')
config.add_data_dir('tests/data')
@@ -0,0 +1,106 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#include <stdint.h>
+#include <math.h>
+#include <Python.h>
+#include <structmember.h>
+#include <numpy/arrayobject.h>
+#include <numpy/ufuncobject.h>
+#include "numpy/npy_3kcompat.h"
+
+
+/* Module method table: no Python-level functions, only the terminating sentinel. */
+static PyMethodDef TestMethods[] = {
+    { NULL, NULL, 0, NULL }    /* sentinel */
+};
+
+
+/*
+ * Inner loop for the "inplace_add" test ufunc.
+ *
+ * For each of the dimensions[0] elements, reads a long from args[0] and
+ * a long from args[1] and stores their sum back into args[0].  Both
+ * pointers advance by their respective byte strides (steps[0], steps[1])
+ * after every element.  The data argument is not used.
+ */
+static void
+inplace_add(char **args, npy_intp *dimensions, npy_intp *steps, void *data)
+{
+    char *target = args[0];
+    char *addend = args[1];
+    npy_intp target_stride = steps[0];
+    npy_intp addend_stride = steps[1];
+    npy_intp remaining;
+
+    for (remaining = dimensions[0]; remaining > 0; --remaining) {
+        /* The first operand is both a source and the destination. */
+        *(long *)target += *(long *)addend;
+        target += target_stride;
+        addend += addend_stride;
+    }
+}
+
+
+/* Loop table for the ufunc: a single entry pointing at inplace_add above. */
+static PyUFuncGenericFunction funcs[1] = {&inplace_add};
+
+/*
+ * Type signature for that loop: both operands of inplace_add are
+ * NPY_LONG (the ufunc is registered with two inputs and no outputs).
+ */
+static char types[2] = {NPY_LONG, NPY_LONG};
+
+/* Per-loop auxiliary data; the single loop needs none. */
+static void *data[1] = {NULL};
+
+#if defined(NPY_PY3K)
+/* Python 3 module definition; TestMethods supplies the (empty) method table. */
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "operand_flag_tests",
+    NULL,
+    -1,
+    TestMethods,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+
+/* On Python 3 the init function returns the module object (NULL on failure). */
+#define RETVAL m
+PyMODINIT_FUNC PyInit_operand_flag_tests(void)
+{
+#else
+/* On Python 2 the init function returns nothing. */
+#define RETVAL
+PyMODINIT_FUNC initoperand_flag_tests(void)
+{
+#endif
+    /*
+     * Module initialisation: creates the module, builds the
+     * "inplace_add" ufunc from the funcs/data/types tables (2 inputs,
+     * 0 outputs), overrides its operand and iterator flags, and adds it
+     * to the module.  On any failure a Python exception is set and, on
+     * Python 3, the partially built module is released.
+     */
+    PyObject *m = NULL;
+    PyObject *ufunc = NULL;
+
+#if defined(NPY_PY3K)
+    m = PyModule_Create(&moduledef);
+#else
+    m = Py_InitModule("operand_flag_tests", TestMethods);
+#endif
+    if (m == NULL) {
+        goto fail;
+    }
+
+    import_array();
+    import_umath();
+
+    ufunc = PyUFunc_FromFuncAndData(funcs, data, types, 1, 2, 0,
+                                    PyUFunc_None, "inplace_add",
+                                    "inplace_add_docstring", 0);
+    /* Creation can fail; do not dereference a NULL ufunc below. */
+    if (ufunc == NULL) {
+        goto fail;
+    }
+
+    /*
+     * Mark the first input operand as read/write so that the inner
+     * loop's result can be written back into it, and set the
+     * NPY_ITER_REDUCE_OK global iterator flag for this ufunc.
+     */
+    ((PyUFuncObject*)ufunc)->op_flags[0] = NPY_ITER_READWRITE;
+    ((PyUFuncObject*)ufunc)->iter_flags = NPY_ITER_REDUCE_OK;
+
+    /* PyModule_AddObject steals the reference only when it succeeds. */
+    if (PyModule_AddObject(m, "inplace_add", ufunc) < 0) {
+        Py_DECREF(ufunc);
+        goto fail;
+    }
+
+    return RETVAL;
+
+fail:
+    if (!PyErr_Occurred()) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "cannot load operand_flag_tests module.");
+    }
+#if defined(NPY_PY3K)
+    if (m) {
+        Py_DECREF(m);
+        m = NULL;
+    }
+#endif
+    return RETVAL;
+
+}
@@ -1181,34 +1181,46 @@ iterator_loop(PyUFuncObject *ufunc,
npy_intp *count_ptr;
PyArrayObject **op_it;
+ npy_uint32 iter_flags;
NPY_BEGIN_THREADS_DEF;
/* Set up the flags */
for (i = 0; i < nin; ++i) {
- op_flags[i] = NPY_ITER_READONLY|
+ op_flags[i] = NPY_ITER_READONLY |
NPY_ITER_ALIGNED;
+ /*
+ * If READWRITE flag has been set for this operand,
+ * then clear default READONLY flag
+ */
+ op_flags[i] |= ufunc->op_flags[i];
+ if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) {
+ op_flags[i] &= ~NPY_ITER_READONLY;
+ }
}
for (i = nin; i < nop; ++i) {
- op_flags[i] = NPY_ITER_WRITEONLY|
- NPY_ITER_ALIGNED|
- NPY_ITER_ALLOCATE|
- NPY_ITER_NO_BROADCAST|
+ op_flags[i] = NPY_ITER_WRITEONLY |
+ NPY_ITER_ALIGNED |
+ NPY_ITER_ALLOCATE |
+ NPY_ITER_NO_BROADCAST |
NPY_ITER_NO_SUBTYPE;
}
+ iter_flags = ufunc->iter_flags |
+ NPY_ITER_EXTERNAL_LOOP |
+ NPY_ITER_REFS_OK |
+ NPY_ITER_ZEROSIZE_OK |
+ NPY_ITER_BUFFERED |
+ NPY_ITER_GROWINNER |
+ NPY_ITER_DELAY_BUFALLOC;
+
/*
* Allocate the iterator. Because the types of the inputs
* were already checked, we use the casting rule 'unsafe' which
* is faster to calculate.
*/
iter = NpyIter_AdvancedNew(nop, op,
- NPY_ITER_EXTERNAL_LOOP|
- NPY_ITER_REFS_OK|
- NPY_ITER_ZEROSIZE_OK|
- NPY_ITER_BUFFERED|
- NPY_ITER_GROWINNER|
- NPY_ITER_DELAY_BUFALLOC,
+ iter_flags,
order, NPY_UNSAFE_CASTING,
op_flags, dtype,
-1, NULL, NULL, buffersize);
@@ -1462,6 +1474,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
npy_intp *countptr;
PyArrayObject **op_it;
+ npy_uint32 iter_flags;
NPY_BEGIN_THREADS_DEF;
@@ -1481,6 +1494,14 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
op_flags[i] = default_op_in_flags |
NPY_ITER_READONLY |
NPY_ITER_ALIGNED;
+ /*
+ * If READWRITE flag has been set for this operand,
+ * then clear default READONLY flag
+ */
+ op_flags[i] |= ufunc->op_flags[i];
+ if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) {
+ op_flags[i] &= ~NPY_ITER_READONLY;
+ }
}
for (i = nin; i < nop; ++i) {
op_flags[i] = default_op_out_flags |
@@ -1496,17 +1517,20 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
NPY_UF_DBG_PRINT("Making iterator\n");
+ iter_flags = ufunc->iter_flags |
+ NPY_ITER_EXTERNAL_LOOP |
+ NPY_ITER_REFS_OK |
+ NPY_ITER_ZEROSIZE_OK |
+ NPY_ITER_BUFFERED |
+ NPY_ITER_GROWINNER;
+
/*
* Allocate the iterator. Because the types of the inputs
* were already checked, we use the casting rule 'unsafe' which
* is faster to calculate.
*/
iter = NpyIter_AdvancedNew(nop + ((wheremask != NULL) ? 1 : 0), op,
- NPY_ITER_EXTERNAL_LOOP |
- NPY_ITER_REFS_OK |
- NPY_ITER_ZEROSIZE_OK |
- NPY_ITER_BUFFERED |
- NPY_ITER_GROWINNER,
+ iter_flags,
order, NPY_UNSAFE_CASTING,
op_flags, dtypes,
-1, NULL, NULL, buffersize);
@@ -1663,8 +1687,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
npy_uint32 op_flags[NPY_MAXARGS];
npy_intp iter_shape[NPY_MAXARGS];
-
NpyIter *iter = NULL;
+ npy_uint32 iter_flags;
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
@@ -1964,9 +1988,17 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
* can't do buffering, so must COPY or UPDATEIFCOPY.
*/
for (i = 0; i < nin; ++i) {
- op_flags[i] = NPY_ITER_READONLY|
- NPY_ITER_COPY|
+ op_flags[i] = NPY_ITER_READONLY |
+ NPY_ITER_COPY |
NPY_ITER_ALIGNED;
+ /*
+ * If READWRITE flag has been set for this operand,
+ * then clear default READONLY flag
+ */
+ op_flags[i] |= ufunc->op_flags[i];
+ if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) {
+ op_flags[i] &= ~NPY_ITER_READONLY;
+ }
}
for (i = nin; i < nop; ++i) {
op_flags[i] = NPY_ITER_READWRITE|
@@ -1976,11 +2008,26 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
NPY_ITER_NO_BROADCAST;
}
+ /*
+ * If there are no iteration dimensions, create a fake one
+ * so that the scalar edge case works right.
+ */
@charris

charris May 9, 2013

Owner

@seberg ISTR you may have done something relevant to this.

@seberg

seberg May 9, 2013

Member

Yeah, my guess is, it can probably be removed. But would have to try too...

+ if (iter_ndim == 0) {
+ iter_ndim = 1;
+ iter_shape[0] = 1;
+ for (i = 0; i < nop; ++i) {
+ op_axes[i][0] = -1;
+ }
+ }
+
+ iter_flags = ufunc->iter_flags |
+ NPY_ITER_MULTI_INDEX |
+ NPY_ITER_REFS_OK |
+ NPY_ITER_REDUCE_OK |
+ NPY_ITER_ZEROSIZE_OK;
+
/* Create the iterator */
- iter = NpyIter_AdvancedNew(nop, op, NPY_ITER_MULTI_INDEX|
- NPY_ITER_REFS_OK|
- NPY_ITER_REDUCE_OK|
- NPY_ITER_ZEROSIZE_OK,
+ iter = NpyIter_AdvancedNew(nop, op, iter_flags,
order, NPY_UNSAFE_CASTING, op_flags,
dtypes, iter_ndim,
op_axes, iter_shape, 0);
@@ -4242,6 +4289,14 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
}
ufunc->doc = doc;
+ ufunc->op_flags = PyArray_malloc(sizeof(npy_uint32)*ufunc->nargs);
@charris

charris Apr 1, 2013

Owner

Success of memory allocation needs to be checked.

@charris

charris May 3, 2013

Owner

Still needs the NULL check. Is there an error return available here?

@charris

charris May 9, 2013

Owner

Still missing check for allocation success.

@jayvius

jayvius May 9, 2013

Contributor

Somehow I missed these comments before. The op_flags allocation has a matching PyArray_free in the ufunc_dealloc function, but I do need one a few lines down if _parse_signature fails.

@jayvius

jayvius May 9, 2013

Contributor

Nevermind, the op_flags will still be freed because ufunc_dealloc gets called if _parse_signature fails.

+ if (ufunc->op_flags == NULL) {
+ return PyErr_NoMemory();
+ }
+ memset(ufunc->op_flags, 0, sizeof(npy_uint32)*ufunc->nargs);
+
+ ufunc->iter_flags = 0;
+
/* generalized ufunc */
ufunc->core_enabled = 0;
ufunc->core_num_dim_ix = 0;
@@ -4491,6 +4546,9 @@ ufunc_dealloc(PyUFuncObject *ufunc)
if (ufunc->ptr) {
PyArray_free(ufunc->ptr);
}
+ if (ufunc->op_flags) {
+ PyArray_free(ufunc->op_flags);
+ }
Py_XDECREF(ufunc->userloops);
Py_XDECREF(ufunc->obj);
PyArray_free(ufunc);
@@ -123,6 +123,12 @@ ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUS
self->core_dim_ixs = NULL;
self->core_offsets = NULL;
self->core_signature = NULL;
+ self->op_flags = PyArray_malloc(sizeof(npy_uint32)*self->nargs);
@charris

charris Apr 1, 2013

Owner

Success of memory allocation needs checking.

@charris

charris May 3, 2013

Owner

Still needed.

@charris

charris May 3, 2013

Owner

Notice that there is no matching PyArray_free as for the previous allocation of this type. Is that OK?

@charris

charris May 9, 2013

Owner

Does the allocated memory need to be freed?

@jayvius

jayvius May 9, 2013

Contributor

Missed these comments too somehow. op_flags get freed in ufunc_dealloc(). Adding null check now.

+ if (self->op_flags == NULL) {
+ return PyErr_NoMemory();
+ }
+ memset(self->op_flags, 0, sizeof(npy_uint32)*self->nargs);
+ self->iter_flags = 0;
self->type_resolver = &object_ufunc_type_resolver;
self->legacy_inner_loop_selector = &object_ufunc_loop_selector;
Oops, something went wrong.