Skip to content

Commit

Permalink
ENH: Improved performance of PyArray_FromAny for sequences of array-like
Browse files Browse the repository at this point in the history
Prior to this commit

  np.array([array_like])

would recursively copy each element of array_like. This is because
setArrayFromSequence only special-cased lists of NumPy arrays; any
other object was treated as a sequence even if it supported the buffer
or __array*__ interfaces. See tensorflow/tensorflow#27692 for details.

The commit generalizes the special case in setArrayFromSequence to any
array-like, i.e. a buffer or an object with __array__, __array_interface__,
or __array_struct__.
  • Loading branch information
superbobry committed May 19, 2019
1 parent f91b033 commit 71fc59d
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 57 deletions.
4 changes: 4 additions & 0 deletions benchmarks/benchmarks/bench_core.py
Expand Up @@ -10,6 +10,7 @@ def setup(self):
self.l100 = range(100)
self.l50 = range(50)
self.l = [np.arange(1000), np.arange(1000)]
self.l_view = [memoryview(a) for a in self.l]
self.l10x10 = np.ones((10, 10))

def time_array_1(self):
Expand All @@ -27,6 +28,9 @@ def time_array_l100(self):
# Benchmark: time np.array() on self.l, the list of two np.arange(1000)
# arrays assigned in setup.
def time_array_l(self):
np.array(self.l)

# Benchmark: time np.array() on self.l_view, a list of memoryview objects
# wrapping the arrays in self.l (i.e. a sequence of buffer-exporting
# array-likes rather than ndarrays).
def time_array_l_view(self):
np.array(self.l_view)

# Benchmark: time np.vstack() on self.l, the list of two np.arange(1000)
# arrays assigned in setup.
def time_vstack_l(self):
np.vstack(self.l)

Expand Down
170 changes: 113 additions & 57 deletions numpy/core/src/multiarray/ctors.c
Expand Up @@ -422,6 +422,10 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems,
}
}

NPY_NO_EXPORT PyObject *
_array_from_array_like(PyObject *op, PyArray_Descr *requested_dtype,
npy_bool writeable, PyObject *context);

/*
* adapted from Numarray,
* a: destination array
Expand All @@ -435,6 +439,7 @@ static int
setArrayFromSequence(PyArrayObject *a, PyObject *s,
int dim, PyArrayObject * dst)
{
PyObject *tmp;
Py_ssize_t i, slen;
int res = -1;

Expand Down Expand Up @@ -478,6 +483,22 @@ setArrayFromSequence(PyArrayObject *a, PyObject *s,
goto fail;
}

tmp = _array_from_array_like(s, /*dtype*/NULL, /*writeable*/0, /*context*/NULL);
if (tmp == NULL) {
goto fail;
}
else if (tmp != Py_NotImplemented) {
if (PyArray_CopyInto(dst, (PyArrayObject *)tmp) < 0) {
goto fail;
}

Py_DECREF(s);
return 0;
}
else {
Py_DECREF(Py_NotImplemented);
}

slen = PySequence_Length(s);
if (slen < 0) {
goto fail;
Expand Down Expand Up @@ -1481,6 +1502,90 @@ _array_from_buffer_3118(PyObject *memoryview)

}


/*
 * Try to obtain an array from `op` through one of the array-like protocols,
 * probed in this order:
 *
 *   1. the PEP 3118 buffer interface (skipped for bytes and unicode objects);
 *   2. the __array_struct__ attribute;
 *   3. the __array_interface__ attribute;
 *   4. the __array__ method (only when `writeable` is false).
 *
 * `requested_dtype` and `context` are only forwarded to the __array__ lookup.
 * When `writeable` is true, the array that was found must itself be
 * writeable, otherwise the call fails.
 *
 * Returns:
 *   - the resulting array object on success;
 *   - a new reference to Py_NotImplemented if no protocol applies to `op`;
 *   - NULL if a protocol was found but the conversion (or the writeable
 *     check) failed.
 */
NPY_NO_EXPORT PyObject *
_array_from_array_like(PyObject *op, PyArray_Descr *requested_dtype,
                       npy_bool writeable, PyObject *context)
{
    PyObject *result;

    /* 1. PEP 3118 buffer; bytes and unicode objects never take this path. */
    if (!(PyBytes_Check(op) || PyUnicode_Check(op))) {
        PyObject *view = PyMemoryView_FromObject(op);

        if (view != NULL) {
            result = _array_from_buffer_3118(view);
            Py_DECREF(view);
            if (result == NULL) {
                return NULL;
            }
            if (writeable &&
                    PyArray_FailUnlessWriteable((PyArrayObject *)result,
                                                "PEP 3118 buffer") < 0) {
                Py_DECREF(result);
                return NULL;
            }
            return result;
        }

        /* No usable buffer: drop the error and fall through to the others. */
        PyErr_Clear();
    }

    /* 2. __array_struct__ */
    result = PyArray_FromStructInterface(op);
    if (result == NULL) {
        return NULL;
    }

    /* 3. __array_interface__ */
    if (result == Py_NotImplemented) {
        result = PyArray_FromInterface(op);
        if (result == NULL) {
            return NULL;
        }
    }

    /*
     * 4. __array__
     * The documentation says __array__ should produce a copy, so it is
     * skipped when writeable is true: the intent of writeable is to modify
     * the operand itself.
     * XXX: If the implementation is wrong, and/or if actual usage requires
     *      this to behave differently, this should be changed!
     */
    if (result == Py_NotImplemented && !writeable) {
        result = PyArray_FromArrayAttr(op, requested_dtype, context);
        if (result == NULL) {
            return NULL;
        }
    }

    /* Nothing matched: op is not array-like. */
    if (result == Py_NotImplemented) {
        Py_INCREF(Py_NotImplemented);
        return Py_NotImplemented;
    }

    if (writeable &&
            PyArray_FailUnlessWriteable((PyArrayObject *)result,
                                        "array interface object") < 0) {
        Py_DECREF(result);
        return NULL;
    }
    return result;
}


/*NUMPY_API
* Retrieves the array parameters for viewing/converting an arbitrary
* PyObject* to a NumPy array. This allows the "innate type and shape"
Expand Down Expand Up @@ -1588,69 +1693,20 @@ PyArray_GetArrayParamsFromObject(PyObject *op,
return 0;
}

/* If op supports the PEP 3118 buffer interface */
if (!PyBytes_Check(op) && !PyUnicode_Check(op)) {

PyObject *memoryview = PyMemoryView_FromObject(op);
if (memoryview == NULL) {
PyErr_Clear();
}
else {
PyObject *arr = _array_from_buffer_3118(memoryview);
Py_DECREF(memoryview);
if (arr == NULL) {
return -1;
}
if (writeable
&& PyArray_FailUnlessWriteable((PyArrayObject *)arr, "PEP 3118 buffer") < 0) {
Py_DECREF(arr);
return -1;
}
*out_arr = (PyArrayObject *)arr;
return 0;
}
}

/* If op supports the __array_struct__ or __array_interface__ interface */
tmp = PyArray_FromStructInterface(op);
/* If op is an array-like */
tmp = _array_from_array_like(op, requested_dtype, writeable, context);
if (tmp == NULL) {
return -1;
}
if (tmp == Py_NotImplemented) {
tmp = PyArray_FromInterface(op);
if (tmp == NULL) {
return -1;
}
}
if (tmp != Py_NotImplemented) {
if (writeable
&& PyArray_FailUnlessWriteable((PyArrayObject *)tmp,
"array interface object") < 0) {
Py_DECREF(tmp);
return -1;
}
*out_arr = (PyArrayObject *)tmp;
return (*out_arr) == NULL ? -1 : 0;
else if (tmp != Py_NotImplemented) {
*out_arr = (PyArrayObject*) tmp;
return 0;
}

/*
* If op supplies the __array__ function.
* The documentation says this should produce a copy, so
* we skip this method if writeable is true, because the intent
* of writeable is to modify the operand.
* XXX: If the implementation is wrong, and/or if actual
* usage requires this behave differently,
* this should be changed!
*/
if (!writeable) {
tmp = PyArray_FromArrayAttr(op, requested_dtype, context);
if (tmp != Py_NotImplemented) {
*out_arr = (PyArrayObject *)tmp;
return (*out_arr) == NULL ? -1 : 0;
}
else {
Py_DECREF(Py_NotImplemented);
}

/* Try to treat op as a list of lists */
/* Try to treat op as a list of lists or array-like objects. */
if (!writeable && PySequence_Check(op)) {
int check_it, stop_at_string, stop_at_tuple, is_object;
int type_num, type;
Expand Down
23 changes: 23 additions & 0 deletions numpy/core/tests/test_multiarray.py
Expand Up @@ -878,6 +878,29 @@ def test_sequence_long(self):
assert_equal(np.array([long(4), 2**80, long(4)]).dtype, object)
assert_equal(np.array([2**80, long(4)]).dtype, object)

# Regression test: np.array() on a list containing an array-like object must
# build the result from the object's __array_interface__ and must not fall
# back to iterating or indexing it as a generic sequence.
def test_sequence_of_array_like(self):
class ArrayLike:
def __init__(self):
# Array interface describing 42 one-byte integers ("<i1") backed by a
# 42-byte buffer; the expected result below is all zeros.
self.__array_interface__ = {
"shape": (42,),
"typestr": "<i1",
"data": bytes(42)
}

# Make sure __array_*__ is used instead of Sequence methods.
def __iter__(self):
raise AssertionError("__iter__ was called")

def __getitem__(self, idx):
raise AssertionError("__getitem__ was called")

# A length is still reported, so the object looks like a sequence; only
# iteration and indexing are trapped above.
def __len__(self):
return 42

# One array-like of shape (42,) inside a list -> result of shape (1, 42).
assert_equal(
np.array([ArrayLike()]),
np.zeros((1, 42), dtype=np.byte))

def test_non_sequence_sequence(self):
"""Should not segfault.
Expand Down

0 comments on commit 71fc59d

Please sign in to comment.