Skip to content

Commit

Permalink
Merge pull request #4567 from juliantaylor/obj-array-create-opt
Browse files Browse the repository at this point in the history
ENH: optimize array creation from homogenous python sequences
  • Loading branch information
charris committed Mar 30, 2014
2 parents ceeeb1a + 012a353 commit 15a87e2
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 59 deletions.
48 changes: 35 additions & 13 deletions numpy/core/src/multiarray/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,10 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
PyArray_Descr *dtype = NULL;
PyObject *ip;
Py_buffer buffer_view;
/* types for sequence handling */
PyObject ** objects;
PyObject * seq;
PyTypeObject * common_type;

/* Check if it's an ndarray */
if (PyArray_Check(obj)) {
Expand Down Expand Up @@ -514,31 +518,49 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
return 0;
}

/* Recursive case */
size = PySequence_Size(obj);
if (size < 0) {
/* Recursive case, first check the sequence contains only one type */
seq = PySequence_Fast(obj, "Could not convert object to sequence");
if (seq == NULL) {
goto fail;
}
size = PySequence_Fast_GET_SIZE(seq);
objects = PySequence_Fast_ITEMS(seq);
common_type = size > 0 ? Py_TYPE(objects[0]) : NULL;
for (i = 1; i < size; ++i) {
if (Py_TYPE(objects[i]) != common_type) {
common_type = NULL;
break;
}
}

/* all types are the same and scalar, one recursive call is enough */
if (common_type != NULL && !string_type &&
(common_type == &PyFloat_Type ||
/* TODO: we could add longs if we add a range check */
#if !defined(NPY_PY3K)
common_type == &PyInt_Type ||
#endif
common_type == &PyBool_Type ||
common_type == &PyComplex_Type)) {
size = 1;
}

/* Recursive call for each sequence item */
for (i = 0; i < size; ++i) {
int res;
ip = PySequence_GetItem(obj, i);
if (ip == NULL) {
goto fail;
}
res = PyArray_DTypeFromObjectHelper(ip, maxdims - 1,
out_dtype, string_type);
int res = PyArray_DTypeFromObjectHelper(objects[i], maxdims - 1,
out_dtype, string_type);
if (res < 0) {
Py_DECREF(ip);
Py_DECREF(seq);
goto fail;
}
else if (res > 0) {
Py_DECREF(ip);
Py_DECREF(seq);
return res;
}
Py_DECREF(ip);
}

Py_DECREF(seq);

return 0;


Expand Down
87 changes: 41 additions & 46 deletions numpy/core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,15 +503,18 @@ setArrayFromSequence(PyArrayObject *a, PyObject *s,
}
/* Copy element by element */
else {
PyObject * seq;
seq = PySequence_Fast(s, "Could not convert object to sequence");
if (seq == NULL) {
goto fail;
}
for (i = 0; i < slen; i++) {
PyObject *o = PySequence_GetItem(s, i);
if (o == NULL) {
goto fail;
}
PyObject * o = PySequence_Fast_GET_ITEM(seq, i);
if ((PyArray_NDIM(a) - dim) > 1) {
PyArrayObject * tmp =
(PyArrayObject *)array_item_asarray(dst, i);
if (tmp == NULL) {
Py_DECREF(seq);
goto fail;
}

Expand All @@ -522,11 +525,12 @@ setArrayFromSequence(PyArrayObject *a, PyObject *s,
char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]);
res = PyArray_DESCR(dst)->f->setitem(o, b, dst);
}
Py_DECREF(o);
if (res < 0) {
Py_DECREF(seq);
goto fail;
}
}
Py_DECREF(seq);
}

Py_DECREF(s);
Expand Down Expand Up @@ -640,6 +644,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
PyObject *e;
int r, n, i;
Py_buffer buffer_view;
PyObject * seq;

if (*maxndim == 0) {
return 0;
Expand Down Expand Up @@ -782,75 +787,63 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
}
}

n = PySequence_Size(obj);

if (n < 0) {
return -1;
seq = PySequence_Fast(obj, "Could not convert object to sequence");
if (seq == NULL) {
/*
* PySequence_Check detects whether an old type object is a
* sequence by the presence of the __getitem__ attribute, and
* for new type objects that aren't dictionaries by the
* presence of the __len__ attribute as well. In either case it
* is possible to have an object that tests as a sequence but
* doesn't behave as a sequence and consequently, the
* PySequence_Fast call above can fail. When that happens and the
* object looks like a dictionary, we truncate the dimensions
* and set the object creation flag, otherwise we pass the
* error back up the call chain.
*/
if (PyErr_ExceptionMatches(PyExc_KeyError)) {
PyErr_Clear();
*maxndim = 0;
*out_is_object = 1;
return 0;
}
else {
return -1;
}
}
n = PySequence_Fast_GET_SIZE(seq);

d[0] = n;

/* 1-dimensional sequence */
if (n == 0 || *maxndim == 1) {
*maxndim = 1;
Py_DECREF(seq);
return 0;
}
else {
npy_intp dtmp[NPY_MAXDIMS];
int j, maxndim_m1 = *maxndim - 1;
e = PySequence_Fast_GET_ITEM(seq, 0);

if ((e = PySequence_GetItem(obj, 0)) == NULL) {
/*
* PySequence_Check detects whether an old type object is a
* sequence by the presence of the __getitem__ attribute, and
* for new type objects that aren't dictionaries by the
* presence of the __len__ attribute as well. In either case it
* is possible to have an object that tests as a sequence but
* doesn't behave as a sequence and consequently, the
* PySequence_GetItem call can fail. When that happens and the
* object looks like a dictionary, we truncate the dimensions
* and set the object creation flag, otherwise we pass the
* error back up the call chain.
*/
if (PyErr_ExceptionMatches(PyExc_KeyError)) {
PyErr_Clear();
*maxndim = 0;
*out_is_object = 1;
return 0;
}
else {
return -1;
}
}
r = discover_dimensions(e, &maxndim_m1, d + 1, check_it,
stop_at_string, stop_at_tuple,
out_is_object);
Py_DECREF(e);
if (r < 0) {
Py_DECREF(seq);
return r;
}

/* For the dimension truncation check below */
*maxndim = maxndim_m1 + 1;
for (i = 1; i < n; ++i) {
e = PySequence_Fast_GET_ITEM(seq, i);
/* Get the dimensions of the i-th item */
if ((e = PySequence_GetItem(obj, i)) == NULL) {
/* see comment above */
if (PyErr_ExceptionMatches(PyExc_KeyError)) {
PyErr_Clear();
*maxndim = 0;
*out_is_object = 1;
return 0;
}
else {
return -1;
}
}
r = discover_dimensions(e, &maxndim_m1, dtmp, check_it,
stop_at_string, stop_at_tuple,
out_is_object);
Py_DECREF(e);
if (r < 0) {
Py_DECREF(seq);
return r;
}

Expand All @@ -872,6 +865,8 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
}
}

Py_DECREF(seq);

return 0;
}

Expand Down
16 changes: 16 additions & 0 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,22 @@ def test_zeros_obj(self):
assert_array_equal(d, [0] * 13)
assert_equal(np.count_nonzero(d), 0)

def test_sequence_non_homogenous(self):
assert_equal(np.array([4, 2**80]).dtype, np.object)
assert_equal(np.array([4, 2**80, 4]).dtype, np.object)
assert_equal(np.array([2**80, 4]).dtype, np.object)
assert_equal(np.array([2**80] * 3).dtype, np.object)
assert_equal(np.array([[1, 1],[1j, 1j]]).dtype, np.complex)
assert_equal(np.array([[1j, 1j],[1, 1]]).dtype, np.complex)
assert_equal(np.array([[1, 1, 1],[1, 1j, 1.], [1, 1, 1]]).dtype, np.complex)

@dec.skipif(sys.version_info[0] >= 3)
def test_sequence_long(self):
    """Check the dtype chosen by np.array for sequences of Python 2 longs.

    The skipif condition is true for major version 3 and above; the body
    uses the builtin ``long``, which only exists on Python 2.
    """
    small = long(4)
    huge = 2**80
    # (sequence, expected dtype) pairs: only small longs give np.long,
    # any sequence containing 2**80 gives np.object.
    expectations = (
        ([small, small], np.long),
        ([small, huge], np.object),
        ([small, huge, small], np.object),
        ([huge, small], np.object),
    )
    for items, expected in expectations:
        assert_equal(np.array(items).dtype, expected)

def test_non_sequence_sequence(self):
"""Should not segfault.
Expand Down

0 comments on commit 15a87e2

Please sign in to comment.