Skip to content

Commit

Permalink
BUG: Make mmap handling safer in frombuffer
Browse files Browse the repository at this point in the history
`frombuffer` always used the old buffer protocol, but this is not
safe in some cases when combined with the "new" one.
The main/only use-case that was ever found for this is that enabling
the new buffer protocol (by going through a memoryview) protects
users from accidentally closing memorymaps while they are still used.

Closes gh-9537
  • Loading branch information
seberg authored and charris committed May 5, 2022
1 parent 65bbf0c commit 1c51494
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 6 deletions.
4 changes: 4 additions & 0 deletions numpy/core/_add_newdocs.py
Expand Up @@ -1557,6 +1557,10 @@
The data of the resulting array will not be byteswapped, but will be
interpreted correctly.
This function creates a view into the original object. This should be safe
in general, but it may make sense to copy the result when the original
object is mutable or untrusted.
Examples
--------
>>> s = b'hello world'
Expand Down
32 changes: 26 additions & 6 deletions numpy/core/src/multiarray/ctors.c
Expand Up @@ -3684,28 +3684,44 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
return NULL;
}

/*
* The array check is probably unnecessary. It preserves the base for
* arrays. This is the "old" buffer protocol, which had no release logic.
* (It was assumed that the result is always a view.)
*
* NOTE: We could also check if `bf_releasebuffer` is defined which should
* be the most precise and safe thing to do. But that should only be
* necessary if unexpected backcompat issues are found downstream.
*/
if (!PyArray_Check(buf)) {
buf = PyMemoryView_FromObject(buf);
if (buf == NULL) {
return NULL;
}
}
else {
Py_INCREF(buf);
}

if (PyObject_GetBuffer(buf, &view, PyBUF_WRITABLE|PyBUF_SIMPLE) < 0) {
writeable = 0;
PyErr_Clear();
if (PyObject_GetBuffer(buf, &view, PyBUF_SIMPLE) < 0) {
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
}
data = (char *)view.buf;
ts = view.len;
/*
* In Python 3 both of the deprecated functions PyObject_AsWriteBuffer and
* PyObject_AsReadBuffer that this code replaces release the buffer. It is
* up to the object that supplies the buffer to guarantee that the buffer
* sticks around after the release.
*/
/* `buf` is an array or a memoryview; so we know `view` does not own data */
PyBuffer_Release(&view);

if ((offset < 0) || (offset > ts)) {
PyErr_Format(PyExc_ValueError,
"offset must be non-negative and no greater than buffer "\
"length (%" NPY_INTP_FMT ")", (npy_intp)ts);
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
Expand All @@ -3718,13 +3734,15 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
if (itemsize == 0) {
PyErr_SetString(PyExc_ValueError,
"cannot determine count if itemsize is 0");
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
if (s % itemsize != 0) {
PyErr_SetString(PyExc_ValueError,
"buffer size must be a multiple"\
" of element size");
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
Expand All @@ -3735,6 +3753,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
PyErr_SetString(PyExc_ValueError,
"buffer is smaller than requested"\
" size");
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
Expand All @@ -3744,6 +3763,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
&PyArray_Type, type,
1, &n, NULL, data,
NPY_ARRAY_DEFAULT, NULL, buf);
Py_DECREF(buf);
if (ret == NULL) {
return NULL;
}
Expand Down
24 changes: 24 additions & 0 deletions numpy/core/tests/test_multiarray.py
Expand Up @@ -18,6 +18,7 @@
import pathlib
import builtins
from decimal import Decimal
import mmap

import numpy as np
import numpy.core._multiarray_tests as _multiarray_tests
Expand Down Expand Up @@ -5349,9 +5350,32 @@ def test_basic(self, byteorder, dtype):
buf = x.tobytes()
assert_array_equal(np.frombuffer(buf, dtype=dt), x.flat)

def test_array_base(self):
arr = np.arange(10)
new = np.frombuffer(arr)
# We currently special case arrays to ensure they are used as a base.
# This could probably be changed (removing the test).
assert new.base is arr

def test_empty(self):
assert_array_equal(np.frombuffer(b''), np.array([]))

@pytest.mark.skipif(IS_PYPY,
reason="PyPy's memoryview currently does not track exports. See: "
"https://foss.heptapod.net/pypy/pypy/-/issues/3724")
def test_mmap_close(self):
# The old buffer protocol was not safe for some things that the new
# one is. But `frombuffer` always used the old one for a long time.
# Checks that it is safe with the new one (using memoryviews)
with tempfile.TemporaryFile(mode='wb') as tmp:
tmp.write(b"asdf")
tmp.flush()
mm = mmap.mmap(tmp.fileno(), 0)
arr = np.frombuffer(mm, dtype=np.uint8)
with pytest.raises(BufferError):
mm.close() # cannot close while array uses the buffer
del arr
mm.close()

class TestFlat:
def setup(self):
Expand Down

0 comments on commit 1c51494

Please sign in to comment.