From 78070613053ca780b7de11dcd26f5849a707aead Mon Sep 17 00:00:00 2001 From: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> Date: Wed, 12 Jul 2023 22:50:45 -0400 Subject: [PATCH] gh-105235: Prevent reading outside buffer during mmap.find() (GH-105252) * Add a special case for s[-m:] == p in _PyBytes_Find * Add tests for _PyBytes_Find * Make sure that start <= end in mmap.find (cherry picked from commit ab86426a3472ab68747815299d390b213793c3d1) Co-authored-by: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> --- Lib/test/test_mmap.py | 21 ++++ ...-06-02-19-37-29.gh-issue-105235.fgFGTi.rst | 1 + Modules/_testinternalcapi.c | 114 ++++++++++++++++++ Modules/mmapmodule.c | 7 +- Objects/bytesobject.c | 21 +++- 5 files changed, 161 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-06-02-19-37-29.gh-issue-105235.fgFGTi.rst diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 517cbe0cb115ab..bab868600895c1 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -299,6 +299,27 @@ def test_find_end(self): self.assertEqual(m.find(b'one', 1, -2), -1) self.assertEqual(m.find(bytearray(b'one')), 0) + for i in range(-n-1, n+1): + for j in range(-n-1, n+1): + for p in [b"o", b"on", b"two", b"ones", b"s"]: + expected = data.find(p, i, j) + self.assertEqual(m.find(p, i, j), expected, (p, i, j)) + + def test_find_does_not_access_beyond_buffer(self): + try: + flags = mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS + PAGESIZE = mmap.PAGESIZE + PROT_NONE = 0 + PROT_READ = mmap.PROT_READ + except AttributeError as e: + raise unittest.SkipTest("mmap flags unavailable") from e + for i in range(0, 2049): + with mmap.mmap(-1, PAGESIZE * (i + 1), + flags=flags, prot=PROT_NONE) as guard: + with mmap.mmap(-1, PAGESIZE * (i + 2048), + flags=flags, prot=PROT_READ) as fm: + fm.find(b"fo", -2) + def test_rfind(self): # test the new 'end' parameter works as expected diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-02-19-37-29.gh-issue-105235.fgFGTi.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-02-19-37-29.gh-issue-105235.fgFGTi.rst new file mode 100644 index 00000000000000..c28d0101cd4bad --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-02-19-37-29.gh-issue-105235.fgFGTi.rst @@ -0,0 +1 @@ +Prevent out-of-bounds memory access during ``mmap.find()`` calls. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 7e2def7c20b1bb..98f99e4d66cf1f 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -15,6 +15,7 @@ #include "frameobject.h" #include "pycore_atomic_funcs.h" // _Py_atomic_int_get() #include "pycore_bitutils.h" // _Py_bswap32() +#include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble #include "pycore_ceval.h" // _PyEval_AddPendingCall #include "pycore_fileutils.h" // _Py_normpath @@ -441,6 +442,118 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args)) } +static int +check_bytes_find(const char *haystack0, const char *needle0, + int offset, Py_ssize_t expected) +{ + Py_ssize_t len_haystack = strlen(haystack0); + Py_ssize_t len_needle = strlen(needle0); + Py_ssize_t result_1 = _PyBytes_Find(haystack0, len_haystack, + needle0, len_needle, offset); + if (result_1 != expected) { + PyErr_Format(PyExc_AssertionError, + "Incorrect result_1: '%s' in '%s' (offset=%zd)", + needle0, haystack0, offset); + return -1; + } + // Allocate new buffer with no NULL terminator. + char *haystack = PyMem_Malloc(len_haystack); + if (haystack == NULL) { + PyErr_NoMemory(); + return -1; + } + char *needle = PyMem_Malloc(len_needle); + if (needle == NULL) { + PyMem_Free(haystack); + PyErr_NoMemory(); + return -1; + } + memcpy(haystack, haystack0, len_haystack); + memcpy(needle, needle0, len_needle); + Py_ssize_t result_2 = _PyBytes_Find(haystack, len_haystack, + needle, len_needle, offset); + PyMem_Free(haystack); + PyMem_Free(needle); + if (result_2 != expected) { + PyErr_Format(PyExc_AssertionError, + "Incorrect result_2: '%s' in '%s' (offset=%zd)", + needle0, haystack0, offset); + return -1; + } + return 0; +} + +static int +check_bytes_find_large(Py_ssize_t len_haystack, Py_ssize_t len_needle, + const char *needle) +{ + char *zeros = PyMem_RawCalloc(len_haystack, 1); + if (zeros == NULL) { + PyErr_NoMemory(); + return -1; + } + Py_ssize_t res = _PyBytes_Find(zeros, len_haystack, needle, len_needle, 0); + PyMem_RawFree(zeros); + if (res != -1) { + PyErr_Format(PyExc_AssertionError, + "check_bytes_find_large(%zd, %zd) found %zd", + len_haystack, len_needle, res); + return -1; + } + return 0; +} + +static PyObject * +test_bytes_find(PyObject *self, PyObject *Py_UNUSED(args)) +{ + #define CHECK(H, N, O, E) do { \ + if (check_bytes_find(H, N, O, E) < 0) { \ + return NULL; \ + } \ + } while (0) + + CHECK("", "", 0, 0); + CHECK("Python", "", 0, 0); + CHECK("Python", "", 3, 3); + CHECK("Python", "", 6, 6); + CHECK("Python", "yth", 0, 1); + CHECK("ython", "yth", 1, 1); + CHECK("thon", "yth", 2, -1); + CHECK("Python", "thon", 0, 2); + CHECK("ython", "thon", 1, 2); + CHECK("thon", "thon", 2, 2); + CHECK("hon", "thon", 3, -1); + CHECK("Pytho", "zz", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ab", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ba", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "bb", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "ab", 0, 30); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", "ba", 0, 30); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb", "bb", 0, 30); + #undef CHECK + + // Hunt for segfaults + // n, m chosen here so that (n - m) % (m + 1) == 0 + // This would make default_find in fastsearch.h access haystack[n]. + if (check_bytes_find_large(2048, 2, "ab") < 0) { + return NULL; + } + if (check_bytes_find_large(4096, 16, "0123456789abcdef") < 0) { + return NULL; + } + if (check_bytes_find_large(8192, 2, "ab") < 0) { + return NULL; + } + if (check_bytes_find_large(16384, 4, "abcd") < 0) { + return NULL; + } + if (check_bytes_find_large(32768, 2, "ab") < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject * normalize_path(PyObject *self, PyObject *filename) { @@ -950,6 +1063,7 @@ static PyMethodDef module_functions[] = { {"reset_path_config", test_reset_path_config, METH_NOARGS}, {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS}, {"test_edit_cost", test_edit_cost, METH_NOARGS}, + {"test_bytes_find", test_bytes_find, METH_NOARGS}, {"normalize_path", normalize_path, METH_O, NULL}, {"get_getpath_codeobject", get_getpath_codeobject, METH_NOARGS, NULL}, {"EncodeLocaleEx", encode_locale_ex, METH_VARARGS}, diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 6bde9939eaa2ca..1ffadccb916253 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -343,12 +343,17 @@ mmap_gfind(mmap_object *self, Py_ssize_t res; CHECK_VALID_OR_RELEASE(NULL, view); - if (reverse) { + if (end < start) { + res = -1; + } + else if (reverse) { + assert(0 <= start && start <= end && end <= self->size); res = _PyBytes_ReverseFind( self->data + start, end - start, view.buf, view.len, start); } else { + assert(0 <= start && start <= end && end <= self->size); res = _PyBytes_Find( self->data + start, end - start, view.buf, view.len, start); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index abbf3eeb16c35c..f3a978c86c3606 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1274,8 +1274,25 @@ _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack, const char *needle, Py_ssize_t len_needle, Py_ssize_t offset) { - return stringlib_find(haystack, len_haystack, - needle, len_needle, offset); + assert(len_haystack >= 0); + assert(len_needle >= 0); + // Extra checks because stringlib_find accesses haystack[len_haystack]. + if (len_needle == 0) { + return offset; + } + if (len_needle > len_haystack) { + return -1; + } + assert(len_haystack >= 1); + Py_ssize_t res = stringlib_find(haystack, len_haystack - 1, + needle, len_needle, offset); + if (res == -1) { + Py_ssize_t last_align = len_haystack - len_needle; + if (memcmp(haystack + last_align, needle, len_needle) == 0) { + return offset + last_align; + } + } + return res; } Py_ssize_t