From be42f79b68a8f2a062c89aac21d2ea143ce22496 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 11 Apr 2019 11:33:27 +0200 Subject: [PATCH] bpo-36389: _PyObject_IsFreed() now also detects uninitialized memory (GH-12770) Rename _PyMem_IsFreed() function to _PyMem_IsPtrFreed(). The function is now way more efficient, it became a simple comparison on integers, rather than a short loop. It detects also uninitialized bytes and "forbidden bytes" filled by debug hooks on memory allocators. Add unit tests on _PyObject_IsFreed(). (cherry picked from commit 2b00db68554422ec37faba2a80179a0172df6349) --- Include/internal/mem.h | 10 +++++++ Include/pymem.h | 2 -- Lib/test/test_capi.py | 23 ++++++++++++++++ Modules/_testcapimodule.c | 57 +++++++++++++++++++++++++++++++++++++++ Objects/object.c | 15 +++++------ Objects/obmalloc.c | 33 ++++++++++++----------- 6 files changed, 114 insertions(+), 26 deletions(-) diff --git a/Include/internal/mem.h b/Include/internal/mem.h index a731e30e6af7d2..64c07d38f8b348 100644 --- a/Include/internal/mem.h +++ b/Include/internal/mem.h @@ -145,6 +145,16 @@ PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *); #define _PyGC_generation0 _PyRuntime.gc.generation0 +/* Heuristic checking if a pointer value is newly allocated + (uninitialized) or newly freed. The pointer is not dereferenced, only the + pointer value is checked. + + The heuristic relies on the debug hooks on Python memory allocators which + fills newly allocated memory with CLEANBYTE (0xCB) and newly freed memory + with DEADBYTE (0xDB). Detect also "untouchable bytes" marked + with FORBIDDENBYTE (0xFB). 
*/ +PyAPI_FUNC(int) _PyMem_IsPtrFreed(void *ptr); + #ifdef __cplusplus } #endif diff --git a/Include/pymem.h b/Include/pymem.h index ef6e0bb5e25f17..458a6489c75daf 100644 --- a/Include/pymem.h +++ b/Include/pymem.h @@ -55,8 +55,6 @@ PyAPI_FUNC(int) PyTraceMalloc_Untrack( PyAPI_FUNC(PyObject*) _PyTraceMalloc_GetTraceback( unsigned int domain, uintptr_t ptr); - -PyAPI_FUNC(int) _PyMem_IsFreed(void *ptr, size_t size); #endif /* !defined(Py_LIMITED_API) */ diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py index 65e0795aba84d2..9d193bb2865d87 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py @@ -508,6 +508,29 @@ def test_pyobject_malloc_without_gil(self): code = 'import _testcapi; _testcapi.pyobject_malloc_without_gil()' self.check_malloc_without_gil(code) + def check_pyobject_is_freed(self, func): + code = textwrap.dedent(''' + import gc, os, sys, _testcapi + # Disable the GC to avoid crash on GC collection + gc.disable() + obj = _testcapi.{func}() + error = (_testcapi.pyobject_is_freed(obj) == False) + # Exit immediately to avoid a crash while deallocating + # the invalid object + os._exit(int(error)) + ''') + code = code.format(func=func) + assert_python_ok('-c', code, PYTHONMALLOC=self.PYTHONMALLOC) + + def test_pyobject_is_freed_uninitialized(self): + self.check_pyobject_is_freed('pyobject_uninitialized') + + def test_pyobject_is_freed_forbidden_bytes(self): + self.check_pyobject_is_freed('pyobject_forbidden_bytes') + + def test_pyobject_is_freed_free(self): + self.check_pyobject_is_freed('pyobject_freed') + class PyMemMallocDebugTests(PyMemDebugTests): PYTHONMALLOC = 'malloc_debug' diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 1e33ca872d456a..b864f9270e9db1 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -4227,6 +4227,59 @@ test_pymem_getallocatorsname(PyObject *self, PyObject *args) } +static PyObject* +pyobject_is_freed(PyObject *self, PyObject *op) +{ + int res = _PyObject_IsFreed(op); + 
return PyBool_FromLong(res); +} + + +static PyObject* +pyobject_uninitialized(PyObject *self, PyObject *args) +{ + PyObject *op = (PyObject *)PyObject_Malloc(sizeof(PyObject)); + if (op == NULL) { + return NULL; + } + /* Initialize reference count to avoid early crash in ceval or GC */ + Py_REFCNT(op) = 1; + /* object fields like ob_type are uninitialized! */ + return op; +} + + +static PyObject* +pyobject_forbidden_bytes(PyObject *self, PyObject *args) +{ + /* Allocate an incomplete PyObject structure: truncate 'ob_type' field */ + PyObject *op = (PyObject *)PyObject_Malloc(offsetof(PyObject, ob_type)); + if (op == NULL) { + return NULL; + } + /* Initialize reference count to avoid early crash in ceval or GC */ + Py_REFCNT(op) = 1; + /* ob_type field is after the memory block: part of "forbidden bytes" + when using debug hooks on memory allocators! */ + return op; +} + + +static PyObject* +pyobject_freed(PyObject *self, PyObject *args) +{ + PyObject *op = _PyObject_CallNoArg((PyObject *)&PyBaseObject_Type); + if (op == NULL) { + return NULL; + } + Py_TYPE(op)->tp_dealloc(op); + /* Reset reference count to avoid early crash in ceval or GC */ + Py_REFCNT(op) = 1; + /* object memory is freed! 
*/ + return op; +} + + static PyObject* pyobject_malloc_without_gil(PyObject *self, PyObject *args) { @@ -4788,6 +4841,10 @@ static PyMethodDef TestMethods[] = { {"pymem_api_misuse", pymem_api_misuse, METH_NOARGS}, {"pymem_malloc_without_gil", pymem_malloc_without_gil, METH_NOARGS}, {"pymem_getallocatorsname", test_pymem_getallocatorsname, METH_NOARGS}, + {"pyobject_is_freed", (PyCFunction)(void(*)(void))pyobject_is_freed, METH_O}, + {"pyobject_uninitialized", pyobject_uninitialized, METH_NOARGS}, + {"pyobject_forbidden_bytes", pyobject_forbidden_bytes, METH_NOARGS}, + {"pyobject_freed", pyobject_freed, METH_NOARGS}, {"pyobject_malloc_without_gil", pyobject_malloc_without_gil, METH_NOARGS}, {"tracemalloc_track", tracemalloc_track, METH_VARARGS}, {"tracemalloc_untrack", tracemalloc_untrack, METH_VARARGS}, diff --git a/Objects/object.c b/Objects/object.c index 138df448802784..e4ecaa8ae09143 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -421,18 +421,17 @@ _Py_BreakPoint(void) int _PyObject_IsFreed(PyObject *op) { - uintptr_t ptr = (uintptr_t)op; - if (_PyMem_IsFreed(&ptr, sizeof(ptr))) { + if (_PyMem_IsPtrFreed(op) || _PyMem_IsPtrFreed(op->ob_type)) { return 1; } - int freed = _PyMem_IsFreed(&op->ob_type, sizeof(op->ob_type)); - /* ignore op->ob_ref: the value can have be modified + /* ignore op->ob_refcnt: its value can have been modified by Py_INCREF() and Py_DECREF(). */ #ifdef Py_TRACE_REFS - freed &= _PyMem_IsFreed(&op->_ob_next, sizeof(op->_ob_next)); - freed &= _PyMem_IsFreed(&op->_ob_prev, sizeof(op->_ob_prev)); + if (_PyMem_IsPtrFreed(op->_ob_next) || _PyMem_IsPtrFreed(op->_ob_prev)) { + return 1; + } #endif - return freed; + return 0; } @@ -449,7 +448,7 @@ _PyObject_Dump(PyObject* op) if (_PyObject_IsFreed(op)) { /* It seems like the object memory has been freed: don't access it to prevent a segmentation fault. 
*/ - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); return; } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 3b0c35bcc941a9..7d248d03e8e608 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1955,6 +1955,23 @@ _Py_GetAllocatedBlocks(void) #define DEADBYTE 0xDB /* dead (newly freed) memory */ #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ +int +_PyMem_IsPtrFreed(void *ptr) +{ + uintptr_t value = (uintptr_t)ptr; +#if SIZEOF_VOID_P == 8 + return (value == (uintptr_t)0xCBCBCBCBCBCBCBCB + || value == (uintptr_t)0xDBDBDBDBDBDBDBDB + || value == (uintptr_t)0xFBFBFBFBFBFBFBFB); +#elif SIZEOF_VOID_P == 4 + return (value == (uintptr_t)0xCBCBCBCB + || value == (uintptr_t)0xDBDBDBDB + || value == (uintptr_t)0xFBFBFBFB); +#else +# error "unknown pointer size" +#endif +} + static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ /* serialno is always incremented via calling this routine. The point is @@ -2091,22 +2108,6 @@ _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize) } -/* Heuristic checking if the memory has been freed. Rely on the debug hooks on - Python memory allocators which fills the memory with DEADBYTE (0xDB) when - memory is deallocated. */ -int -_PyMem_IsFreed(void *ptr, size_t size) -{ - unsigned char *bytes = ptr; - for (size_t i=0; i < size; i++) { - if (bytes[i] != DEADBYTE) { - return 0; - } - } - return 1; -} - - /* The debug free first checks the 2*SST bytes on each end for sanity (in particular, that the FORBIDDENBYTEs with the api ID are still intact). Then fills the original bytes with DEADBYTE.