diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 8b00f7b27959b6..a75d00f743b934 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -80,6 +80,21 @@ The following functions are provided: on the heap. +.. function:: heapremove(heap, index, item=None): + + Remove the item at ``heap[index]``, optionally replacing it with *item*, + while maintaining the heap invariant. + + This is more efficient than performing ``del heap[index]`` followed + by :func:`heapify`. + + In case an items *value* has changed and its index is known, this + function can be used to restore the heap invariant: + ``heapremove(heap, item_index, item)`` + + .. versionadded:: 3.13 + + The module also offers three general purpose functions based on heaps. diff --git a/Lib/heapq.py b/Lib/heapq.py index 2fd9d1ff4bf827..80269b8bbbb3eb 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -167,6 +167,38 @@ def heappushpop(heap, item): _siftup(heap, 0) return item +def heapremove(heap, index, item=None): + """Remove the element at the given index maintaining the heap invariant. + + An optional item can be provided to replace the removed item. The removed + item is returned. + This can be used to efficiently remove an item from the heap or + to readjust the heap when the comparative "value" of + an item changes by removing and re-inserting the same item, e.g: + + item.value=new_value + idx = heap.index(item) + heapq.heapremove(heap, idx, item) + """ + result = heap[index] + if index < 0: + index += len(heap) + if index < len(heap) - 1: + # common case + if item is None: + item = heap.pop() + heap[index] = item + index = _siftdown(heap, 0, index) + _siftup(heap, index) + else: + # last item + if item is None: + heap.pop() + else: + heap[index] = item + _siftdown(heap, 0, index) + return result + def heapify(x): """Transform list into a heap, in-place, in O(len(x)) time.""" n = len(x) @@ -217,6 +249,7 @@ def _siftdown(heap, startpos, pos): continue break heap[pos] = newitem + return pos # The child indices of heap index pos are already heaps, and we want to make # a heap at index pos too. We do this by bubbling the smaller child of diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 1aa8e4e289730d..e06205094aaf25 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -263,6 +263,109 @@ def __le__(self, other): self.assertEqual(hsort(data, LT), target) self.assertRaises(TypeError, data, LE) + def test_remove(self): + data = [random.random() for i in range(100)] + self.module.heapify(data) + heapset = set(data) + self.assertEqual(len(data), len(heapset)) + + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, len(data)) + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, -len(data) - 1) + self.check_invariant(data) + self.assertEqual(heapset, set(data)) + + for i in range(len(data)): + i = random.randrange(-len(data), len(data)) + if random.random() < 0.05: + i = 0 # ensure we try 0 + elif random.random() < 0.05: + i = len(data) - 1 + # print(len(data), i) + v = data[i] + heapset.remove(v) + self.assertIs(self.module.heapremove(data, i), v) + self.assertEqual(heapset, set(data)) + self.check_invariant(data) + self.assertFalse(data) + + def test_remove_replace(self): + data = [random.random() for i in range(100)] + self.module.heapify(data) + heapset = set(data) + self.assertEqual(len(data), len(heapset)) + + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, len(data)) + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, -len(data) - 1) + self.check_invariant(data) + self.assertEqual(heapset, set(data)) + + for i in range(200): + i = random.randrange(-len(data), len(data)) + if random.random() < 0.05: + i = 0 # ensure we try 0 + elif random.random() < 0.05: + i = len(data) - 1 + replace = random.random() + # print(len(data), i, replace) + v = data[i] + heapset.remove(v) + heapset.add(replace) + assert self.module.heapremove(data, i, replace) is v + self.assertEqual(heapset, set(data)) + self.check_invariant(data) + self.assertEqual(len(data), 100) + + def test_remove_replace_err(self): + data = [random.random() for i in range(100)] + self.module.heapify(data) + with self.assertRaisesRegex(TypeError, r"not supported.*'str' and 'float'"): + self.module.heapremove(data, 50, "foo") + + def test_remove_ops(self): + # test the efficienty of heapremove as opposed to + # a simple remove and heapify + + def dumbremove(heap, index, item=None): + if item is None: + item = heap.pop() + if index == len(heap): + return + heap[index] = item + self.module.heapify(heap) + + class HC: + opcount = 0 + def __init__(self, val): + self.val = val + def __lt__(self, other): + type(self).opcount += 1 + return self.val < other.val + + data = [HC(random.random()) for i in range(100)] + self.module.heapify(data) + heapcopy = data[:] + state = random.getstate() + + HC.opcount = 0 + while data: + self.module.heapremove(data, random.randrange(0, len(data))) + c1 = HC.opcount + + data = heapcopy + random.setstate(state) + HC.opcount = 0 + while data: + dumbremove(data, random.randrange(0, len(data))) + c2 = HC.opcount + + # we should be at least 10 times more efficient (more like 40) + # print(c1, c2) + self.assertTrue(c2 > c1 * 10) + class TestHeapPython(TestHeap, TestCase): module = py_heapq diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 9d4ec256ee9e3e..56289116ef2de9 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -22,10 +22,11 @@ module _heapq /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d7cca0a2e4c0ceb3]*/ static int -siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) +siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t *ppos) { PyObject *newitem, *parent, **arr; Py_ssize_t parentpos, size; + Py_ssize_t pos = *ppos; int cmp; assert(PyList_Check(heap)); @@ -63,6 +64,7 @@ siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr[pos] = parent; pos = parentpos; } + *ppos = pos; return 0; } @@ -113,7 +115,7 @@ siftup(PyListObject *heap, Py_ssize_t pos) pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ - return siftdown(heap, startpos, pos); + return siftdown(heap, startpos, &pos); } /*[clinic input] @@ -132,8 +134,8 @@ _heapq_heappush_impl(PyObject *module, PyObject *heap, PyObject *item) { if (PyList_Append(heap, item)) return NULL; - - if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) + Py_ssize_t pos = PyList_GET_SIZE(heap)-1; + if (siftdown((PyListObject *)heap, 0, &pos)) return NULL; Py_RETURN_NONE; } @@ -279,6 +281,81 @@ _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) return returnitem; } + +/*[clinic input] +_heapq.heapremove + + heap: object(subclass_of='&PyList_Type') + index: Py_ssize_t + item: object = NULL + / + +Remove the element at the given index maintaining the heap invariant. + +An optional item can be provided to replace the removed item. The removed +item is returned. +This can be used to efficiently remove an item from the heap or +to readjust the heap when the comparative "value" of +an item changes by removing and re-inserting the same item, e.g: + + item.value=new_value + idx = heap.index(item) + heapq.heapremove(heap, idx, item) +[clinic start generated code]*/ + +static PyObject * +_heapq_heapremove_impl(PyObject *module, PyObject *heap, Py_ssize_t index, + PyObject *item) +/*[clinic end generated code: output=2d84b49ff0255276 input=13bbd40fb4dee29e]*/ +{ + PyObject *returnitem; + Py_ssize_t n = PyList_GET_SIZE(heap); + if (index < 0) + index += n; + if (index < 0 || index >= n) { + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + returnitem = PyList_GET_ITEM(heap, index); + if (index < n - 1) { + /* common case */ + if (item) { + PyList_SET_ITEM(heap, index, item); + Py_INCREF(item); + } else { + /* replace with the last value */ + item = PyList_GET_ITEM(heap, n-1); + Py_INCREF(item); + if (PyList_SetSlice(heap, n-1, n, NULL)) { + Py_DECREF(item); + return NULL; + } + PyList_SET_ITEM(heap, index, item); + } + if (siftdown((PyListObject *)heap, 0, &index) || siftup((PyListObject *)heap, index)) { + Py_DECREF(returnitem); + return NULL; + } + } else { + /* the tail case */ + if (item) { + PyList_SET_ITEM(heap, index, item); + Py_INCREF(item); + if (siftdown((PyListObject *)heap, 0, &index)) { + Py_DECREF(returnitem); + return NULL; + } + } else { + Py_INCREF(returnitem); + if (PyList_SetSlice(heap, index, n, NULL)) { + Py_DECREF(returnitem); + return NULL; + } + } + } + return returnitem; +} + static Py_ssize_t keep_top_bit(Py_ssize_t n) { @@ -540,6 +617,7 @@ static PyMethodDef heapq_methods[] = { _HEAPQ__HEAPPOP_MAX_METHODDEF _HEAPQ__HEAPIFY_MAX_METHODDEF _HEAPQ__HEAPREPLACE_MAX_METHODDEF + _HEAPQ_HEAPREMOVE_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 9046307990773b..033af509ab19ce 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -2,6 +2,7 @@ preserve [clinic start generated code]*/ +#include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_heapq_heappush__doc__, @@ -146,6 +147,68 @@ _heapq_heappushpop(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_heapq_heapremove__doc__, +"heapremove($module, heap, index, item=, /)\n" +"--\n" +"\n" +"Remove the element at the given index maintaining the heap invariant.\n" +"\n" +"An optional item can be provided to replace the removed item. The removed\n" +"item is returned.\n" +"This can be used to efficiently remove an item from the heap or\n" +"to readjust the heap when the comparative \"value\" of\n" +"an item changes by removing and re-inserting the same item, e.g:\n" +"\n" +" item.value=new_value\n" +" idx = heap.index(item)\n" +" heapq.heapremove(heap, idx, item)"); + +#define _HEAPQ_HEAPREMOVE_METHODDEF \ + {"heapremove", _PyCFunction_CAST(_heapq_heapremove), METH_FASTCALL, _heapq_heapremove__doc__}, + +static PyObject * +_heapq_heapremove_impl(PyObject *module, PyObject *heap, Py_ssize_t index, + PyObject *item); + +static PyObject * +_heapq_heapremove(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *heap; + Py_ssize_t index; + PyObject *item = NULL; + + if (!_PyArg_CheckPositional("heapremove", nargs, 2, 3)) { + goto exit; + } + if (!PyList_Check(args[0])) { + _PyArg_BadArgument("heapremove", "argument 1", "list", args[0]); + goto exit; + } + heap = args[0]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + index = ival; + } + if (nargs < 3) { + goto skip_optional; + } + item = args[2]; +skip_optional: + return_value = _heapq_heapremove_impl(module, heap, index, item); + +exit: + return return_value; +} + PyDoc_STRVAR(_heapq_heapify__doc__, "heapify($module, heap, /)\n" "--\n" @@ -267,4 +330,4 @@ _heapq__heapify_max(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=05f2afdf3bc54c9d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=10ce297f0322c817 input=a9049054013a1b77]*/