From f594e3506fbeefd1501194e18a1339754e2e7708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 28 Nov 2023 11:32:00 +0000 Subject: [PATCH 1/7] Add the heapremove() function --- Lib/heapq.py | 33 +++++++++++++ Lib/test/test_heapq.py | 43 +++++++++++++++++ Modules/_heapqmodule.c | 83 +++++++++++++++++++++++++++++++-- Modules/clinic/_heapqmodule.c.h | 65 +++++++++++++++++++++++++- 4 files changed, 219 insertions(+), 5 deletions(-) diff --git a/Lib/heapq.py b/Lib/heapq.py index 2fd9d1ff4bf827..ead3bea57beed9 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -167,6 +167,38 @@ def heappushpop(heap, item): _siftup(heap, 0) return item +def heapremove(heap, index, item=None): + """Remove the element at the given index maintaining the heap invariant. + + An optional item can be provided to replace the removed item. The removed + item is returned. + This can be used to efficiently remove an item from the heap or + to readjust the heap when the comparative "value" of + an item changes by removing and re-inserting the same item, e.g: + + item.value=new_value + idx = heap.find(item) + heapq.heapremove(heap, idx, item) + """ + result = heap[index] + if index < 0: + index += len(heap) + if index < len(heap) - 1: + # common case + if item is None: + item = heap.pop() + heap[index] = item + index = _siftdown(heap, 0, index) + _siftup(heap, index) + else: + # last item + if item is None: + heap.pop() + else: + heap[index] = item + _siftdown(heap, 0, index) + return result + def heapify(x): """Transform list into a heap, in-place, in O(len(x)) time.""" n = len(x) @@ -217,6 +249,7 @@ def _siftdown(heap, startpos, pos): continue break heap[pos] = newitem + return pos # The child indices of heap index pos are already heaps, and we want to make # a heap at index pos too. We do this by bubbling the smaller child of diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 1aa8e4e289730d..59beee7d2d4e1e 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -263,6 +263,49 @@ def __le__(self, other): self.assertEqual(hsort(data, LT), target) self.assertRaises(TypeError, data, LE) + def test_remove(self): + data = [random.random() for i in range(100)] + self.module.heapify(data) + + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, len(data)) + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, -len(data) - 1) + + for i in range(len(data)): + i = random.randrange(-len(data), len(data)) + if random.random() < 0.05: + i = 0 # ensure we try 0 + elif random.random() < 0.05: + i = len(data) - 1 + print(len(data), i) + v = data[i] + self.assertIs(self.module.heapremove(data, i), v) + self.check_invariant(data) + self.assertFalse(data) + + def test_remove_replace(self): + data = [random.random() for i in range(100)] + self.module.heapify(data) + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, len(data)) + with self.assertRaises(IndexError) as e: + self.module.heapremove(data, -len(data) - 1) + + for i in range(200): + i = random.randrange(-len(data), len(data)) + if random.random() < 0.05: + i = 0 # ensure we try 0 + elif random.random() < 0.05: + i = len(data) - 1 + replace = random.random() + print(len(data), i, replace) + v = data[i] + assert self.module.heapremove(data, i, replace) is v + self.check_invariant(data) + + self.assertEqual(len(data), 100) + class TestHeapPython(TestHeap, TestCase): module = py_heapq diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 9d4ec256ee9e3e..8aac23fa2f64db 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -22,10 +22,11 @@ module _heapq /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d7cca0a2e4c0ceb3]*/ static int -siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) +siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t *ppos) { PyObject *newitem, *parent, **arr; Py_ssize_t parentpos, size; + Py_ssize_t pos = *ppos; int cmp; assert(PyList_Check(heap)); @@ -63,6 +64,7 @@ siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr[pos] = parent; pos = parentpos; } + *ppos = pos; return 0; } @@ -113,7 +115,7 @@ siftup(PyListObject *heap, Py_ssize_t pos) pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ - return siftdown(heap, startpos, pos); + return siftdown(heap, startpos, &pos); } /*[clinic input] @@ -132,8 +134,8 @@ _heapq_heappush_impl(PyObject *module, PyObject *heap, PyObject *item) { if (PyList_Append(heap, item)) return NULL; - - if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) + Py_ssize_t pos = PyList_GET_SIZE(heap)-1; + if (siftdown((PyListObject *)heap, 0, &pos)) return NULL; Py_RETURN_NONE; } @@ -279,6 +281,78 @@ _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) return returnitem; } + +/*[clinic input] +_heapq.heapremove + + heap: object(subclass_of='&PyList_Type') + index: Py_ssize_t + item: object = NULL + / + +Remove the element at the given index maintaining the heap invariant. + +An optional item can be provided to replace the removed item. The removed +item is returned. +This can be used to efficiently remove an item from the heap or +to readjust the heap when the comparative "value" of +an item changes by removing and re-inserting the same item, e.g: + + item.value=new_value + idx = heap.find(item) + heapq.heapremove(heap, idx, item) +[clinic start generated code]*/ + +static PyObject * +_heapq_heapremove_impl(PyObject *module, PyObject *heap, Py_ssize_t index, + PyObject *item) +/*[clinic end generated code: output=2d84b49ff0255276 input=09260f2c67bd4171]*/ +{ + PyObject *returnitem; + Py_ssize_t n = PyList_GET_SIZE(heap); + if (index < 0) + index += n; + if (index < 0 || index >= n) + { + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + returnitem = PyList_GET_ITEM(heap, index); + if (index < n - 1) + { + /* common case */ + if (item) { + PyList_SET_ITEM(heap, index, item); + Py_INCREF(item); + } else { + /* replace with the popped value */ + item = PyList_GET_ITEM(heap, n-1); + Py_INCREF(item); + if (PyList_SetSlice(heap, n-1, n, NULL)) { + Py_DECREF(item); + return NULL; + } + PyList_SET_ITEM(heap, index, item); + } + siftdown((PyListObject *)heap, 0, &index); + siftup((PyListObject *)heap, index); + } else { + /* the tail case */ + if (item) { + PyList_SET_ITEM(heap, index, item); + Py_INCREF(item); + siftdown((PyListObject *)heap, 0, &index); + } else { + Py_INCREF(returnitem); + if (PyList_SetSlice(heap, index, n, NULL)) { + Py_DECREF(returnitem); + return NULL; + } + } + } + return returnitem; +} + static Py_ssize_t keep_top_bit(Py_ssize_t n) { @@ -540,6 +614,7 @@ static PyMethodDef heapq_methods[] = { _HEAPQ__HEAPPOP_MAX_METHODDEF _HEAPQ__HEAPIFY_MAX_METHODDEF _HEAPQ__HEAPREPLACE_MAX_METHODDEF + _HEAPQ_HEAPREMOVE_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 9046307990773b..10d502d42689ef 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -2,6 +2,7 @@ preserve [clinic start generated code]*/ +#include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_heapq_heappush__doc__, @@ -146,6 +147,68 @@ _heapq_heappushpop(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_heapq_heapremove__doc__, +"heapremove($module, heap, index, item=, /)\n" +"--\n" +"\n" +"Remove the element at the given index maintaining the heap invariant.\n" +"\n" +"An optional item can be provided to replace the removed item. The removed\n" +"item is returned.\n" +"This can be used to efficiently remove an item from the heap or\n" +"to readjust the heap when the comparative \"value\" of\n" +"an item changes by removing and re-inserting the same item, e.g:\n" +"\n" +" item.value=new_value\n" +" idx = heap.find(item)\n" +" heapq.heapremove(heap, idx, item)"); + +#define _HEAPQ_HEAPREMOVE_METHODDEF \ + {"heapremove", _PyCFunction_CAST(_heapq_heapremove), METH_FASTCALL, _heapq_heapremove__doc__}, + +static PyObject * +_heapq_heapremove_impl(PyObject *module, PyObject *heap, Py_ssize_t index, + PyObject *item); + +static PyObject * +_heapq_heapremove(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *heap; + Py_ssize_t index; + PyObject *item = NULL; + + if (!_PyArg_CheckPositional("heapremove", nargs, 2, 3)) { + goto exit; + } + if (!PyList_Check(args[0])) { + _PyArg_BadArgument("heapremove", "argument 1", "list", args[0]); + goto exit; + } + heap = args[0]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + index = ival; + } + if (nargs < 3) { + goto skip_optional; + } + item = args[2]; +skip_optional: + return_value = _heapq_heapremove_impl(module, heap, index, item); + +exit: + return return_value; +} + PyDoc_STRVAR(_heapq_heapify__doc__, "heapify($module, heap, /)\n" "--\n" @@ -267,4 +330,4 @@ _heapq__heapify_max(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=05f2afdf3bc54c9d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a752a0808801ab63 input=a9049054013a1b77]*/ From aaf5c1c65708b2addfcf85975051b22d49e14612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 28 Nov 2023 11:43:34 +0000 Subject: [PATCH 2/7] fixes --- Lib/test/test_heapq.py | 10 ++++++++-- Modules/_heapqmodule.c | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 59beee7d2d4e1e..1686fa7dd02732 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -278,7 +278,7 @@ def test_remove(self): i = 0 # ensure we try 0 elif random.random() < 0.05: i = len(data) - 1 - print(len(data), i) + # print(len(data), i) v = data[i] self.assertIs(self.module.heapremove(data, i), v) self.check_invariant(data) @@ -299,13 +299,19 @@ def test_remove_replace(self): elif random.random() < 0.05: i = len(data) - 1 replace = random.random() - print(len(data), i, replace) + # print(len(data), i, replace) v = data[i] assert self.module.heapremove(data, i, replace) is v self.check_invariant(data) self.assertEqual(len(data), 100) + def test_remove_replace_err(self): + data = [random.random() for i in range(100)] + self.module.heapify(data) + with self.assertRaisesRegex(TypeError, r"not supported.*'str' and 'float'"): + self.module.heapremove(data, 50, "foo") + class TestHeapPython(TestHeap, TestCase): module = py_heapq diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 8aac23fa2f64db..2539e714d42ca2 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -312,20 +312,18 @@ _heapq_heapremove_impl(PyObject *module, PyObject *heap, Py_ssize_t index, Py_ssize_t n = PyList_GET_SIZE(heap); if (index < 0) index += n; - if (index < 0 || index >= n) - { + if (index < 0 || index >= n) { PyErr_SetString(PyExc_IndexError, "index out of range"); return NULL; } returnitem = PyList_GET_ITEM(heap, index); - if (index < n - 1) - { + if (index < n - 1) { /* common case */ if (item) { PyList_SET_ITEM(heap, index, item); Py_INCREF(item); } else { - /* replace with the popped value */ + /* replace with the last value */ item = PyList_GET_ITEM(heap, n-1); Py_INCREF(item); if (PyList_SetSlice(heap, n-1, n, NULL)) { @@ -334,14 +332,19 @@ _heapq_heapremove_impl(PyObject *module, PyObject *heap, Py_ssize_t index, } PyList_SET_ITEM(heap, index, item); } - siftdown((PyListObject *)heap, 0, &index); - siftup((PyListObject *)heap, index); + if (siftdown((PyListObject *)heap, 0, &index) || siftup((PyListObject *)heap, index)) { + Py_DECREF(returnitem); + return NULL; + } } else { /* the tail case */ if (item) { PyList_SET_ITEM(heap, index, item); Py_INCREF(item); - siftdown((PyListObject *)heap, 0, &index); + if (siftdown((PyListObject *)heap, 0, &index)) { + Py_DECREF(returnitem); + return NULL; + } } else { Py_INCREF(returnitem); if (PyList_SetSlice(heap, index, n, NULL)) { From e215da3b7bf8be6087e11f101473cf96c4e49766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 28 Nov 2023 11:49:54 +0000 Subject: [PATCH 3/7] whitespace --- Lib/test/test_heapq.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 1686fa7dd02732..22b22e953dd654 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -271,7 +271,7 @@ def test_remove(self): self.module.heapremove(data, len(data)) with self.assertRaises(IndexError) as e: self.module.heapremove(data, -len(data) - 1) - + for i in range(len(data)): i = random.randrange(-len(data), len(data)) if random.random() < 0.05: @@ -291,7 +291,7 @@ def test_remove_replace(self): self.module.heapremove(data, len(data)) with self.assertRaises(IndexError) as e: self.module.heapremove(data, -len(data) - 1) - + for i in range(200): i = random.randrange(-len(data), len(data)) if random.random() < 0.05: @@ -303,7 +303,6 @@ def test_remove_replace(self): v = data[i] assert self.module.heapremove(data, i, replace) is v self.check_invariant(data) - self.assertEqual(len(data), 100) def test_remove_replace_err(self): @@ -311,7 +310,7 @@ def test_remove_replace_err(self): self.module.heapify(data) with self.assertRaisesRegex(TypeError, r"not supported.*'str' and 'float'"): self.module.heapremove(data, 50, "foo") - + class TestHeapPython(TestHeap, TestCase): module = py_heapq From 293a4458cffc23b660cc3aaff924eaf7aaf76a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 28 Nov 2023 14:59:32 +0000 Subject: [PATCH 4/7] Add operations count test --- Lib/test/test_heapq.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 22b22e953dd654..9fb6ffc4760614 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -311,6 +311,47 @@ def test_remove_replace_err(self): with self.assertRaisesRegex(TypeError, r"not supported.*'str' and 'float'"): self.module.heapremove(data, 50, "foo") + def test_remove_ops(self): + # test the efficienty of heapremove as opposed to + # a simple remove and heapify + + def dumbremove(heap, index, item=None): + if item is None: + item = heap.pop() + if index == len(heap): + return + heap[index] = item + self.module.heapify(heap) + + class HC: + opcount = 0 + def __init__(self, val): + self.val = val + def __lt__(self, other): + type(self).opcount += 1 + return self.val < other.val + + data = [HC(random.random()) for i in range(100)] + self.module.heapify(data) + heapcopy = data[:] + state = random.getstate() + + HC.opcount = 0 + while data: + self.module.heapremove(data, random.randrange(0, len(data))) + c1 = HC.opcount + + data = heapcopy + random.setstate(state) + HC.opcount = 0 + while data: + dumbremove(data, random.randrange(0, len(data))) + c2 = HC.opcount + + # we should be at least 10 times more efficient (more like 40) + # print(c1, c2) + self.assertTrue(c2 > c1 * 10) + class TestHeapPython(TestHeap, TestCase): module = py_heapq From 6991ffea99be98c328af6d594b2d5c33340bbb63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 28 Nov 2023 17:16:46 +0000 Subject: [PATCH 5/7] documentation --- Doc/library/heapq.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 8b00f7b27959b6..a75d00f743b934 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -80,6 +80,21 @@ The following functions are provided: on the heap. +.. function:: heapremove(heap, index, item=None): + + Remove the item at ``heap[index]``, optionally replacing it with *item*, + while maintaining the heap invariant. + + This is more efficient than performing ``del heap[index]`` followed + by :func:`heapify`. + + In case an items *value* has changed and its index is known, this + function can be used to restore the heap invariant: + ``heapremove(heap, item_index, item)`` + + .. versionadded:: 3.13 + + The module also offers three general purpose functions based on heaps. From 07babe53a5b2b2d401d602c1547f97ea36a513e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 28 Nov 2023 17:44:38 +0000 Subject: [PATCH 6/7] fix doc --- Lib/heapq.py | 2 +- Modules/_heapqmodule.c | 4 ++-- Modules/clinic/_heapqmodule.c.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/heapq.py b/Lib/heapq.py index ead3bea57beed9..80269b8bbbb3eb 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -177,7 +177,7 @@ def heapremove(heap, index, item=None): an item changes by removing and re-inserting the same item, e.g: item.value=new_value - idx = heap.find(item) + idx = heap.index(item) heapq.heapremove(heap, idx, item) """ result = heap[index] diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 2539e714d42ca2..56289116ef2de9 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -299,14 +299,14 @@ to readjust the heap when the comparative "value" of an item changes by removing and re-inserting the same item, e.g: item.value=new_value - idx = heap.find(item) + idx = heap.index(item) heapq.heapremove(heap, idx, item) [clinic start generated code]*/ static PyObject * _heapq_heapremove_impl(PyObject *module, PyObject *heap, Py_ssize_t index, PyObject *item) -/*[clinic end generated code: output=2d84b49ff0255276 input=09260f2c67bd4171]*/ +/*[clinic end generated code: output=2d84b49ff0255276 input=13bbd40fb4dee29e]*/ { PyObject *returnitem; Py_ssize_t n = PyList_GET_SIZE(heap); diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 10d502d42689ef..033af509ab19ce 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -160,7 +160,7 @@ PyDoc_STRVAR(_heapq_heapremove__doc__, "an item changes by removing and re-inserting the same item, e.g:\n" "\n" " item.value=new_value\n" -" idx = heap.find(item)\n" +" idx = heap.index(item)\n" " heapq.heapremove(heap, idx, item)"); #define _HEAPQ_HEAPREMOVE_METHODDEF \ @@ -330,4 +330,4 @@ _heapq__heapify_max(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=a752a0808801ab63 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=10ce297f0322c817 input=a9049054013a1b77]*/ From 1f29a836a6c76182d266749490b4d10b88854ba6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Fri, 1 Dec 2023 11:09:02 +0000 Subject: [PATCH 7/7] stricter tests --- Lib/test/test_heapq.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 9fb6ffc4760614..e06205094aaf25 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -266,11 +266,15 @@ def __le__(self, other): def test_remove(self): data = [random.random() for i in range(100)] self.module.heapify(data) + heapset = set(data) + self.assertEqual(len(data), len(heapset)) with self.assertRaises(IndexError) as e: self.module.heapremove(data, len(data)) with self.assertRaises(IndexError) as e: self.module.heapremove(data, -len(data) - 1) + self.check_invariant(data) + self.assertEqual(heapset, set(data)) for i in range(len(data)): i = random.randrange(-len(data), len(data)) @@ -280,17 +284,24 @@ def test_remove(self): i = len(data) - 1 # print(len(data), i) v = data[i] + heapset.remove(v) self.assertIs(self.module.heapremove(data, i), v) + self.assertEqual(heapset, set(data)) self.check_invariant(data) self.assertFalse(data) def test_remove_replace(self): data = [random.random() for i in range(100)] self.module.heapify(data) + heapset = set(data) + self.assertEqual(len(data), len(heapset)) + with self.assertRaises(IndexError) as e: self.module.heapremove(data, len(data)) with self.assertRaises(IndexError) as e: self.module.heapremove(data, -len(data) - 1) + self.check_invariant(data) + self.assertEqual(heapset, set(data)) for i in range(200): i = random.randrange(-len(data), len(data)) @@ -301,7 +312,10 @@ def test_remove_replace(self): replace = random.random() # print(len(data), i, replace) v = data[i] + heapset.remove(v) + heapset.add(replace) assert self.module.heapremove(data, i, replace) is v + self.assertEqual(heapset, set(data)) self.check_invariant(data) self.assertEqual(len(data), 100)