From db85987e7f665b808cc4a865a26b259b19320724 Mon Sep 17 00:00:00 2001 From: abebus Date: Mon, 27 Oct 2025 23:21:36 +0300 Subject: [PATCH 1/3] this reuses SOME interned strings, but not `utf-8` and friends --- Objects/codeobject.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 0d264a6e346f95..a3502be6e559a0 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -113,7 +113,7 @@ PyCode_ClearWatcher(int watcher_id) #define _PyCodeObject_CAST(op) (assert(PyCode_Check(op)), (PyCodeObject *)(op)) -static int +static inline int should_intern_string(PyObject *o) { #ifdef Py_GIL_DISABLED @@ -196,6 +196,22 @@ intern_strings(PyObject *tuple) return 0; } +static inline PyObject* +get_interned_string(PyObject *interned_dict, PyObject *s) { + if (!PyUnicode_CheckExact(s)) { + return NULL; + } + + PyObject *existing = PyDict_GetItemWithError(interned_dict, s); + if (existing == NULL) { + if (PyErr_Occurred()) { + return NULL; + } + return NULL; + } + return existing; +} + /* Intern constants. In the default build, this interns selected string constants. In the free-threaded build, this also interns non-string constants. */ @@ -203,10 +219,22 @@ static int intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); + PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); - if (PyUnicode_CheckExact(v)) { - if (should_intern_string(v)) { + if (PyUnicode_CheckExact(v) && PyUnicode_GET_LENGTH(v) > 1) { + if (PyUnicode_CHECK_INTERNED(v) != 0) { + continue; + } + PyObject *interned = get_interned_string(interned_dict, v); + if (interned != NULL && interned != v) { + Py_INCREF(interned); + PyTuple_SET_ITEM(tuple, i, interned); + Py_DECREF(v); + if (modified) { + *modified = 1; + } + } else if (should_intern_string(v)) { PyObject *w = v; _PyUnicode_InternMortal(interp, &v); if (w != v) { From e448a20b585612fd99ca9009d3dde21bf0a9a116 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Tue, 28 Oct 2025 00:27:32 +0300 Subject: [PATCH 2/3] Update codeobject.c: - unnecessary check --- Objects/codeobject.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index a3502be6e559a0..7dc9af720ffd12 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -198,10 +198,6 @@ intern_strings(PyObject *tuple) static inline PyObject* get_interned_string(PyObject *interned_dict, PyObject *s) { - if (!PyUnicode_CheckExact(s)) { - return NULL; - } - PyObject *existing = PyDict_GetItemWithError(interned_dict, s); if (existing == NULL) { if (PyErr_Occurred()) { From ba291d332651412901ad49d161583ee1c2eea4dc Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Tue, 28 Oct 2025 07:16:51 +0000 Subject: [PATCH 3/3] correct error handling, refcount interned_dict --- Objects/codeobject.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 7dc9af720ffd12..55a7cbaa951dc2 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -196,17 +196,6 @@ intern_strings(PyObject *tuple) return 0; } -static inline PyObject* -get_interned_string(PyObject *interned_dict, PyObject *s) { - PyObject *existing = PyDict_GetItemWithError(interned_dict, s); - if (existing == NULL) { - if (PyErr_Occurred()) { - return NULL; - } - return NULL; - } - return existing; -} /* Intern constants. In the default build, this interns selected string constants. In the free-threaded build, this also interns non-string @@ -216,13 +205,17 @@ intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); + Py_INCREF(interned_dict); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v) && PyUnicode_GET_LENGTH(v) > 1) { if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } - PyObject *interned = get_interned_string(interned_dict, v); + PyObject *interned = PyDict_GetItemWithError(interned_dict, v); + if (interned == NULL && PyErr_Occurred()) { + goto error; + } if (interned != NULL && interned != v) { Py_INCREF(interned); PyTuple_SET_ITEM(tuple, i, interned); @@ -243,25 +236,25 @@ intern_constants(PyObject *tuple, int *modified) } else if (PyTuple_CheckExact(v)) { if (intern_constants(v, NULL) < 0) { - return -1; + goto error; } } else if (PyFrozenSet_CheckExact(v)) { PyObject *w = v; PyObject *tmp = PySequence_Tuple(v); if (tmp == NULL) { - return -1; + goto error; } int tmp_modified = 0; if (intern_constants(tmp, &tmp_modified) < 0) { Py_DECREF(tmp); - return -1; + goto error; } if (tmp_modified) { v = PyFrozenSet_New(tmp); if (v == NULL) { Py_DECREF(tmp); - return -1; + goto error; } PyTuple_SET_ITEM(tuple, i, v); @@ -277,7 +270,7 @@ intern_constants(PyObject *tuple, int *modified) PySliceObject *slice = (PySliceObject *)v; PyObject *tmp = PyTuple_New(3); if (tmp == NULL) { - return -1; + goto error; } PyTuple_SET_ITEM(tmp, 0, Py_NewRef(slice->start)); PyTuple_SET_ITEM(tmp, 1, Py_NewRef(slice->stop)); @@ -285,7 +278,7 @@ intern_constants(PyObject *tuple, int *modified) int tmp_modified = 0; if (intern_constants(tmp, &tmp_modified) < 0) { Py_DECREF(tmp); - return -1; + goto error; } if (tmp_modified) { v = PySlice_New(PyTuple_GET_ITEM(tmp, 0), @@ -293,7 +286,7 @@ intern_constants(PyObject *tuple, int *modified) PyTuple_GET_ITEM(tmp, 2)); if (v == NULL) { Py_DECREF(tmp); - return -1; + goto error; } PyTuple_SET_ITEM(tuple, i, v); Py_DECREF(slice); @@ -312,7 +305,7 @@ intern_constants(PyObject *tuple, int *modified) { PyObject *interned = intern_one_constant(v); if (interned == NULL) { - return -1; + goto error; } else if (interned != v) { PyTuple_SET_ITEM(tuple, i, interned); @@ -324,7 +317,12 @@ intern_constants(PyObject *tuple, int *modified) } #endif } + Py_DECREF(interned_dict); return 0; + +error: + Py_DECREF(interned_dict); + return -1; } /* Return a shallow copy of a tuple that is