From 78bac60f7c84783721e82cfb7e00cafad53befa3 Mon Sep 17 00:00:00 2001 From: LloydZ <35182391+cocolato@users.noreply.github.com> Date: Mon, 22 Dec 2025 07:11:14 +0000 Subject: [PATCH] Eliminate redundant refcounting from _LOAD_ATTR_WITH_HINT --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 8 ++++---- Lib/test/test_capi/test_opt.py | 21 +++++++++++++++++++++ Python/bytecodes.c | 6 ++++-- Python/executor_cases.c.h | 16 +++++----------- Python/generated_cases.c.h | 9 ++++++++- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 8 ++++++++ 9 files changed, 54 insertions(+), 21 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 92423fe51408fc..e0d2e2a3c430e2 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1434,7 +1434,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, OPARG_SIMPLE, 3 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, 4, 5 } } }, [LOAD_ATTR_PROPERTY] = { .nuops = 5, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_PROPERTY_FRAME, 4, 5 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 9 }, { _PUSH_FRAME, OPARG_SIMPLE, 9 } } }, [LOAD_ATTR_SLOT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_SLOT, 1, 3 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, - [LOAD_ATTR_WITH_HINT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_WITH_HINT, 1, 3 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, + [LOAD_ATTR_WITH_HINT] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_WITH_HINT, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, [LOAD_BUILD_CLASS] = { .nuops = 1, .uops = { { _LOAD_BUILD_CLASS, OPARG_SIMPLE, 0 } } }, [LOAD_COMMON_CONSTANT] = { .nuops = 1, .uops = { { _LOAD_COMMON_CONSTANT, OPARG_SIMPLE, 0 } } }, [LOAD_CONST] = { .nuops = 1, .uops = { { _LOAD_CONST, OPARG_SIMPLE, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index a11959fb4057df..c8aa2765a34ef4 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -807,7 +807,7 @@ extern "C" { #define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES_r11 1000 #define _LOAD_ATTR_PROPERTY_FRAME_r11 1001 #define _LOAD_ATTR_SLOT_r11 1002 -#define _LOAD_ATTR_WITH_HINT_r11 1003 +#define _LOAD_ATTR_WITH_HINT_r12 1003 #define _LOAD_BUILD_CLASS_r01 1004 #define _LOAD_BYTECODE_r00 1005 #define _LOAD_COMMON_CONSTANT_r01 1006 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index ec374dd5818432..d84c88c92435fe 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -191,7 +191,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG, [_LOAD_ATTR_INSTANCE_VALUE] = HAS_DEOPT_FLAG, [_LOAD_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG, [_LOAD_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_CHECK_ATTR_CLASS] = HAS_EXIT_FLAG, [_LOAD_ATTR_CLASS] = HAS_ESCAPES_FLAG, @@ -1774,7 +1774,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .best = { 1, 1, 1, 1 }, .entries = { { -1, -1, -1 }, - { 1, 1, _LOAD_ATTR_WITH_HINT_r11 }, + { 2, 1, _LOAD_ATTR_WITH_HINT_r12 }, { -1, -1, -1 }, { -1, -1, -1 }, }, @@ -3542,7 +3542,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_LOAD_ATTR_INSTANCE_VALUE_r12] = _LOAD_ATTR_INSTANCE_VALUE, [_LOAD_ATTR_INSTANCE_VALUE_r23] = _LOAD_ATTR_INSTANCE_VALUE, [_LOAD_ATTR_MODULE_r11] = _LOAD_ATTR_MODULE, - [_LOAD_ATTR_WITH_HINT_r11] = _LOAD_ATTR_WITH_HINT, + [_LOAD_ATTR_WITH_HINT_r12] = _LOAD_ATTR_WITH_HINT, [_LOAD_ATTR_SLOT_r11] = _LOAD_ATTR_SLOT, [_CHECK_ATTR_CLASS_r01] = _CHECK_ATTR_CLASS, [_CHECK_ATTR_CLASS_r11] = _CHECK_ATTR_CLASS, @@ -4501,7 +4501,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_LOAD_ATTR_SLOT] = "_LOAD_ATTR_SLOT", [_LOAD_ATTR_SLOT_r11] = "_LOAD_ATTR_SLOT_r11", [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", - [_LOAD_ATTR_WITH_HINT_r11] = "_LOAD_ATTR_WITH_HINT_r11", + [_LOAD_ATTR_WITH_HINT_r12] = "_LOAD_ATTR_WITH_HINT_r12", [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", [_LOAD_BUILD_CLASS_r01] = "_LOAD_BUILD_CLASS_r01", [_LOAD_COMMON_CONSTANT] = "_LOAD_COMMON_CONSTANT", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 3ea93277dab295..570fb857e19254 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2512,6 +2512,27 @@ class C(): self.assertNotIn("_POP_TOP", uops) self.assertIn("_POP_TOP_NOP", uops) + def test_load_attr_with_hint(self): + def testfunc(n): + class C: + pass + c = C() + c.x = 42 + for i in range(_testinternalcapi.SHARED_KEYS_MAX_SIZE - 1): + setattr(c, f"_{i}", None) + x = 0 + for i in range(n): + x += c.x + return x + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 42 * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_LOAD_ATTR_WITH_HINT", uops) + self.assertNotIn("_POP_TOP", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_int_add_op_refcount_elimination(self): def testfunc(n): c = 1 diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f7eb006e686800..9df34f6f36875d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2416,7 +2416,7 @@ dummy_func( unused/5 + _PUSH_NULL_CONDITIONAL; - op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr)) { + op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, o)) { PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); assert(Py_TYPE(owner_o)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = _PyObject_GetManagedDict(owner_o); @@ -2452,13 +2452,15 @@ dummy_func( #else attr = PyStackRef_FromPyObjectNew(attr_o); #endif - PyStackRef_CLOSE(owner); + o = owner; + DEAD(owner); } macro(LOAD_ATTR_WITH_HINT) = unused/1 + _GUARD_TYPE_VERSION + _LOAD_ATTR_WITH_HINT + + POP_TOP + unused/5 + _PUSH_NULL_CONDITIONAL; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b3eff63b30ab55..07a96a93373417 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -8257,11 +8257,12 @@ break; } - case _LOAD_ATTR_WITH_HINT_r11: { + case _LOAD_ATTR_WITH_HINT_r12: { CHECK_CURRENT_CACHED_VALUES(1); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef owner; _PyStackRef attr; + _PyStackRef o; _PyStackRef _stack_item_0 = _tos_cache0; oparg = CURRENT_OPARG(); owner = _stack_item_0; @@ -8335,18 +8336,11 @@ #else attr = PyStackRef_FromPyObjectNew(attr_o); #endif - stack_pointer[0] = attr; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(owner); - stack_pointer = _PyFrame_GetStackPointer(frame); + o = owner; + _tos_cache1 = o; _tos_cache0 = attr; - _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); - stack_pointer += -1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index eaaa5f3bb96abc..6aa8a4a92c1eb1 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8523,6 +8523,8 @@ static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; _PyStackRef attr; + _PyStackRef o; + _PyStackRef value; _PyStackRef *null; /* Skip 1 cache entry */ // _GUARD_TYPE_VERSION @@ -8602,9 +8604,14 @@ #else attr = PyStackRef_FromPyObjectNew(attr_o); #endif + o = owner; + } + // _POP_TOP + { + value = o; stack_pointer[-1] = attr; _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(owner); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); } /* Skip 5 cache entries */ diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index cd789db1fe89d2..2bca0a3cd7cc85 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -634,9 +634,10 @@ dummy_func(void) { } } - op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr)) { + op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, o)) { attr = sym_new_not_null(ctx); (void)hint; + o = owner; } op(_LOAD_ATTR_SLOT, (index/1, owner -- attr)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 85d3d041215103..e52d4e1db467b2 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1658,11 +1658,19 @@ } case _LOAD_ATTR_WITH_HINT: { + JitOptRef owner; JitOptRef attr; + JitOptRef o; + owner = stack_pointer[-1]; uint16_t hint = (uint16_t)this_instr->operand0; attr = sym_new_not_null(ctx); (void)hint; + o = owner; + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = attr; + stack_pointer[0] = o; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; }