diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 82dc9e4bdc62f8..0f6613b6c1e3da 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -263,7 +263,7 @@ extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs, PyObject *kwnames, int oparg); extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, - int oparg); + int oparg, PyObject **locals); extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, int oparg); extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-17-22-47-29.bpo-47053.QAXk8Q.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-17-22-47-29.bpo-47053.QAXk8Q.rst new file mode 100644 index 00000000000000..097105b1c20f43 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-17-22-47-29.bpo-47053.QAXk8Q.rst @@ -0,0 +1 @@ +Reduce de-optimization in the specialized ``BINARY_OP_INPLACE_ADD_UNICODE`` opcode. diff --git a/Python/ceval.c b/Python/ceval.c index e1d961fee6a832..4824b192f4e48b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2002,10 +2002,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *right = TOP(); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); - DEOPT_IF(Py_REFCNT(left) != 2, BINARY_OP); _Py_CODEUNIT true_next = next_instr[INLINE_CACHE_ENTRIES_BINARY_OP]; int next_oparg = _Py_OPARG(true_next); - assert(_Py_OPCODE(true_next) == STORE_FAST); + assert(_Py_OPCODE(true_next) == STORE_FAST || + _Py_OPCODE(true_next) == STORE_FAST__LOAD_FAST); /* In the common case, there are 2 references to the value * stored in 'variable' when the v = v + ... is performed: one * on the value stack (in 'v') and one still stored in the @@ -2016,7 +2016,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(var != left, BINARY_OP); STAT_INC(BINARY_OP, hit); GETLOCAL(next_oparg) = NULL; - Py_DECREF(left); + assert(Py_REFCNT(left) >= 2); + Py_DECREF(left); // XXX never need to dealloc STACK_SHRINK(1); PyUnicode_Append(&TOP(), right); Py_DECREF(right); @@ -5378,7 +5379,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *lhs = SECOND(); PyObject *rhs = TOP(); next_instr--; - _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg); + _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0)); DISPATCH(); } else { diff --git a/Python/specialize.c b/Python/specialize.c index ce091a2c5f0784..720bc7f241586b 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1742,7 +1742,7 @@ binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs) void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, - int oparg) + int oparg, PyObject **locals) { assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP); _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1); @@ -1754,7 +1754,9 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, } if (PyUnicode_CheckExact(lhs)) { _Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1]; - if (_Py_OPCODE(next) == STORE_FAST && Py_REFCNT(lhs) == 2) { + bool to_store = (_Py_OPCODE(next) == STORE_FAST || + _Py_OPCODE(next) == STORE_FAST__LOAD_FAST); + if (to_store && locals[_Py_OPARG(next)] == lhs) { _Py_SET_OPCODE(*instr, BINARY_OP_INPLACE_ADD_UNICODE); goto success; }