From eda8b067f1259a0920b17cbe0daf30d515223a4c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 Nov 2023 21:21:15 +0000 Subject: [PATCH 1/6] Use exponential backoff for tier 2 --- Python/bytecodes.c | 15 +++++++++++++-- Python/generated_cases.c.h | 15 +++++++++++++-- Python/pylifecycle.c | 1 + 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9f1dfa3b7231b5..88163380afc335 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2327,7 +2327,10 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - if (this_instr[1].cache > tstate->interp->optimizer_backedge_threshold && + uint16_t ucounter = this_instr[1].cache; + /* Convert to signed int. For most compilers, this is a no-op */ + int32_t counter = ucounter > INT16_MAX ? (uint32_t)ucounter - (1<<16) : ucounter; + if (counter > tstate->interp->optimizer_backedge_threshold && // Double-check that the opcode isn't instrumented or something: this_instr->op.code == JUMP_BACKWARD) { @@ -2338,8 +2341,16 @@ dummy_func( // Rewind and enter the executor: assert(this_instr->op.code == ENTER_EXECUTOR); next_instr = this_instr; + this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + } + else { + int backoff = counter &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + if (backoff < 16 - OPTIMIZER_BITS_IN_COUNTER) { + backoff++; + } + counter = 1 + backoff - (1<<(backoff+OPTIMIZER_BITS_IN_COUNTER)); + this_instr[1].cache = (uint16_t)counter; } - this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } #endif /* ENABLE_SPECIALIZATION */ } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 38c368fcc10200..163887c7f41ea4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3406,7 +3406,10 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - if (this_instr[1].cache > tstate->interp->optimizer_backedge_threshold && + uint16_t ucounter = this_instr[1].cache; + /* Convert to signed int. For most compilers, this is a no-op */ + int32_t counter = ucounter > INT16_MAX ? (uint32_t)ucounter - (1<<16) : ucounter; + if (counter > tstate->interp->optimizer_backedge_threshold && // Double-check that the opcode isn't instrumented or something: this_instr->op.code == JUMP_BACKWARD) { @@ -3417,8 +3420,16 @@ // Rewind and enter the executor: assert(this_instr->op.code == ENTER_EXECUTOR); next_instr = this_instr; + this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + } + else { + int backoff = counter &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + if (backoff < 16 - OPTIMIZER_BITS_IN_COUNTER) { + backoff++; + } + counter = 1 + backoff - (1<<(backoff+OPTIMIZER_BITS_IN_COUNTER)); + this_instr[1].cache = (uint16_t)counter; } - this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } #endif /* ENABLE_SPECIALIZATION */ DISPATCH(); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index ac8d5208322882..72eff5855ff4b8 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1240,6 +1240,7 @@ init_interp_main(PyThreadState *tstate) if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } + enabled = 1; if (enabled) { PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer(); if (opt == NULL) { From 7736e18d13542344d0bfa0eaa511e2d09ccd5d63 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 2 Nov 2023 09:00:28 +0000 Subject: [PATCH 2/6] Fix up exponential backoff calculation --- Python/bytecodes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index da602e555e8284..32c360985054e3 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2343,12 +2343,12 @@ dummy_func( this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } else { - int backoff = counter &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); - if (backoff < 16 - OPTIMIZER_BITS_IN_COUNTER) { + int backoff = counter & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { backoff++; } - counter = 1 + backoff - (1<<(backoff+OPTIMIZER_BITS_IN_COUNTER)); - this_instr[1].cache = (uint16_t)counter; + int count = -(1 << backoff); + this_instr[1].cache = (uint16_t)((count << OPTIMIZER_BITS_IN_COUNTER) | backoff); } } #endif /* ENABLE_SPECIALIZATION */ From bff925cae63908c9a4a69b17bee87d3300b9e100 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 5 Nov 2023 04:03:22 +0000 Subject: [PATCH 3/6] Use unsigned counter for consistency --- Include/cpython/optimizer.h | 6 ++++-- Python/bytecodes.c | 17 ++++++++--------- Python/generated_cases.c.h | 17 ++++++++--------- Python/optimizer.c | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 2a5251b3ecb02a..4a68e64580028d 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -45,8 +45,8 @@ typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_C typedef struct _PyOptimizerObject { PyObject_HEAD optimize_func optimize; - uint16_t resume_threshold; - uint16_t backedge_threshold; + uint32_t resume_threshold; + uint32_t backedge_threshold; /* Data needed by the optimizer goes here, but is opaque to the VM */ } _PyOptimizerObject; @@ -76,6 +76,8 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); #define OPTIMIZER_BITS_IN_COUNTER 4 +/* Minimum of 16 additional executions before retry */ +#define MINIMUM_TIER2_BACKOFF 4 #ifdef __cplusplus } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f09fcc6593d38b..8978546a63d347 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2335,13 +2335,10 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - uint16_t ucounter = this_instr[1].cache; - /* Convert to signed int. For most compilers, this is a no-op */ - int32_t counter = ucounter > INT16_MAX ? (uint32_t)ucounter - (1<<16) : ucounter; - if (counter > tstate->interp->optimizer_backedge_threshold && + uint16_t counter = this_instr[1].cache; + if (counter == tstate->interp->optimizer_backedge_threshold) { // Double-check that the opcode isn't instrumented or something: - this_instr->op.code == JUMP_BACKWARD) - { + assert(this_instr->op.code == JUMP_BACKWARD); OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); ERROR_IF(optimized < 0, error); @@ -2353,11 +2350,13 @@ dummy_func( } else { int backoff = counter & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); - if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { + if (backoff < OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF) { + backoff = OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF; + } + else if (backoff < 15) { backoff++; } - int count = -(1 << backoff); - this_instr[1].cache = (uint16_t)((count << OPTIMIZER_BITS_IN_COUNTER) | backoff); + this_instr[1].cache = (0U - (1 << backoff)) | backoff; } } #endif /* ENABLE_SPECIALIZATION */ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ee6aacf9debb18..7e5fa3fbfe53fa 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3409,13 +3409,10 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - uint16_t ucounter = this_instr[1].cache; - /* Convert to signed int. For most compilers, this is a no-op */ - int32_t counter = ucounter > INT16_MAX ? (uint32_t)ucounter - (1<<16) : ucounter; - if (counter > tstate->interp->optimizer_backedge_threshold && + uint16_t counter = this_instr[1].cache; + if (counter == tstate->interp->optimizer_backedge_threshold) { // Double-check that the opcode isn't instrumented or something: - this_instr->op.code == JUMP_BACKWARD) - { + assert(this_instr->op.code == JUMP_BACKWARD); OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); if (optimized < 0) goto error; @@ -3427,11 +3424,13 @@ } else { int backoff = counter & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); - if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { + if (backoff < OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF) { + backoff = OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF; + } + else if (backoff < 15) { backoff++; } - int count = -(1 << backoff); - this_instr[1].cache = (uint16_t)((count << OPTIMIZER_BITS_IN_COUNTER) | backoff); + this_instr[1].cache = (0U - (1 << backoff)) | backoff; } } #endif /* ENABLE_SPECIALIZATION */ diff --git a/Python/optimizer.c b/Python/optimizer.c index a332fd1c89582c..aad16901164f22 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -122,8 +122,8 @@ PyTypeObject _PyDefaultOptimizer_Type = { _PyOptimizerObject _PyOptimizer_Default = { PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) .optimize = error_optimize, - .resume_threshold = UINT16_MAX, - .backedge_threshold = UINT16_MAX, + .resume_threshold = UINT16_MAX+1, + .backedge_threshold = UINT16_MAX+1, }; _PyOptimizerObject * From c97252a423ae907ba9ddf84477248fb9d9f9ed2b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 5 Nov 2023 06:40:42 +0000 Subject: [PATCH 4/6] add news --- .../2023-11-05-06-40-35.gh-issue-111843.c045cB.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst new file mode 100644 index 00000000000000..280f8f9bf0b8c0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-05-06-40-35.gh-issue-111843.c045cB.rst @@ -0,0 +1,2 @@ +Use exponential backoff to reduce the number of failed tier 2 optimization +attempts by over 99%. From e84cd7b45aedce54946289976a361be45db9eda7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 5 Nov 2023 20:53:12 +0000 Subject: [PATCH 5/6] Fix up backoff arithmetic --- Include/cpython/optimizer.h | 6 ++++-- Python/bytecodes.c | 21 ++++++++++++--------- Python/generated_cases.c.h | 21 ++++++++++++--------- Python/optimizer.c | 9 +++++---- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 4a68e64580028d..adc2c1fc442280 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -45,8 +45,10 @@ typedef int (*optimize_func)(_PyOptimizerObject* self, PyCodeObject *code, _Py_C typedef struct _PyOptimizerObject { PyObject_HEAD optimize_func optimize; - uint32_t resume_threshold; - uint32_t backedge_threshold; + /* These thresholds are treated as signed so do not exceed INT16_MAX + * Use INT16_MAX to indicate that the optimizer should never be called */ + uint16_t resume_threshold; + uint16_t backedge_threshold; /* Data needed by the optimizer goes here, but is opaque to the VM */ } _PyOptimizerObject; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8978546a63d347..9e5f1813b958f7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2335,10 +2335,12 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - uint16_t counter = this_instr[1].cache; - if (counter == tstate->interp->optimizer_backedge_threshold) { - // Double-check that the opcode isn't instrumented or something: - assert(this_instr->op.code == JUMP_BACKWARD); + /* We are using unsigned values, but we really want signed values, so + * do the 2s complement comparison manually */ + uint16_t ucounter = this_instr[1].cache + (1 << 15); + uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15); + // Double-check that the opcode isn't instrumented or something: + if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); ERROR_IF(optimized < 0, error); @@ -2349,14 +2351,15 @@ dummy_func( this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } else { - int backoff = counter & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); - if (backoff < OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF) { - backoff = OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF; + int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + if (backoff < MINIMUM_TIER2_BACKOFF) { + backoff = MINIMUM_TIER2_BACKOFF; } - else if (backoff < 15) { + else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { backoff++; } - this_instr[1].cache = (0U - (1 << backoff)) | backoff; + assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER); + this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff; } } #endif /* ENABLE_SPECIALIZATION */ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7e5fa3fbfe53fa..b9a2b2275491e7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3409,10 +3409,12 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); - uint16_t counter = this_instr[1].cache; - if (counter == tstate->interp->optimizer_backedge_threshold) { - // Double-check that the opcode isn't instrumented or something: - assert(this_instr->op.code == JUMP_BACKWARD); + /* We are using unsigned values, but we really want signed values, so + * do the 2s complement comparison manually */ + uint16_t ucounter = this_instr[1].cache + (1 << 15); + uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15); + // Double-check that the opcode isn't instrumented or something: + if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); if (optimized < 0) goto error; @@ -3423,14 +3425,15 @@ this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } else { - int backoff = counter & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); - if (backoff < OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF) { - backoff = OPTIMIZER_BITS_IN_COUNTER + MINIMUM_TIER2_BACKOFF; + int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + if (backoff < MINIMUM_TIER2_BACKOFF) { + backoff = MINIMUM_TIER2_BACKOFF; } - else if (backoff < 15) { + else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { backoff++; } - this_instr[1].cache = (0U - (1 << backoff)) | backoff; + assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER); + this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff; } } #endif /* ENABLE_SPECIALIZATION */ diff --git a/Python/optimizer.c b/Python/optimizer.c index aad16901164f22..d3bfd19ed9446c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -107,6 +107,7 @@ error_optimize( _PyExecutorObject **exec, int Py_UNUSED(stack_entries)) { + assert(0); PyErr_Format(PyExc_SystemError, "Should never call error_optimize"); return -1; } @@ -122,8 +123,8 @@ PyTypeObject _PyDefaultOptimizer_Type = { _PyOptimizerObject _PyOptimizer_Default = { PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) .optimize = error_optimize, - .resume_threshold = UINT16_MAX+1, - .backedge_threshold = UINT16_MAX+1, + .resume_threshold = INT16_MAX, + .backedge_threshold = INT16_MAX, }; _PyOptimizerObject * @@ -309,7 +310,7 @@ PyUnstable_Optimizer_NewCounter(void) return NULL; } opt->base.optimize = counter_optimize; - opt->base.resume_threshold = UINT16_MAX; + opt->base.resume_threshold = INT16_MAX; opt->base.backedge_threshold = 0; opt->count = 0; return (PyObject *)opt; @@ -943,7 +944,7 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) return NULL; } opt->optimize = uop_optimize; - opt->resume_threshold = UINT16_MAX; + opt->resume_threshold = INT16_MAX; // Need at least 3 iterations to settle specializations. // A few lower bits of the counter are reserved for other flags. opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER; From dae85481d76786a5a6c55096e2b099bac56f0fbd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 9 Nov 2023 09:12:09 +0000 Subject: [PATCH 6/6] Update Python/pylifecycle.c Co-authored-by: Donghee Na --- Python/pylifecycle.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 72eff5855ff4b8..ac8d5208322882 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1240,7 +1240,6 @@ init_interp_main(PyThreadState *tstate) if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } - enabled = 1; if (enabled) { PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer(); if (opt == NULL) {