From c2d36683ad73d8259b5563947499f6ac39a88e26 Mon Sep 17 00:00:00 2001 From: TheRealMDoerr Date: Fri, 27 Oct 2023 21:07:57 +0200 Subject: [PATCH 1/6] 8316746: Top of lock-stack does not match the unlocked object --- src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp | 2 +- src/hotspot/cpu/ppc/templateTable_ppc_64.cpp | 114 ++++++++----------- 2 files changed, 49 insertions(+), 67 deletions(-) diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp index 97dc537de6a67..7473775c50d6f 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp @@ -1978,7 +1978,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register } } -// Add a InterpMonitorElem to stack (see frame_sparc.hpp). +// Add a monitor (see frame_ppc.hpp). void InterpreterMacroAssembler::add_monitor_to_stack(bool stack_is_empty, Register Rtemp1, Register Rtemp2) { // Very-local scratch registers. diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index ef2d642ede07e..7bb1db9021a21 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -4167,63 +4167,54 @@ void TemplateTable::monitorenter() { Robj_to_lock = R17_tos, Rscratch1 = R3_ARG1, Rscratch2 = R4_ARG2, - Rscratch3 = R5_ARG3, - Rcurrent_obj_addr = R6_ARG4; + Rbot = R5_ARG3, + Rfree_slot = R6_ARG4; + + Label Lfound, Lallocate_new; + + __ ld(Rscratch1, _abi0(callers_sp), R1_SP); // load FP + __ li(Rfree_slot, 0); // Points to free slot or null. + + // Set up search loop - start with topmost monitor. + __ mr(Rcurrent_monitor, R26_monitor); + __ addi(Rbot, Rscratch1, -frame::ijava_state_size); // ------------------------------------------------------------------------------ // Null pointer exception. __ null_check_throw(Robj_to_lock, -1, R11_scratch1); - // Try to acquire a lock on the object. - // Repeat until succeeded (i.e., until monitorenter returns true). + // Check if any slot is present => short cut to allocation if not. + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ beq(CCR0, Lallocate_new); // ------------------------------------------------------------------------------ // Find a free slot in the monitor block. - Label Lfound, Lexit, Lallocate_new; - ConditionRegister found_free_slot = CCR0, - found_same_obj = CCR1, - reached_limit = CCR6; + // Note: The order of the monitors is important for C2 OSR which derives the + // unlock order from it. { - Label Lloop; - Register Rlimit = Rcurrent_monitor; + Label Lloop, LnotFree; - // Set up search loop - start with topmost monitor. - __ addi(Rcurrent_obj_addr, R26_monitor, in_bytes(BasicObjectLock::obj_offset())); - - __ ld(Rlimit, 0, R1_SP); - __ addi(Rlimit, Rlimit, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes() - in_bytes(BasicObjectLock::obj_offset()))); // Monitor base + __ bind(Lloop); + __ ld(Rcurrent_obj, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor); + // Found object? + __ cmpd(CCR0, Rcurrent_obj, Robj_to_lock); + __ beq(CCR0, Lfound); // recursive locking - // Check if any slot is present => short cut to allocation if not. - __ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit); - __ bgt(reached_limit, Lallocate_new); + __ cmpdi(CCR0, Rcurrent_obj, 0); + __ bne(CCR0, LnotFree); + __ mr(Rfree_slot, Rcurrent_monitor); // remember free slot closest to the bottom + __ bind(LnotFree); - // Pre-load topmost slot. - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size_in_bytes()); - // The search loop. - __ bind(Lloop); - // Found free slot? - __ cmpdi(found_free_slot, Rcurrent_obj, 0); - // Is this entry for same obj? If so, stop the search and take the found - // free slot or allocate a new one to enable recursive locking. - __ cmpd(found_same_obj, Rcurrent_obj, Robj_to_lock); - __ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit); - __ beq(found_free_slot, Lexit); - __ beq(found_same_obj, Lallocate_new); - __ bgt(reached_limit, Lallocate_new); - // Check if last allocated BasicLockObj reached. - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size_in_bytes()); - // Next iteration if unchecked BasicObjectLocks exist on the stack. - __ b(Lloop); + __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes()); + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ bne(CCR0, Lloop); } // ------------------------------------------------------------------------------ // Check if we found a free slot. - __ bind(Lexit); - - __ addi(Rcurrent_monitor, Rcurrent_obj_addr, -(frame::interpreter_frame_monitor_size_in_bytes()) - in_bytes(BasicObjectLock::obj_offset())); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, - frame::interpreter_frame_monitor_size_in_bytes()); + __ cmpdi(CCR0, Rfree_slot, 0); + __ beq(CCR0, Lallocate_new); + __ mr(Rcurrent_monitor, Rfree_slot); __ b(Lfound); // We didn't find a free BasicObjLock => allocate one. @@ -4231,7 +4222,6 @@ void TemplateTable::monitorenter() { __ bind(Lallocate_new); __ add_monitor_to_stack(false, Rscratch1, Rscratch2); __ mr(Rcurrent_monitor, R26_monitor); - __ addi(Rcurrent_obj_addr, R26_monitor, in_bytes(BasicObjectLock::obj_offset())); // ------------------------------------------------------------------------------ // We now have a slot to lock. @@ -4241,7 +4231,7 @@ void TemplateTable::monitorenter() { // The object has already been popped from the stack, so the expression stack looks correct. __ addi(R14_bcp, R14_bcp, 1); - __ std(Robj_to_lock, 0, Rcurrent_obj_addr); + __ std(Robj_to_lock, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor); __ lock_object(Rcurrent_monitor, Robj_to_lock); // Check if there's enough space on the stack for the monitors after locking. @@ -4259,43 +4249,37 @@ void TemplateTable::monitorexit() { Register Rcurrent_monitor = R11_scratch1, Rcurrent_obj = R12_scratch2, Robj_to_lock = R17_tos, - Rcurrent_obj_addr = R3_ARG1, - Rlimit = R4_ARG2; + Rscratch = R3_ARG1, + Rbot = R4_ARG2; + Label Lfound, Lillegal_monitor_state; - // Check corner case: unbalanced monitorEnter / Exit. - __ ld(Rlimit, 0, R1_SP); - __ addi(Rlimit, Rlimit, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes())); // Monitor base + __ ld(Rscratch, _abi0(callers_sp), R1_SP); // load FP + + // Set up search loop - start with topmost monitor. + __ mr(Rcurrent_monitor, R26_monitor); + __ addi(Rbot, Rscratch, -frame::ijava_state_size); // Null pointer check. __ null_check_throw(Robj_to_lock, -1, R11_scratch1); - __ cmpld(CCR0, R26_monitor, Rlimit); - __ bgt(CCR0, Lillegal_monitor_state); + // Check corner case: unbalanced monitorEnter / Exit. + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ beq(CCR0, Lillegal_monitor_state); // Find the corresponding slot in the monitors stack section. { Label Lloop; - // Start with topmost monitor. - __ addi(Rcurrent_obj_addr, R26_monitor, in_bytes(BasicObjectLock::obj_offset())); - __ addi(Rlimit, Rlimit, in_bytes(BasicObjectLock::obj_offset())); - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size_in_bytes()); - __ bind(Lloop); + __ ld(Rcurrent_obj, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor); // Is this entry for same obj? __ cmpd(CCR0, Rcurrent_obj, Robj_to_lock); __ beq(CCR0, Lfound); - // Check if last allocated BasicLockObj reached. - - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ cmpld(CCR0, Rcurrent_obj_addr, Rlimit); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size_in_bytes()); - - // Next iteration if unchecked BasicObjectLocks exist on the stack. - __ ble(CCR0, Lloop); + __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes()); + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ bne(CCR0, Lloop); } // Fell through without finding the basic obj lock => throw up! @@ -4305,8 +4289,6 @@ void TemplateTable::monitorexit() { __ align(32, 12); __ bind(Lfound); - __ addi(Rcurrent_monitor, Rcurrent_obj_addr, - -(frame::interpreter_frame_monitor_size_in_bytes()) - in_bytes(BasicObjectLock::obj_offset())); __ unlock_object(Rcurrent_monitor); } From 1e678fd2ef74d77e6b8d1dfce8e425be30f9cb85 Mon Sep 17 00:00:00 2001 From: TheRealMDoerr Date: Sun, 29 Oct 2023 17:34:26 +0100 Subject: [PATCH 2/6] Handle recursive locking like on other platforms. Improve register usage. --- src/hotspot/cpu/ppc/templateTable_ppc_64.cpp | 29 ++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index 7bb1db9021a21..977ffbe3772fe 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -4159,14 +4159,13 @@ void TemplateTable::athrow() { // at next monitor exit. void TemplateTable::monitorenter() { transition(atos, vtos); - __ verify_oop(R17_tos); - Register Rcurrent_monitor = R11_scratch1, - Rcurrent_obj = R12_scratch2, + Register Rcurrent_monitor = R3_ARG1, + Rcurrent_obj = R4_ARG2, Robj_to_lock = R17_tos, - Rscratch1 = R3_ARG1, - Rscratch2 = R4_ARG2, + Rscratch1 = R11_scratch1, + Rscratch2 = R12_scratch2, Rbot = R5_ARG3, Rfree_slot = R6_ARG4; @@ -4181,7 +4180,7 @@ void TemplateTable::monitorenter() { // ------------------------------------------------------------------------------ // Null pointer exception. - __ null_check_throw(Robj_to_lock, -1, R11_scratch1); + __ null_check_throw(Robj_to_lock, -1, Rscratch1); // Check if any slot is present => short cut to allocation if not. __ cmpld(CCR0, Rcurrent_monitor, Rbot); @@ -4192,13 +4191,14 @@ void TemplateTable::monitorenter() { // Note: The order of the monitors is important for C2 OSR which derives the // unlock order from it. { - Label Lloop, LnotFree; + Label Lloop, LnotFree, Lexit; __ bind(Lloop); __ ld(Rcurrent_obj, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor); - // Found object? + // Exit if current entry is for same object; this guarantees, that new monitor + // used for recursive lock is above the older one. __ cmpd(CCR0, Rcurrent_obj, Robj_to_lock); - __ beq(CCR0, Lfound); // recursive locking + __ beq(CCR0, Lexit); // recursive locking __ cmpdi(CCR0, Rcurrent_obj, 0); __ bne(CCR0, LnotFree); @@ -4208,6 +4208,7 @@ void TemplateTable::monitorenter() { __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes()); __ cmpld(CCR0, Rcurrent_monitor, Rbot); __ bne(CCR0, Lloop); + __ bind(Lexit); } // ------------------------------------------------------------------------------ @@ -4246,11 +4247,11 @@ void TemplateTable::monitorexit() { transition(atos, vtos); __ verify_oop(R17_tos); - Register Rcurrent_monitor = R11_scratch1, - Rcurrent_obj = R12_scratch2, + Register Rcurrent_monitor = R3_ARG1, + Rcurrent_obj = R4_ARG2, Robj_to_lock = R17_tos, - Rscratch = R3_ARG1, - Rbot = R4_ARG2; + Rscratch = R11_scratch1, + Rbot = R12_scratch2; Label Lfound, Lillegal_monitor_state; @@ -4261,7 +4262,7 @@ void TemplateTable::monitorexit() { __ addi(Rbot, Rscratch, -frame::ijava_state_size); // Null pointer check. - __ null_check_throw(Robj_to_lock, -1, R11_scratch1); + __ null_check_throw(Robj_to_lock, -1, Rscratch); // Check corner case: unbalanced monitorEnter / Exit. __ cmpld(CCR0, Rcurrent_monitor, Rbot); From 5011761fd5667a4d367037ef43028da82143d4f0 Mon Sep 17 00:00:00 2001 From: TheRealMDoerr Date: Sun, 29 Oct 2023 18:41:48 +0100 Subject: [PATCH 3/6] Add regression test. --- .../jtreg/compiler/locks/TestUnlockOSR.java | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 test/hotspot/jtreg/compiler/locks/TestUnlockOSR.java diff --git a/test/hotspot/jtreg/compiler/locks/TestUnlockOSR.java b/test/hotspot/jtreg/compiler/locks/TestUnlockOSR.java new file mode 100644 index 0000000000000..f2133b49658c6 --- /dev/null +++ b/test/hotspot/jtreg/compiler/locks/TestUnlockOSR.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * @test + * @bug 8316746 + * @summary During OSR, locks get transferred from interpreter frame. + * Check that unlocking 2 such locks works in the OSR compiled nmethod. + * Some platforms verify that the unlocking happens in the corrent order. + * + * @run main/othervm -Xbatch TestUnlockOSR + */ + +public class TestUnlockOSR { + static void test_method(Object a, Object b, int limit) { + synchronized(a) { // allocate space for monitors + synchronized(b) { + } + } // free space to test allocation in reused space + synchronized(a) { // reuse the space + synchronized(b) { + for (int i = 0; i < limit; i++) {} + } + } + } + + public static void main(String[] args) { + Object a = new TestUnlockOSR(), + b = new TestUnlockOSR(); + // avoid uncommon trap before last unlocks + for (int i = 0; i < 100; i++) { test_method(a, b, 0); } + // trigger OSR + test_method(a, b, 100000); + } +} From d5977edbc80c564a5a17aea915659ab86f36dd51 Mon Sep 17 00:00:00 2001 From: TheRealMDoerr Date: Thu, 2 Nov 2023 17:02:26 +0100 Subject: [PATCH 4/6] Remove wrong comment. --- src/hotspot/cpu/arm/frame_arm.cpp | 1 - src/hotspot/cpu/ppc/frame_ppc.cpp | 1 - src/hotspot/cpu/s390/frame_s390.cpp | 1 - src/hotspot/cpu/zero/frame_zero.cpp | 1 - 4 files changed, 4 deletions(-) diff --git a/src/hotspot/cpu/arm/frame_arm.cpp b/src/hotspot/cpu/arm/frame_arm.cpp index 1ee8df049fccb..d923e1f43ad43 100644 --- a/src/hotspot/cpu/arm/frame_arm.cpp +++ b/src/hotspot/cpu/arm/frame_arm.cpp @@ -279,7 +279,6 @@ BasicObjectLock* frame::interpreter_frame_monitor_begin() const { return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); } -// Pointer beyond the "oldest/deepest" BasicObjectLock on stack. BasicObjectLock* frame::interpreter_frame_monitor_end() const { BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); // make sure the pointer points inside the frame diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp index 96a87ea9d1179..637883986195b 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.cpp +++ b/src/hotspot/cpu/ppc/frame_ppc.cpp @@ -454,7 +454,6 @@ intptr_t *frame::initial_deoptimization_info() { frame::frame(void* sp, void* fp, void* pc) : frame((intptr_t*)sp, (address)pc) {} #endif -// Pointer beyond the "oldest/deepest" BasicObjectLock on stack. BasicObjectLock* frame::interpreter_frame_monitor_end() const { BasicObjectLock* result = (BasicObjectLock*) at_relative(ijava_idx(monitors)); // make sure the pointer points inside the frame diff --git a/src/hotspot/cpu/s390/frame_s390.cpp b/src/hotspot/cpu/s390/frame_s390.cpp index ac24e43f00ce4..dbaa243eb1cac 100644 --- a/src/hotspot/cpu/s390/frame_s390.cpp +++ b/src/hotspot/cpu/s390/frame_s390.cpp @@ -672,7 +672,6 @@ intptr_t *frame::initial_deoptimization_info() { return fp(); } -// Pointer beyond the "oldest/deepest" BasicObjectLock on stack. BasicObjectLock* frame::interpreter_frame_monitor_end() const { return interpreter_frame_monitors(); } diff --git a/src/hotspot/cpu/zero/frame_zero.cpp b/src/hotspot/cpu/zero/frame_zero.cpp index 923d3082b25e7..5ddd23a9d59ef 100644 --- a/src/hotspot/cpu/zero/frame_zero.cpp +++ b/src/hotspot/cpu/zero/frame_zero.cpp @@ -82,7 +82,6 @@ BasicObjectLock* frame::interpreter_frame_monitor_begin() const { return get_interpreterState()->monitor_base(); } -// Pointer beyond the "oldest/deepest" BasicObjectLock on stack. BasicObjectLock* frame::interpreter_frame_monitor_end() const { return (BasicObjectLock*) get_interpreterState()->stack_base(); } From 56209f27a9986f0ad0db25bc12d4d11434a71881 Mon Sep 17 00:00:00 2001 From: TheRealMDoerr Date: Thu, 2 Nov 2023 17:56:27 +0100 Subject: [PATCH 5/6] Improve comment. --- src/hotspot/cpu/ppc/templateTable_ppc_64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index 977ffbe3772fe..0cb14fe3a7be4 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -4189,7 +4189,7 @@ void TemplateTable::monitorenter() { // ------------------------------------------------------------------------------ // Find a free slot in the monitor block. // Note: The order of the monitors is important for C2 OSR which derives the - // unlock order from it. + // unlock order from it (see comments for interpreter_frame_monitor_*). { Label Lloop, LnotFree, Lexit; From 0c1a93439046ff40c8070e7bc34165bf87d4a842 Mon Sep 17 00:00:00 2001 From: TheRealMDoerr Date: Wed, 8 Nov 2023 17:44:32 +0100 Subject: [PATCH 6/6] Shorter code: Use Rfree_slot as monitor. --- src/hotspot/cpu/ppc/templateTable_ppc_64.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index 0cb14fe3a7be4..9ffb75483b8a6 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -4214,15 +4214,12 @@ void TemplateTable::monitorenter() { // ------------------------------------------------------------------------------ // Check if we found a free slot. __ cmpdi(CCR0, Rfree_slot, 0); - __ beq(CCR0, Lallocate_new); - __ mr(Rcurrent_monitor, Rfree_slot); - __ b(Lfound); + __ bne(CCR0, Lfound); // We didn't find a free BasicObjLock => allocate one. - __ align(32, 12); __ bind(Lallocate_new); __ add_monitor_to_stack(false, Rscratch1, Rscratch2); - __ mr(Rcurrent_monitor, R26_monitor); + __ mr(Rfree_slot, R26_monitor); // ------------------------------------------------------------------------------ // We now have a slot to lock. @@ -4232,8 +4229,8 @@ void TemplateTable::monitorenter() { // The object has already been popped from the stack, so the expression stack looks correct. __ addi(R14_bcp, R14_bcp, 1); - __ std(Robj_to_lock, in_bytes(BasicObjectLock::obj_offset()), Rcurrent_monitor); - __ lock_object(Rcurrent_monitor, Robj_to_lock); + __ std(Robj_to_lock, in_bytes(BasicObjectLock::obj_offset()), Rfree_slot); + __ lock_object(Rfree_slot, Robj_to_lock); // Check if there's enough space on the stack for the monitors after locking. // This emits a single store.