From 2b9a155a6be1f29438b2090e300cd904d5b9292f Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Wed, 29 May 2024 14:49:14 +0200 Subject: [PATCH 01/11] 8320318: ObjectMonitor Responsible thread --- .../cpu/aarch64/c2_CodeStubs_aarch64.cpp | 57 ++++++++++++- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 41 +++++++--- src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp | 81 ++++++++++++++++++- src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 41 +++++++--- src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp | 56 +++++++++++++ .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 38 ++++++--- src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp | 77 +++++++++--------- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 33 +++++--- src/hotspot/share/opto/c2_CodeStubs.hpp | 10 ++- src/hotspot/share/runtime/objectMonitor.cpp | 56 ++++++++++++- src/hotspot/share/runtime/objectMonitor.hpp | 1 + 11 files changed, 397 insertions(+), 94 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp index dabafb9288b83..b0ed46675c1e6 100644 --- a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -91,4 +91,59 @@ void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { __ b(continuation()); } +int C2FastUnlockLightweightStub::max_size() const { + return 256; +} + +void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { + const Register monitor = _mark; + const Register contentions_addr = _t; + const Register prev_contentions_value = _mark; + const Register owner_addr = _thread; + + Label slow_path, decrement_contentions_slow_path, decrement_contentions_fast_path; + + { // Check for, and try to cancel any async deflation. + __ bind(_check_deflater); + + // CAS owner (null => current thread). + __ cmpxchg(owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true, + /*release*/ false, /*weak*/ false, _t); + __ br(Assembler::EQ, slow_path); + + __ cmp(_t, checked_cast(reinterpret_cast(DEFLATER_MARKER))); + __ br(Assembler::NE, unlocked_continuation()); + + // The deflator owns the lock. Try to cancel the deflation by + // first incrementing contentions... + __ lea(contentions_addr, Address(monitor, ObjectMonitor::contentions_offset())); + __ atomic_addw(prev_contentions_value, 1, contentions_addr); + + __ cmp(prev_contentions_value, zr); + __ br(Assembler::LS, decrement_contentions_fast_path); // Mr. Deflator won the race. + + // ... then try to take the ownership. If we manage to cancel deflation, + // ObjectMonitor::deflate_monitor() will decrement contentions, which is why + // we don't do it here. + __ mov(rscratch2, checked_cast(reinterpret_cast(DEFLATER_MARKER))); + __ cmpxchg(owner_addr, rscratch2, rthread, Assembler::xword, /*acquire*/ true, + /*release*/ false, /*weak*/ false, zr); + __ br(Assembler::EQ, slow_path); // We successfully canceled deflation. + + __ cmpxchg(owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true, + /*release*/ false, /*weak*/ false, zr); + __ br(Assembler::EQ, decrement_contentions_slow_path); + + __ bind(decrement_contentions_fast_path); + __ atomic_addw(noreg, -1, contentions_addr); + __ b(unlocked_continuation()); + + __ bind(decrement_contentions_slow_path); + __ atomic_addw(noreg, -1, contentions_addr); + __ bind(slow_path); + __ cmp(zr, rthread); // Set Flag to NE + __ b(slow_path_continuation()); + } +} + #undef __ diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 7e3ceb1f02029..2d713a39d47a6 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -339,7 +339,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis // Handle inflated monitor. Label inflated, inflated_load_monitor; // Finish fast unlock successfully. MUST branch to with flag == EQ - Label unlocked; + Label unlocked, set_eq_unlocked; // Finish fast unlock unsuccessfully. MUST branch to with flag == NE Label slow_path; @@ -347,6 +347,16 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis const Register t2_top = t2; const Register t3_t = t3; + Label dummy; + C2FastUnlockLightweightStub* stub = nullptr; + + if (!Compile::current()->output()->in_scratch_emit_size()) { + stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, t1, t3, t2); + Compile::current()->output()->add_stub(stub); + } + + Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); + { // Lightweight unlock // Check if obj is top of lock-stack. @@ -435,33 +445,37 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis bind(not_recursive); - Label release; const Register t2_owner_addr = t2; // Compute owner address. lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM + stlr(zr, t2_owner_addr); + membar(StoreLoad); + // Check if the entry lists are empty. ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset())); ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset())); orr(rscratch1, rscratch1, t3_t); cmp(rscratch1, zr); - br(Assembler::EQ, release); + br(Assembler::EQ, unlocked); - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - str(rthread, Address(t2_owner_addr)); - b(slow_path); + ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset())); + cmp(rscratch1, zr); + br(Assembler::NE, unlocked); - bind(release); - // Set owner to null. - // Release to satisfy the JMM - stlr(zr, t2_owner_addr); + // Check for, and try to cancel any async deflation. + b(check_deflater); } bind(unlocked); + if (stub != nullptr) { + bind(stub->unlocked_continuation()); + } decrement(Address(rthread, JavaThread::held_monitor_count_offset())); + cmp(zr, zr); // Set Flags to EQ #ifdef ASSERT // Check that unlocked label is reached with Flags == EQ. @@ -471,6 +485,9 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis #endif bind(slow_path); + if (stub != nullptr) { + bind(stub->slow_path_continuation()); + } #ifdef ASSERT // Check that slow_path label is reached with Flags == NE. br(Assembler::NE, flag_correct); diff --git a/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp index 93ee0659a57f8..5048af33358a4 100644 --- a/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, 2022, SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "opto/c2_MacroAssembler.hpp" #include "opto/c2_CodeStubs.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #define __ masm. @@ -57,4 +58,82 @@ void C2SafepointPollStub::emit(C2_MacroAssembler& masm) { __ mtctr(R0); __ bctr(); } + +int C2FastUnlockLightweightStub::max_size() const { + return 256; +} + +void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { + const Register monitor = _mark; + const Register contentions_addr = _t; + const Register current_value = _t; + const Register prev_contentions_value = _mark; + const Register owner_addr = _thread; + + Label slow_path, decrement_contentions_slow_path, decrement_contentions_fast_path; + + { // Check for, and try to cancel any async deflation. + __ bind(_check_deflater); + + // Compute owner address. + __ addi(owner_addr, monitor, in_bytes(ObjectMonitor::owner_offset())); + + // CAS owner (null => current thread). + __ cmpxchgd(/*flag=*/_flag, + current_value, + /*compare_value=*/(intptr_t)0, + /*exchange_value=*/R16_thread, + /*where=*/owner_addr, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock()); + __ beq(_flag, slow_path); + + __ cmpdi(_flag, current_value, reinterpret_cast(DEFLATER_MARKER)); + __ bne(_flag, unlocked_continuation()); + + // The deflator owns the lock. Try to cancel the deflation by + // first incrementing contentions... + __ addi(contentions_addr, monitor, in_bytes(ObjectMonitor::contentions_offset())); + __ li(R0, 1); + __ getandaddw(prev_contentions_value, /*inc_value*/ R0, contentions_addr, /*tmp1*/ _t, MacroAssembler::cmpxchgx_hint_atomic_update()); + + __ cmpwi(_flag, prev_contentions_value, 0); + __ ble(_flag, decrement_contentions_fast_path); // Mr. Deflator won the race. + + // ... then try to take the ownership. If we manage to cancel deflation, + // ObjectMonitor::deflate_monitor() will decrement contentions, which is why + // we don't do it here. + __ cmpxchgd(/*flag=*/_flag, + current_value, + /*compare_value=*/reinterpret_cast(DEFLATER_MARKER), + /*exchange_value=*/R16_thread, + /*where=*/owner_addr, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock()); + __ beq(_flag, slow_path); // We successfully canceled deflation. + + // CAS owner (null => current thread). + __ cmpxchgd(/*flag=*/_flag, + current_value, + /*compare_value=*/(intptr_t)0, + /*exchange_value=*/R16_thread, + /*where=*/owner_addr, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock()); + __ beq(_flag, decrement_contentions_slow_path); + + __ bind(decrement_contentions_fast_path); + __ li(R0, -1); + __ getandaddw(prev_contentions_value, /*inc_value*/ R0, contentions_addr, /*tmp1*/ _t, MacroAssembler::cmpxchgx_hint_atomic_update()); + __ b(unlocked_continuation()); + + __ bind(decrement_contentions_slow_path); + __ li(R0, -1); + __ getandaddw(prev_contentions_value, /*inc_value*/ R0, contentions_addr, /*tmp1*/ _t, MacroAssembler::cmpxchgx_hint_atomic_update()); + __ bind(slow_path); + __ cmpdi(_flag, R16_thread, 0); // Set Flag to NE + __ b(slow_path_continuation()); + } +} + #undef __ diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 4b74b1ae941d8..9d52c81b5334c 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -37,6 +37,7 @@ #include "oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" #include "oops/methodData.hpp" +#include "opto/c2_CodeStubs.hpp" #include "prims/methodHandles.hpp" #include "register_ppc.hpp" #include "runtime/icache.hpp" @@ -2575,6 +2576,16 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f const Register top = tmp2; const Register t = tmp3; + Label dummy; + C2FastUnlockLightweightStub* stub = nullptr; + + if (!Compile::current()->output()->in_scratch_emit_size()) { + stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(flag, obj, mark, tmp3, tmp2); + Compile::current()->output()->add_stub(stub); + } + + Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); + { // Lightweight unlock Label push_and_slow; @@ -2673,31 +2684,34 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f bind(not_recursive); - Label release_; const Register t2 = tmp2; + // Set owner to null. + release(); + li(t, 0); + std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); + membar(StoreLoad); + // Check if the entry lists are empty. ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor); ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor); orr(t, t, t2); cmpdi(flag, t, 0); - beq(flag, release_); + beq(flag, unlocked); - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor); - b(slow_path); + ld(t, in_bytes(ObjectMonitor::succ_offset()), monitor); + cmpdi(flag, t, 0); + bne(flag, unlocked); - bind(release_); - // Set owner to null. - release(); - // t contains 0 - std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); + b(check_deflater); } bind(unlocked); + if (stub != nullptr) { + bind(stub->unlocked_continuation()); + } dec_held_monitor_count(t); + cmpd(flag, t, t); // Set Flag to EQ #ifdef ASSERT // Check that unlocked label is reached with flag == EQ. @@ -2706,6 +2720,9 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f stop("Fast Lock Flag != EQ"); #endif bind(slow_path); + if (stub != nullptr) { + bind(stub->slow_path_continuation()); + } #ifdef ASSERT // Check that slow_path label is reached with flag == NE. bne(flag, flag_correct); diff --git a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp index 7995750aba96b..a9c4e7519be3f 100644 --- a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp @@ -99,4 +99,60 @@ void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { __ j(continuation()); } +int C2FastUnlockLightweightStub::max_size() const { + return 256; +} + +void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { + const Register flag = t1; + const Register monitor = _mark; + const Register contentions_addr = _t; + const Register prev_contentions_value = _mark; + const Register owner_addr = _thread; + + Label slow_path, fast_path, decrement_contentions_slow_path, decrement_contentions_fast_path; + + { // Check for, and try to cancel any async deflation. + __ bind(_check_deflater); + + // CAS owner (null => current thread). + __ cmpxchg(owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t1); + __ beqz(t1, slow_path); + + __ li(t0, reinterpret_cast(DEFLATER_MARKER)); + __ bne(t0, t1, fast_path); + + // The deflator owns the lock. Try to cancel the deflation by + // first incrementing contentions... + __ la(contentions_addr, Address(monitor, ObjectMonitor::contentions_offset())); + __ atomic_addw(prev_contentions_value, 1, contentions_addr); + + __ blez(prev_contentions_value, decrement_contentions_fast_path); // Mr. Deflator won the race. + + // ... then try to take the ownership. If we manage to cancel deflation, + // ObjectMonitor::deflate_monitor() will decrement contentions, which is why + // we don't do it here. + // t1 contains DEFLATER_MARKER (the current owner) + __ cmpxchg(owner_addr, /*expected*/ t1, /*new*/ xthread, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t2); + __ beq(t1, t2, slow_path); // We successfully canceled deflation. + + __ cmpxchg(owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t1); + __ beqz(t1, decrement_contentions_slow_path); + + __ bind(decrement_contentions_fast_path); + __ atomic_addw(noreg, -1, contentions_addr); + __ bind(fast_path); + __ j(unlocked_continuation()); + + __ bind(decrement_contentions_slow_path); + __ atomic_addw(noreg, -1, contentions_addr); + __ bind(slow_path); + __ mv(flag, 1); // Set Flag to NE + __ j(slow_path_continuation()); + } +} + #undef __ diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index dd9a9974f1b94..cfbd09e791248 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -386,6 +386,16 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg const Register tmp2_top = tmp2; const Register tmp3_t = tmp3; + Label dummy; + C2FastUnlockLightweightStub* stub = nullptr; + + if (!Compile::current()->output()->in_scratch_emit_size()) { + stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, tmp1_mark, tmp3, tmp2); + Compile::current()->output()->add_stub(stub); + } + + Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); + { // Lightweight unlock // Check if obj is top of lock-stack. @@ -480,25 +490,30 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg // Compute owner address. la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset())); + // Set owner to null. + // Release to satisfy the JMM + membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + sd(zr, Address(tmp2_owner_addr)); + membar(StoreLoad); + // Check if the entry lists are empty. ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset())); ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset())); orr(t0, t0, tmp3_t); - beqz(t0, release); + beqz(t0, unlocked); - // The owner may be anonymous and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - sd(xthread, Address(tmp2_owner_addr)); - j(slow_path); + // Check successor. + ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset())); + bnez(tmp3_t, unlocked); - bind(release); - // Set owner to null. - membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - sd(zr, Address(tmp2_owner_addr)); + // Check for, and try to cancel any async deflation. + j(check_deflater); } bind(unlocked); + if (stub != nullptr) { + bind(stub->unlocked_continuation()); + } mv(flag, zr); decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2, tmp3); @@ -510,6 +525,9 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg #endif bind(slow_path); + if (stub != nullptr) { + bind(stub->slow_path_continuation()); + } #ifdef ASSERT // Check that slow_path label is reached with flag != 0. bnez(flag, flag_correct); diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp index 6dc8d14064ad2..42a6fd77639c2 100644 --- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp @@ -74,14 +74,12 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { } int C2FastUnlockLightweightStub::max_size() const { - return 128; + return 256; } void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { assert(_t == rax, "must be"); - Label restore_held_monitor_count_and_slow_path; - { // Restore lock-stack and handle the unlock in runtime. __ bind(_push_and_slow_path); @@ -91,55 +89,52 @@ void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { __ movptr(Address(_thread, _t), _obj); #endif __ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize); + __ xorl(rax, 1); // Make ZF = 0 + __ jmp(slow_path_continuation()); } - { // Restore held monitor count and slow path. + { // Check for, and try to cancel any async deflation. + __ bind(_check_deflater); - __ bind(restore_held_monitor_count_and_slow_path); - // Restore held monitor count. - __ increment(Address(_thread, JavaThread::held_monitor_count_offset())); - // increment will always result in ZF = 0 (no overflows). - __ jmp(slow_path_continuation()); - } + const Register monitor = _mark; + Label slow_path, decrement_contentions_slow_path, decrement_contentions_fast_path; + + // CAS owner (null => current thread). + __ xorptr(rax, rax); + __ lock(); __ cmpxchgptr(_thread, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + __ jccb(Assembler::equal, slow_path); - { // Handle monitor medium path. + // rax contains the current owner here + __ cmpptr(rax, reinterpret_cast(DEFLATER_MARKER)); + __ jcc(Assembler::notEqual, unlocked_continuation()); - __ bind(_check_successor); + // The deflator owns the lock. Try to cancel the deflation by + // first incrementing contentions... + __ lock(); __ incrementl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions))); - Label fix_zf_and_unlocked; - const Register monitor = _mark; + __ cmpl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions)), 0); + __ jccb(Assembler::less, decrement_contentions_fast_path); // Mr. Deflator won the race. + + // ... then try to take the ownership. If we manage to cancel deflation, + // ObjectMonitor::deflate_monitor() will decrementcontentions, which is why + // we don'tdo it here. + // rax contains DEFLATER_MARKER (the current owner) + __ lock(); __ cmpxchgptr(_thread, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + __ jcc(Assembler::equal, slow_path); // We successfully canceled deflation. -#ifndef _LP64 - __ jmpb(restore_held_monitor_count_and_slow_path); -#else // _LP64 - // successor null check. - __ cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - __ jccb(Assembler::equal, restore_held_monitor_count_and_slow_path); - - // Release lock. - __ movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - - // Fence. - // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. - __ lock(); __ addl(Address(rsp, 0), 0); - - // Recheck successor. - __ cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - // Observed a successor after the release -> fence we have handed off the monitor - __ jccb(Assembler::notEqual, fix_zf_and_unlocked); - - // Try to relock, if it fails the monitor has been handed over - // TODO: Caveat, this may fail due to deflation, which does - // not handle the monitor handoff. Currently only works - // due to the responsible thread. __ xorptr(rax, rax); __ lock(); __ cmpxchgptr(_thread, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - __ jccb (Assembler::equal, restore_held_monitor_count_and_slow_path); -#endif + __ jccb(Assembler::equal, decrement_contentions_slow_path); - __ bind(fix_zf_and_unlocked); - __ xorl(rax, rax); + __ bind(decrement_contentions_fast_path); + __ lock(); __ decrementl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions))); __ jmp(unlocked_continuation()); + + __ bind(decrement_contentions_slow_path); + __ lock(); __ decrementl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions))); + __ bind(slow_path); + __ xorl(rax, 1); // Make ZF = 0 + __ jmp(slow_path_continuation()); } } diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index d0eb103d81b87..87a484ef69bc1 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1047,10 +1047,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Handle inflated monitor. Label inflated, inflated_check_lock_stack; // Finish fast unlock successfully. MUST jump with ZF == 1 - Label unlocked; - - // Assume success. - decrement(Address(thread, JavaThread::held_monitor_count_offset())); + Label unlocked, slow_path; const Register mark = t; const Register top = reg_rax; @@ -1064,7 +1061,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, } Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path(); - Label& check_successor = stub == nullptr ? dummy : stub->check_successor(); + Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); { // Lightweight Unlock @@ -1140,19 +1137,30 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0); jccb(Assembler::notEqual, recursive); - // Check if the entry lists are empty. + // Release lock. + movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); + + // Memory barrier/fence + // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. + // This is faster on Nehalem and AMD Shanghai/Barcelona. + // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences + lock(); addl(Address(rsp, 0), 0); + + // Check if the entry lists are empty, and if so we are done. movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - jcc(Assembler::notZero, check_successor); + jccb(Assembler::zero, unlocked); - // Release lock. - movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - jmpb(unlocked); + // Check if there is a successor, and of there is we are done. + cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); + jccb(Assembler::notZero, unlocked); + + // Check for, and try to cancel any async deflation. + jmp(check_deflater); // Recursive unlock. bind(recursive); decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - xorl(t, t); #endif } @@ -1160,6 +1168,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, if (stub != nullptr) { bind(stub->unlocked_continuation()); } + decrement(Address(thread, JavaThread::held_monitor_count_offset())); + xorl(t, t); // Set ZF = 1 #ifdef ASSERT // Check that unlocked label is reached with ZF set. @@ -1168,6 +1178,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, stop("Fast Unlock ZF != 1"); #endif + bind(slow_path); if (stub != nullptr) { bind(stub->slow_path_continuation()); } diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp index 1316fa68ed430..f65138027e485 100644 --- a/src/hotspot/share/opto/c2_CodeStubs.hpp +++ b/src/hotspot/share/opto/c2_CodeStubs.hpp @@ -99,20 +99,26 @@ class C2EntryBarrierStub: public C2CodeStub { class C2FastUnlockLightweightStub : public C2CodeStub { private: + PPC_ONLY(ConditionRegister _flag;) Register _obj; Register _mark; Register _t; Register _thread; Label _push_and_slow_path; - Label _check_successor; + Label _check_deflater; Label _unlocked_continuation; public: +#ifdef PPC64 + C2FastUnlockLightweightStub(ConditionRegister flag, Register obj, Register mark, Register t, Register thread) : C2CodeStub(), + _flag(flag), _obj(obj), _mark(mark), _t(t), _thread(thread) {} +#else C2FastUnlockLightweightStub(Register obj, Register mark, Register t, Register thread) : C2CodeStub(), _obj(obj), _mark(mark), _t(t), _thread(thread) {} +#endif int max_size() const; void emit(C2_MacroAssembler& masm); Label& push_and_slow_path() { return _push_and_slow_path; } - Label& check_successor() { return _check_successor; } + Label& check_deflater() { return _check_deflater; } Label& unlocked_continuation() { return _unlocked_continuation; } Label& slow_path_continuation() { return continuation(); } }; diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 178f3e97d7108..92eedf9f19e1f 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -812,7 +812,11 @@ void ObjectMonitor::EnterI(JavaThread* current) { // timer scalability issues we see on some platforms as we'd only have one thread // -- the checker -- parked on a timer. - if (nxt == nullptr && _EntryList == nullptr) { + if (nxt == nullptr && _EntryList == nullptr + X86_ONLY (&& LockingMode != LM_LIGHTWEIGHT) + RISCV_ONLY (&& LockingMode != LM_LIGHTWEIGHT) + AARCH64_ONLY(&& LockingMode != LM_LIGHTWEIGHT) + ) { // Try to assume the role of responsible thread for the monitor. // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=current } Atomic::replace_if_null(&_Responsible, current); @@ -836,6 +840,22 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { break; } + + if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { + // Cancelled the in-progress async deflation by changing owner from + // DEFLATER_MARKER to current. As part of the contended enter protocol, + // contentions was incremented to a positive value before EnterI() + // was called and that prevents the deflater thread from winning the + // last part of the 2-part async deflation protocol. After EnterI() + // returns to enter(), contentions is decremented because the caller + // now owns the monitor. We bump contentions an extra time here to + // prevent the deflater thread from winning the last part of the + // 2-part async deflation protocol after the regular decrement + // occurs in enter(). The deflater thread will decrement contentions + // after it recognizes that the async deflation was cancelled. + add_to_contentions(1); + break; + } assert(owner_raw() != current, "invariant"); // park self @@ -1207,7 +1227,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { return; } - // Invariant: after setting Responsible=null an thread must execute + // Invariant: after setting Responsible=null a thread must execute // a MEMBAR or other serializing instruction before fetching EntryList|cxq. _Responsible = nullptr; @@ -1272,8 +1292,36 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // to reacquire the lock the responsibility for ensuring succession // falls to the new owner. // - if (try_set_owner_from(nullptr, current) != nullptr) { - return; + void* owner = try_set_owner_from(nullptr, current); + if (owner != nullptr) { + if (owner != DEFLATER_MARKER) { + // Owned by another thread, so we are done. + return; + } + + // The deflator owns the lock. Try to cancel the deflation by + // first incrementing contentions... + add_to_contentions(1); + + if (contentions() < 0) { + assert((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != nullptr, ""); + add_to_contentions(-1); + // Mr. Deflator won the race, so we are done. + return; + } + + // ... then try to take the ownership. + if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { + // We successfully canceled deflation. ObjectMonitor::deflate_monitor() + // will decrement contentions, which is why we don't do it here. + } else { + owner = try_set_owner_from(nullptr, current); + add_to_contentions(-1); + if (owner != nullptr) { + // Owned by another thread, so we are done. + return; + } + } } guarantee(owner_raw() == current, "invariant"); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index fdc482a3555d6..f201af95f6c20 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -218,6 +218,7 @@ class ObjectMonitor : public CHeapObj { static ByteSize cxq_offset() { return byte_offset_of(ObjectMonitor, _cxq); } static ByteSize succ_offset() { return byte_offset_of(ObjectMonitor, _succ); } static ByteSize EntryList_offset() { return byte_offset_of(ObjectMonitor, _EntryList); } + static ByteSize contentions_offset() { return byte_offset_of(ObjectMonitor, _contentions); } // ObjectMonitor references can be ORed with markWord::monitor_value // as part of the ObjectMonitor tagging mechanism. When we combine an From 12bf946fa11d2872280dbd89a63c3953bc721343 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Wed, 5 Jun 2024 14:01:44 +0200 Subject: [PATCH 02/11] Small fixes for x86 and PowerPC. --- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 20 ++----------------- src/hotspot/share/runtime/objectMonitor.cpp | 1 + 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 87a484ef69bc1..872abe4b0425b 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1116,26 +1116,11 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // mark contains the tagged ObjectMonitor*. const Register monitor = mark; - -#ifndef _LP64 - // Check if recursive. - xorptr(reg_rax, reg_rax); - orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - jcc(Assembler::notZero, check_successor); - - // Check if the entry lists are empty. - movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - jcc(Assembler::notZero, check_successor); - - // Release lock. - movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); -#else // _LP64 Label recursive; // Check if recursive. cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0); - jccb(Assembler::notEqual, recursive); + jccb(Assembler::notZero, recursive); // Release lock. movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); @@ -1151,7 +1136,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); jccb(Assembler::zero, unlocked); - // Check if there is a successor, and of there is we are done. + // Check if there is a successor, and if there is we are done. cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); jccb(Assembler::notZero, unlocked); @@ -1161,7 +1146,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Recursive unlock. bind(recursive); decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); -#endif } bind(unlocked); diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 92eedf9f19e1f..4e3f5551f4600 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -816,6 +816,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { X86_ONLY (&& LockingMode != LM_LIGHTWEIGHT) RISCV_ONLY (&& LockingMode != LM_LIGHTWEIGHT) AARCH64_ONLY(&& LockingMode != LM_LIGHTWEIGHT) + PPC_ONLY (&& LockingMode != LM_LIGHTWEIGHT) ) { // Try to assume the role of responsible thread for the monitor. // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=current } From 25a19dbc3e21a9793aaf61cf3c76e05049d2c99e Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Mon, 15 Jul 2024 20:58:58 +0200 Subject: [PATCH 03/11] Moved complexity from assembler to c++ --- .../cpu/aarch64/c2_CodeStubs_aarch64.cpp | 57 +-------- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 23 +--- src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp | 81 +------------ src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 22 +--- src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp | 56 --------- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 21 +--- src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp | 48 +------- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 19 ++- src/hotspot/share/opto/c2_CodeStubs.hpp | 8 -- src/hotspot/share/runtime/objectMonitor.cpp | 114 +++++++----------- src/hotspot/share/runtime/objectMonitor.hpp | 9 +- src/hotspot/share/runtime/sharedRuntime.cpp | 13 ++ 12 files changed, 81 insertions(+), 390 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp index b0ed46675c1e6..dabafb9288b83 100644 --- a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -91,59 +91,4 @@ void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { __ b(continuation()); } -int C2FastUnlockLightweightStub::max_size() const { - return 256; -} - -void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { - const Register monitor = _mark; - const Register contentions_addr = _t; - const Register prev_contentions_value = _mark; - const Register owner_addr = _thread; - - Label slow_path, decrement_contentions_slow_path, decrement_contentions_fast_path; - - { // Check for, and try to cancel any async deflation. - __ bind(_check_deflater); - - // CAS owner (null => current thread). - __ cmpxchg(owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true, - /*release*/ false, /*weak*/ false, _t); - __ br(Assembler::EQ, slow_path); - - __ cmp(_t, checked_cast(reinterpret_cast(DEFLATER_MARKER))); - __ br(Assembler::NE, unlocked_continuation()); - - // The deflator owns the lock. Try to cancel the deflation by - // first incrementing contentions... - __ lea(contentions_addr, Address(monitor, ObjectMonitor::contentions_offset())); - __ atomic_addw(prev_contentions_value, 1, contentions_addr); - - __ cmp(prev_contentions_value, zr); - __ br(Assembler::LS, decrement_contentions_fast_path); // Mr. Deflator won the race. - - // ... then try to take the ownership. If we manage to cancel deflation, - // ObjectMonitor::deflate_monitor() will decrement contentions, which is why - // we don't do it here. - __ mov(rscratch2, checked_cast(reinterpret_cast(DEFLATER_MARKER))); - __ cmpxchg(owner_addr, rscratch2, rthread, Assembler::xword, /*acquire*/ true, - /*release*/ false, /*weak*/ false, zr); - __ br(Assembler::EQ, slow_path); // We successfully canceled deflation. - - __ cmpxchg(owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true, - /*release*/ false, /*weak*/ false, zr); - __ br(Assembler::EQ, decrement_contentions_slow_path); - - __ bind(decrement_contentions_fast_path); - __ atomic_addw(noreg, -1, contentions_addr); - __ b(unlocked_continuation()); - - __ bind(decrement_contentions_slow_path); - __ atomic_addw(noreg, -1, contentions_addr); - __ bind(slow_path); - __ cmp(zr, rthread); // Set Flag to NE - __ b(slow_path_continuation()); - } -} - #undef __ diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 2d713a39d47a6..39e51e21a50f1 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -339,7 +339,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis // Handle inflated monitor. Label inflated, inflated_load_monitor; // Finish fast unlock successfully. MUST branch to with flag == EQ - Label unlocked, set_eq_unlocked; + Label unlocked; // Finish fast unlock unsuccessfully. MUST branch to with flag == NE Label slow_path; @@ -347,16 +347,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis const Register t2_top = t2; const Register t3_t = t3; - Label dummy; - C2FastUnlockLightweightStub* stub = nullptr; - - if (!Compile::current()->output()->in_scratch_emit_size()) { - stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, t1, t3, t2); - Compile::current()->output()->add_stub(stub); - } - - Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); - { // Lightweight unlock // Check if obj is top of lock-stack. @@ -465,15 +455,11 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset())); cmp(rscratch1, zr); br(Assembler::NE, unlocked); - - // Check for, and try to cancel any async deflation. - b(check_deflater); + cmp(zr, rthread); // Set Flag to NE + b(slow_path); } bind(unlocked); - if (stub != nullptr) { - bind(stub->unlocked_continuation()); - } decrement(Address(rthread, JavaThread::held_monitor_count_offset())); cmp(zr, zr); // Set Flags to EQ @@ -485,9 +471,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis #endif bind(slow_path); - if (stub != nullptr) { - bind(stub->slow_path_continuation()); - } #ifdef ASSERT // Check that slow_path label is reached with Flags == NE. br(Assembler::NE, flag_correct); diff --git a/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp index 5048af33358a4..93ee0659a57f8 100644 --- a/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, 2022, SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -26,7 +26,6 @@ #include "precompiled.hpp" #include "opto/c2_MacroAssembler.hpp" #include "opto/c2_CodeStubs.hpp" -#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #define __ masm. @@ -58,82 +57,4 @@ void C2SafepointPollStub::emit(C2_MacroAssembler& masm) { __ mtctr(R0); __ bctr(); } - -int C2FastUnlockLightweightStub::max_size() const { - return 256; -} - -void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { - const Register monitor = _mark; - const Register contentions_addr = _t; - const Register current_value = _t; - const Register prev_contentions_value = _mark; - const Register owner_addr = _thread; - - Label slow_path, decrement_contentions_slow_path, decrement_contentions_fast_path; - - { // Check for, and try to cancel any async deflation. - __ bind(_check_deflater); - - // Compute owner address. - __ addi(owner_addr, monitor, in_bytes(ObjectMonitor::owner_offset())); - - // CAS owner (null => current thread). - __ cmpxchgd(/*flag=*/_flag, - current_value, - /*compare_value=*/(intptr_t)0, - /*exchange_value=*/R16_thread, - /*where=*/owner_addr, - MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, - MacroAssembler::cmpxchgx_hint_acquire_lock()); - __ beq(_flag, slow_path); - - __ cmpdi(_flag, current_value, reinterpret_cast(DEFLATER_MARKER)); - __ bne(_flag, unlocked_continuation()); - - // The deflator owns the lock. Try to cancel the deflation by - // first incrementing contentions... - __ addi(contentions_addr, monitor, in_bytes(ObjectMonitor::contentions_offset())); - __ li(R0, 1); - __ getandaddw(prev_contentions_value, /*inc_value*/ R0, contentions_addr, /*tmp1*/ _t, MacroAssembler::cmpxchgx_hint_atomic_update()); - - __ cmpwi(_flag, prev_contentions_value, 0); - __ ble(_flag, decrement_contentions_fast_path); // Mr. Deflator won the race. - - // ... then try to take the ownership. If we manage to cancel deflation, - // ObjectMonitor::deflate_monitor() will decrement contentions, which is why - // we don't do it here. - __ cmpxchgd(/*flag=*/_flag, - current_value, - /*compare_value=*/reinterpret_cast(DEFLATER_MARKER), - /*exchange_value=*/R16_thread, - /*where=*/owner_addr, - MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, - MacroAssembler::cmpxchgx_hint_acquire_lock()); - __ beq(_flag, slow_path); // We successfully canceled deflation. - - // CAS owner (null => current thread). - __ cmpxchgd(/*flag=*/_flag, - current_value, - /*compare_value=*/(intptr_t)0, - /*exchange_value=*/R16_thread, - /*where=*/owner_addr, - MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, - MacroAssembler::cmpxchgx_hint_acquire_lock()); - __ beq(_flag, decrement_contentions_slow_path); - - __ bind(decrement_contentions_fast_path); - __ li(R0, -1); - __ getandaddw(prev_contentions_value, /*inc_value*/ R0, contentions_addr, /*tmp1*/ _t, MacroAssembler::cmpxchgx_hint_atomic_update()); - __ b(unlocked_continuation()); - - __ bind(decrement_contentions_slow_path); - __ li(R0, -1); - __ getandaddw(prev_contentions_value, /*inc_value*/ R0, contentions_addr, /*tmp1*/ _t, MacroAssembler::cmpxchgx_hint_atomic_update()); - __ bind(slow_path); - __ cmpdi(_flag, R16_thread, 0); // Set Flag to NE - __ b(slow_path_continuation()); - } -} - #undef __ diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 9d52c81b5334c..624a5538a3bc6 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -37,7 +37,6 @@ #include "oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" #include "oops/methodData.hpp" -#include "opto/c2_CodeStubs.hpp" #include "prims/methodHandles.hpp" #include "register_ppc.hpp" #include "runtime/icache.hpp" @@ -2576,16 +2575,6 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f const Register top = tmp2; const Register t = tmp3; - Label dummy; - C2FastUnlockLightweightStub* stub = nullptr; - - if (!Compile::current()->output()->in_scratch_emit_size()) { - stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(flag, obj, mark, tmp3, tmp2); - Compile::current()->output()->add_stub(stub); - } - - Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); - { // Lightweight unlock Label push_and_slow; @@ -2703,15 +2692,13 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f cmpdi(flag, t, 0); bne(flag, unlocked); - b(check_deflater); + crxor(flag, Assembler::equal, flag, Assembler::equal); // Fast Lock Flag = NE + b(slow_path); } bind(unlocked); - if (stub != nullptr) { - bind(stub->unlocked_continuation()); - } dec_held_monitor_count(t); - cmpd(flag, t, t); // Set Flag to EQ + crorc(flag, Assembler::equal, flag, Assembler::equal); // Fast Lock Flag = EQ #ifdef ASSERT // Check that unlocked label is reached with flag == EQ. @@ -2720,9 +2707,6 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f stop("Fast Lock Flag != EQ"); #endif bind(slow_path); - if (stub != nullptr) { - bind(stub->slow_path_continuation()); - } #ifdef ASSERT // Check that slow_path label is reached with flag == NE. bne(flag, flag_correct); diff --git a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp index a9c4e7519be3f..7995750aba96b 100644 --- a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp @@ -99,60 +99,4 @@ void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { __ j(continuation()); } -int C2FastUnlockLightweightStub::max_size() const { - return 256; -} - -void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { - const Register flag = t1; - const Register monitor = _mark; - const Register contentions_addr = _t; - const Register prev_contentions_value = _mark; - const Register owner_addr = _thread; - - Label slow_path, fast_path, decrement_contentions_slow_path, decrement_contentions_fast_path; - - { // Check for, and try to cancel any async deflation. - __ bind(_check_deflater); - - // CAS owner (null => current thread). - __ cmpxchg(owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64, - /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t1); - __ beqz(t1, slow_path); - - __ li(t0, reinterpret_cast(DEFLATER_MARKER)); - __ bne(t0, t1, fast_path); - - // The deflator owns the lock. Try to cancel the deflation by - // first incrementing contentions... - __ la(contentions_addr, Address(monitor, ObjectMonitor::contentions_offset())); - __ atomic_addw(prev_contentions_value, 1, contentions_addr); - - __ blez(prev_contentions_value, decrement_contentions_fast_path); // Mr. Deflator won the race. - - // ... then try to take the ownership. If we manage to cancel deflation, - // ObjectMonitor::deflate_monitor() will decrement contentions, which is why - // we don't do it here. - // t1 contains DEFLATER_MARKER (the current owner) - __ cmpxchg(owner_addr, /*expected*/ t1, /*new*/ xthread, Assembler::int64, - /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t2); - __ beq(t1, t2, slow_path); // We successfully canceled deflation. - - __ cmpxchg(owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64, - /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t1); - __ beqz(t1, decrement_contentions_slow_path); - - __ bind(decrement_contentions_fast_path); - __ atomic_addw(noreg, -1, contentions_addr); - __ bind(fast_path); - __ j(unlocked_continuation()); - - __ bind(decrement_contentions_slow_path); - __ atomic_addw(noreg, -1, contentions_addr); - __ bind(slow_path); - __ mv(flag, 1); // Set Flag to NE - __ j(slow_path_continuation()); - } -} - #undef __ diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index cfbd09e791248..9097248f6754f 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -386,16 +386,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg const Register tmp2_top = tmp2; const Register tmp3_t = tmp3; - Label dummy; - C2FastUnlockLightweightStub* stub = nullptr; - - if (!Compile::current()->output()->in_scratch_emit_size()) { - stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, tmp1_mark, tmp3, tmp2); - Compile::current()->output()->add_stub(stub); - } - - Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); - { // Lightweight unlock // Check if obj is top of lock-stack. @@ -505,15 +495,11 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg // Check successor. ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset())); bnez(tmp3_t, unlocked); - - // Check for, and try to cancel any async deflation. - j(check_deflater); + mv(flag, 1); + j(slow_path); } bind(unlocked); - if (stub != nullptr) { - bind(stub->unlocked_continuation()); - } mv(flag, zr); decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2, tmp3); @@ -525,9 +511,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg #endif bind(slow_path); - if (stub != nullptr) { - bind(stub->slow_path_continuation()); - } #ifdef ASSERT // Check that slow_path label is reached with flag != 0. bnez(flag, flag_correct); diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp index 42a6fd77639c2..44f897529e7ce 100644 --- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp @@ -74,7 +74,7 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { } int C2FastUnlockLightweightStub::max_size() const { - return 256; + return 128; } void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { @@ -89,51 +89,7 @@ void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { __ movptr(Address(_thread, _t), _obj); #endif __ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize); - __ xorl(rax, 1); // Make ZF = 0 - __ jmp(slow_path_continuation()); - } - - { // Check for, and try to cancel any async deflation. - __ bind(_check_deflater); - - const Register monitor = _mark; - Label slow_path, decrement_contentions_slow_path, decrement_contentions_fast_path; - - // CAS owner (null => current thread). - __ xorptr(rax, rax); - __ lock(); __ cmpxchgptr(_thread, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - __ jccb(Assembler::equal, slow_path); - - // rax contains the current owner here - __ cmpptr(rax, reinterpret_cast(DEFLATER_MARKER)); - __ jcc(Assembler::notEqual, unlocked_continuation()); - - // The deflator owns the lock. Try to cancel the deflation by - // first incrementing contentions... - __ lock(); __ incrementl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions))); - - __ cmpl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions)), 0); - __ jccb(Assembler::less, decrement_contentions_fast_path); // Mr. Deflator won the race. - - // ... then try to take the ownership. If we manage to cancel deflation, - // ObjectMonitor::deflate_monitor() will decrementcontentions, which is why - // we don'tdo it here. - // rax contains DEFLATER_MARKER (the current owner) - __ lock(); __ cmpxchgptr(_thread, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - __ jcc(Assembler::equal, slow_path); // We successfully canceled deflation. - - __ xorptr(rax, rax); - __ lock(); __ cmpxchgptr(_thread, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - __ jccb(Assembler::equal, decrement_contentions_slow_path); - - __ bind(decrement_contentions_fast_path); - __ lock(); __ decrementl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions))); - __ jmp(unlocked_continuation()); - - __ bind(decrement_contentions_slow_path); - __ lock(); __ decrementl(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(contentions))); - __ bind(slow_path); - __ xorl(rax, 1); // Make ZF = 0 + // addl will always result in ZF = 0 (no overflows). __ jmp(slow_path_continuation()); } } diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 872abe4b0425b..f297396feb19b 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1061,7 +1061,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, } Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path(); - Label& check_deflater = stub == nullptr ? dummy : stub->check_deflater(); { // Lightweight Unlock @@ -1131,17 +1130,16 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences lock(); addl(Address(rsp, 0), 0); - // Check if the entry lists are empty, and if so we are done. + // Check if the entry lists are empty. movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - jccb(Assembler::zero, unlocked); + jccb(Assembler::zero, unlocked); // If so we are done. - // Check if there is a successor, and if there is we are done. + // Check if there is a successor. cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - jccb(Assembler::notZero, unlocked); - - // Check for, and try to cancel any async deflation. - jmp(check_deflater); + jccb(Assembler::notZero, unlocked); // If so we are done. + testl(monitor, monitor); // Fast Unlock ZF = 0 + jmpb(slow_path); // Recursive unlock. bind(recursive); @@ -1149,11 +1147,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, } bind(unlocked); - if (stub != nullptr) { - bind(stub->unlocked_continuation()); - } decrement(Address(thread, JavaThread::held_monitor_count_offset())); - xorl(t, t); // Set ZF = 1 + xorl(t, t); // Fast Unlock ZF = 1 #ifdef ASSERT // Check that unlocked label is reached with ZF set. diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp index f65138027e485..0adcff4219a2e 100644 --- a/src/hotspot/share/opto/c2_CodeStubs.hpp +++ b/src/hotspot/share/opto/c2_CodeStubs.hpp @@ -99,26 +99,18 @@ class C2EntryBarrierStub: public C2CodeStub { class C2FastUnlockLightweightStub : public C2CodeStub { private: - PPC_ONLY(ConditionRegister _flag;) Register _obj; Register _mark; Register _t; Register _thread; Label _push_and_slow_path; - Label _check_deflater; Label _unlocked_continuation; public: -#ifdef PPC64 - C2FastUnlockLightweightStub(ConditionRegister flag, Register obj, Register mark, Register t, Register thread) : C2CodeStub(), - _flag(flag), _obj(obj), _mark(mark), _t(t), _thread(thread) {} -#else C2FastUnlockLightweightStub(Register obj, Register mark, Register t, Register thread) : C2CodeStub(), _obj(obj), _mark(mark), _t(t), _thread(thread) {} -#endif int max_size() const; void emit(C2_MacroAssembler& masm); Label& push_and_slow_path() { return _push_and_slow_path; } - Label& check_deflater() { return _check_deflater; } Label& unlocked_continuation() { return _unlocked_continuation; } Label& slow_path_continuation() { return continuation(); } }; diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 4e3f5551f4600..33be1f88bbae5 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -340,9 +340,6 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) { set_owner_from_BasicLock(prev_owner, locking_thread); success = true; } - assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT - ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}, observed owner: " INTPTR_FORMAT, - p2i(locking_thread), p2i(this), p2i(owner_raw()), p2i(prev_owner)); } else { // Async deflation is in progress and our contentions increment // above lost the race to async deflation. Undo the work and @@ -524,16 +521,32 @@ bool ObjectMonitor::enter(JavaThread* current) { ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) { void* own = owner_raw(); - if (own != nullptr) return TryLockResult::HasOwner; - if (try_set_owner_from(nullptr, current) == nullptr) { - assert(_recursions == 0, "invariant"); - return TryLockResult::Success; + void* first_own = own; + + for (;;) { + if (own == DEFLATER_MARKER) { + if (enter_for(current)) { + assert(_recursions == 0, "invariant"); + return TryLockResult::Success; + } else { + // Deflation won or change of owner; dont spin + break; + } + } else if (own == nullptr) { + void* prev_own = try_set_owner_from(nullptr, current); + if (prev_own == nullptr) { + assert(_recursions == 0, "invariant"); + return TryLockResult::Success; + } else { + // The lock had been free momentarily, but we lost the race to the lock. + own = prev_own; + } + } else { + // Retry doesn't make as much sense because the lock was just acquired. + break; + } } - // The lock had been free momentarily, but we lost the race to the lock. - // Interference -- the CAS failed. - // We can either return -1 or retry. - // Retry doesn't make as much sense because the lock was just acquired. - return TryLockResult::Interference; + return first_own == own ? TryLockResult::HasOwner : TryLockResult::Interference; } // Deflate the specified ObjectMonitor if not in-use. Returns true if it @@ -717,24 +730,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { return; } - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // Cancelled the in-progress async deflation by changing owner from - // DEFLATER_MARKER to current. As part of the contended enter protocol, - // contentions was incremented to a positive value before EnterI() - // was called and that prevents the deflater thread from winning the - // last part of the 2-part async deflation protocol. After EnterI() - // returns to enter(), contentions is decremented because the caller - // now owns the monitor. We bump contentions an extra time here to - // prevent the deflater thread from winning the last part of the - // 2-part async deflation protocol after the regular decrement - // occurs in enter(). The deflater thread will decrement contentions - // after it recognizes that the async deflation was cancelled. - add_to_contentions(1); - assert(_succ != current, "invariant"); - assert(_Responsible != current, "invariant"); - return; - } - assert(InitDone, "Unexpectedly not initialized"); // We try one round of spinning *before* enqueueing current. @@ -841,22 +836,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { break; } - - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // Cancelled the in-progress async deflation by changing owner from - // DEFLATER_MARKER to current. As part of the contended enter protocol, - // contentions was incremented to a positive value before EnterI() - // was called and that prevents the deflater thread from winning the - // last part of the 2-part async deflation protocol. After EnterI() - // returns to enter(), contentions is decremented because the caller - // now owns the monitor. We bump contentions an extra time here to - // prevent the deflater thread from winning the last part of the - // 2-part async deflation protocol after the regular decrement - // occurs in enter(). The deflater thread will decrement contentions - // after it recognizes that the async deflation was cancelled. - add_to_contentions(1); - break; - } assert(owner_raw() != current, "invariant"); // park self @@ -875,22 +854,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { break; } - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // Cancelled the in-progress async deflation by changing owner from - // DEFLATER_MARKER to current. As part of the contended enter protocol, - // contentions was incremented to a positive value before EnterI() - // was called and that prevents the deflater thread from winning the - // last part of the 2-part async deflation protocol. After EnterI() - // returns to enter(), contentions is decremented because the caller - // now owns the monitor. We bump contentions an extra time here to - // prevent the deflater thread from winning the last part of the - // 2-part async deflation protocol after the regular decrement - // occurs in enter(). The deflater thread will decrement contentions - // after it recognizes that the async deflation was cancelled. - add_to_contentions(1); - break; - } - // The lock is still contested. // Keep a tally of the # of futile wakeups. @@ -1300,26 +1263,39 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { return; } - // The deflator owns the lock. Try to cancel the deflation by - // first incrementing contentions... + // The deflator owns the lock. Now try to cancel the async + // deflation. As part of the contended enter protocol, + // contentions was incremented to a positive value before + // EnterI() was called and that prevents the deflater thread + // from winning the last part of the 2-part async deflation + // protocol. After EnterI() returns to enter(), contentions is + // decremented because the caller now owns the monitor. We need + // to increment contentions an extra time here to prevent the + // deflater thread from winning the last part of the 2-part + // async deflation protocol after the regular decrement occurs + // in enter(). add_to_contentions(1); if (contentions() < 0) { assert((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != nullptr, ""); + // Mr. Deflator won the race. Rebalance contentions, and then + // we are done. add_to_contentions(-1); - // Mr. Deflator won the race, so we are done. return; } - // ... then try to take the ownership. + // Now try to take the ownership. if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // We successfully canceled deflation. ObjectMonitor::deflate_monitor() - // will decrement contentions, which is why we don't do it here. + // We successfully cancelled the in-progress async deflation. + // ObjectMonitor::deflate_monitor() will decrement contentions + // after it recognizes that the async deflation was + // cancelled, which is why we don't do it here. } else { owner = try_set_owner_from(nullptr, current); add_to_contentions(-1); if (owner != nullptr) { - // Owned by another thread, so we are done. + // The lock is owned by another thread, who is now + // responsible for ensuring succession, so we are done. return; } } diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index f201af95f6c20..42d234aaf916e 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -337,6 +337,10 @@ class ObjectMonitor : public CHeapObj { void notify(TRAPS); void notifyAll(TRAPS); + enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; + + TryLockResult TryLock(JavaThread* current); + void print() const; #ifdef ASSERT void print_debug_style_on(outputStream* st) const; @@ -355,11 +359,6 @@ class ObjectMonitor : public CHeapObj { void ReenterI(JavaThread* current, ObjectWaiter* current_node); void UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* current_node); - - enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; - - TryLockResult TryLock(JavaThread* current); - bool TrySpin(JavaThread* current); bool short_fixed_spin(JavaThread* current, int spin_count, bool adapt); void ExitEpilog(JavaThread* current, ObjectWaiter* Wakee); diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 0a7f5c5467668..87fa7e5ec3f16 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -63,6 +63,7 @@ #include "runtime/java.hpp" #include "runtime/javaCalls.hpp" #include "runtime/jniHandles.inline.hpp" +#include "runtime/objectMonitor.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stackWatermarkSet.hpp" #include "runtime/stubRoutines.hpp" @@ -1914,6 +1915,18 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea ExceptionMark em(current); // The object could become unlocked through a JNI call, which we have no other checks for. // Give a fatal message if CheckJNICalls. Otherwise we ignore it. + + markWord mark = obj->mark(); + if (mark.has_monitor()) { + ObjectMonitor* m = mark.monitor(); + assert(m->object()->mark() == mark, "invariant"); + if (!m->is_entered(current)) { + if (m->TryLock(current) != ObjectMonitor::TryLockResult::Success) { + current->inc_held_monitor_count(-1); + return; + } + } + } if (obj->is_unlocked()) { if (CheckJNICalls) { fatal("Object has been unlocked by JNI"); From f3df50e8dea0c49bae7f252ef28b11b6e858aa1d Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Wed, 7 Aug 2024 11:46:44 +0200 Subject: [PATCH 04/11] Fixed legacy locking --- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 54 ++++++++--- src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 53 ++++++++--- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 40 ++++++-- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 91 ++++++------------- src/hotspot/share/runtime/javaThread.cpp | 1 + src/hotspot/share/runtime/javaThread.hpp | 8 ++ src/hotspot/share/runtime/objectMonitor.cpp | 8 +- src/hotspot/share/runtime/sharedRuntime.cpp | 33 ++++--- 8 files changed, 176 insertions(+), 112 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 4e6d92ae6205e..b9873db8befd1 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -150,10 +150,12 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe Register oop = objectReg; Register box = boxReg; Register disp_hdr = tmpReg; + Register owner_addr = tmpReg; Register tmp = tmp2Reg; Label cont; Label object_has_monitor; Label count, no_count; + Label unlocked; assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight"); assert_different_registers(oop, box, tmp, disp_hdr); @@ -204,14 +206,38 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe b(cont); bind(notRecursive); + + // Compute owner address. + lea(owner_addr, Address(tmp, ObjectMonitor::owner_offset())); + + // Set owner to null. + // Release to satisfy the JMM + stlr(zr, owner_addr); + membar(StoreLoad); + + // Check if the entry lists are empty. ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset())); - ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); - orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0. - cmp(rscratch1, zr); // Sets flags for result - cbnz(rscratch1, cont); - // need a release store here - lea(tmp, Address(tmp, ObjectMonitor::owner_offset())); - stlr(zr, tmp); // set unowned + ldr(tmpReg, Address(tmp, ObjectMonitor::cxq_offset())); + orr(rscratch1, rscratch1, tmpReg); + cmp(rscratch1, zr); + br(Assembler::EQ, cont); // If so we are done. + + // Check if there is a successor. + ldr(rscratch1, Address(tmp, ObjectMonitor::succ_offset())); + cmp(rscratch1, zr); + br(Assembler::NE, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + str(tmp, Address(rthread, JavaThread::unlocked_inflated_monitor_offset())); + + cmp(zr, rthread); // Set Flag to NE => slow path + b(cont); + + bind(unlocked); + cmp(zr, zr); // Set Flag to EQ => fast path + + // Intentional fall-through bind(cont); // flag == EQ indicates success @@ -450,18 +476,24 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Regis ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset())); orr(rscratch1, rscratch1, t3_t); cmp(rscratch1, zr); - br(Assembler::EQ, unlocked); + br(Assembler::EQ, unlocked); // If so we are done. + // Check if there is a successor. ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset())); cmp(rscratch1, zr); - br(Assembler::NE, unlocked); - cmp(zr, rthread); // Set Flag to NE + br(Assembler::NE, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + str(t1_monitor, Address(rthread, JavaThread::unlocked_inflated_monitor_offset())); + + cmp(zr, rthread); // Set Flag to NE => slow path b(slow_path); } bind(unlocked); decrement(Address(rthread, JavaThread::held_monitor_count_offset())); - cmp(zr, zr); // Set Flags to EQ + cmp(zr, zr); // Set Flags to EQ => fast path #ifdef ASSERT // Check that unlocked label is reached with Flags == EQ. diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index d5717b923fadb..979694c368966 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2661,7 +2661,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe Register temp, Register displaced_header, Register current_header) { assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_unlock_lightweight"); assert_different_registers(oop, box, temp, displaced_header, current_header); - Label success, failure, object_has_monitor, notRecursive; + Label success, unlocked, failure, object_has_monitor, notRecursive; if (LockingMode == LM_LEGACY) { // Find the lock address and load the displaced header from the stack. @@ -2722,13 +2722,35 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe b(success); bind(notRecursive); - ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header); - ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header); - orr(temp, temp, displaced_header); // Will be 0 if both are 0. - cmpdi(flag, temp, 0); - bne(flag, failure); + const Register temp2 = displaced_header; + + // Set owner to null. release(); + li(temp, 0); std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header); + membar(StoreLoad); + + // Check if the entry lists are empty. + ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header); + ld(temp2, in_bytes(ObjectMonitor::cxq_offset()), current_header); + orr(temp, temp, temp2); + cmpdi(flag, temp, 0); + beq(flag, success); // If so we are done. + + // Check if there is a successor. + ld(temp, in_bytes(ObjectMonitor::succ_offset()), current_header); + cmpdi(flag, temp, 0); + bne(flag, success); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + std(current_header, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread); + + crxor(flag, Assembler::equal, flag, Assembler::equal); // Set flag = NE => slow path + b(failure); + + bind(unlocked); + crorc(flag, Assembler::equal, flag, Assembler::equal); // Set flag = EQ => fast path // flag == EQ indicates success, decrement held monitor count // flag == NE indicates failure @@ -2965,8 +2987,6 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f bind(not_recursive); - const Register t2 = tmp2; - // Set owner to null. release(); li(t, 0); @@ -2975,22 +2995,27 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f // Check if the entry lists are empty. ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor); - ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor); - orr(t, t, t2); + ld(tmp2, in_bytes(ObjectMonitor::cxq_offset()), monitor); + orr(t, t, tmp2); cmpdi(flag, t, 0); - beq(flag, unlocked); + beq(flag, unlocked); // If so we are done. + // Check if there is a successor. ld(t, in_bytes(ObjectMonitor::succ_offset()), monitor); cmpdi(flag, t, 0); - bne(flag, unlocked); + bne(flag, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + std(monitor, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread); - crxor(flag, Assembler::equal, flag, Assembler::equal); // Fast Lock Flag = NE + crxor(flag, Assembler::equal, flag, Assembler::equal); // Set flag = NE => slow path b(slow_path); } bind(unlocked); dec_held_monitor_count(t); - crorc(flag, Assembler::equal, flag, Assembler::equal); // Fast Lock Flag = EQ + crorc(flag, Assembler::equal, flag, Assembler::equal); // Set flag = EQ => fast path #ifdef ASSERT // Check that unlocked label is reached with flag == EQ. diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 80748c420ba49..aaf29fccf9d99 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -165,6 +165,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Register oop = objectReg; Register box = boxReg; Register disp_hdr = tmp1Reg; + Register owner_addr = tmp1Reg; Register tmp = tmp2Reg; Label object_has_monitor; // Finish fast lock successfully. MUST branch to with flag == 0 @@ -222,15 +223,31 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, j(unlocked); bind(notRecursive); + // Compute owner address. + la(owner_addr, Address(tmp, ObjectMonitor::owner_offset())); + + // Set owner to null. + // Release to satisfy the JMM + membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + sd(zr, Address(owner_addr)); + membar(StoreLoad); + + // Check if the entry lists are empty. ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); - orr(t0, t0, disp_hdr); // Will be 0 if both are 0. - bnez(t0, slow_path); + orr(t0, t0, disp_hdr); + beqz(t0, unlocked); // If so we are done. - // need a release store here - la(tmp, Address(tmp, ObjectMonitor::owner_offset())); - membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - sd(zr, Address(tmp)); // set unowned + // Check if there is a successor. + ld(t0, Address(tmp, ObjectMonitor::succ_offset())); + bnez(t0, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + sd(tmp, Address(xthread, JavaThread::unlocked_inflated_monitor_offset())); + + mv(flag, 1); + j(slow_path); bind(unlocked); mv(flag, zr); @@ -490,11 +507,16 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset())); ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset())); orr(t0, t0, tmp3_t); - beqz(t0, unlocked); + beqz(t0, unlocked); // If so we are done. - // Check successor. + // Check if there is a successor. ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset())); - bnez(tmp3_t, unlocked); + bnez(tmp3_t, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try + // to reacquire the lock in SharedRuntime::monitor_exit_helper(). + sd(tmp1_monitor, Address(xthread, JavaThread::unlocked_inflated_monitor_offset())); + mv(flag, 1); j(slow_path); } diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 6d2e57589b6ff..0b01d8478f454 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -460,87 +460,46 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // IA32's memory-model is SPO, so STs are ordered with respect to // each other and there's no need for an explicit barrier (fence). // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. -#ifndef _LP64 - // Note that we could employ various encoding schemes to reduce - // the number of loads below (currently 4) to just 2 or 3. - // Refer to the comments in synchronizer.cpp. - // In practice the chain of fetches doesn't seem to impact performance, however. - xorptr(boxReg, boxReg); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - jccb (Assembler::notZero, DONE_LABEL); - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - jccb (Assembler::notZero, DONE_LABEL); - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - jmpb (DONE_LABEL); -#else // _LP64 - // It's inflated - Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath; + Label LSuccess, LNotRecursive; cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0); jccb(Assembler::equal, LNotRecursive); // Recursive inflated unlock - decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); + decrement(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); jmpb(LSuccess); bind(LNotRecursive); - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - jccb (Assembler::notZero, CheckSucc); - // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); - jmpb (DONE_LABEL); - - // Try to avoid passing control into the slow_path ... - bind (CheckSucc); - - // The following optional optimization can be elided if necessary - // Effectively: if (succ == null) goto slow path - // The code reduces the window for a race, however, - // and thus benefits performance. - cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - jccb (Assembler::zero, LGoSlowPath); - - xorptr(boxReg, boxReg); - // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. + // Release lock. movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); // Memory barrier/fence - // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. // This is faster on Nehalem and AMD Shanghai/Barcelona. // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences - // We might also restructure (ST Owner=0;barrier;LD _Succ) to - // (mov box,0; xchgq box, &m->Owner; LD _succ) . lock(); addl(Address(rsp, 0), 0); + // Check if the entry lists are empty. + movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); + orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); + jccb(Assembler::zero, LSuccess); // If so we are done. + + // Check if there is a successor. cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); - jccb (Assembler::notZero, LSuccess); - - // Rare inopportune interleaving - race. - // The successor vanished in the small window above. - // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. - // We need to ensure progress and succession. - // Try to reacquire the lock. - // If that fails then the new owner is responsible for succession and this - // thread needs to take no further action and can exit via the fast path (success). - // If the re-acquire succeeds then pass control into the slow path. - // As implemented, this latter mode is horrible because we generated more - // coherence traffic on the lock *and* artificially extended the critical section - // length while by virtue of passing control into the slow path. - - // box is really RAX -- the following CMPXCHG depends on that binding - // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) - lock(); - cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - // There's no successor so we tried to regrab the lock. - // If that didn't work, then another thread grabbed the - // lock so we're done (and exit was a success). - jccb (Assembler::notEqual, LSuccess); + jccb(Assembler::notZero, LSuccess); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + andptr(tmpReg, ~(int32_t)markWord::monitor_value); +#ifndef _LP64 + get_thread(boxReg); + movptr(Address(boxReg, JavaThread::unlocked_inflated_monitor_offset()), tmpReg); +#else // _LP64 + movptr(Address(r15_thread, JavaThread::unlocked_inflated_monitor_offset()), tmpReg); +#endif + // Intentional fall-through into slow path - bind (LGoSlowPath); orl (boxReg, 1); // set ICC.ZF=0 to indicate failure jmpb (DONE_LABEL); @@ -548,7 +507,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t testl (boxReg, 0); // set ICC.ZF=1 to indicate success jmpb (DONE_LABEL); -#endif if (LockingMode == LM_LEGACY) { bind (Stacked); movptr(tmpReg, Address (boxReg, 0)); // re-fetch @@ -779,6 +737,13 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Check if there is a successor. cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD); jccb(Assembler::notZero, unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + movptr(reg_rax, mark); + andptr(reg_rax, ~(int32_t)markWord::monitor_value); + movptr(Address(thread, JavaThread::unlocked_inflated_monitor_offset()), reg_rax); + testl(monitor, monitor); // Fast Unlock ZF = 0 jmpb(slow_path); diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index a3eef07ba0ac9..55c6c5f6bf935 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -487,6 +487,7 @@ JavaThread::JavaThread(MEMFLAGS flags) : _cont_fastpath_thread_state(1), _held_monitor_count(0), _jni_monitor_count(0), + _unlocked_inflated_monitor(nullptr), _handshake(this), diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index 755a8268864dd..1e8a0acd21890 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -463,6 +463,7 @@ class JavaThread: public Thread { // It's signed for error detection. intx _held_monitor_count; // used by continuations for fast lock detection intx _jni_monitor_count; + ObjectMonitor* _unlocked_inflated_monitor; private: @@ -614,6 +615,12 @@ class JavaThread: public Thread { intx jni_monitor_count() { return _jni_monitor_count; } void clear_jni_monitor_count() { _jni_monitor_count = 0; } + // Support for SharedRuntime::monitor_exit_helper() + ObjectMonitor* unlocked_inflated_monitor() { return _unlocked_inflated_monitor; } + void clear_unlocked_inflated_monitor() { + _unlocked_inflated_monitor = nullptr; + } + inline bool is_vthread_mounted() const; inline const ContinuationEntry* vthread_continuation() const; @@ -827,6 +834,7 @@ class JavaThread: public Thread { static ByteSize cont_fastpath_offset() { return byte_offset_of(JavaThread, _cont_fastpath); } static ByteSize held_monitor_count_offset() { return byte_offset_of(JavaThread, _held_monitor_count); } static ByteSize jni_monitor_count_offset() { return byte_offset_of(JavaThread, _jni_monitor_count); } + static ByteSize unlocked_inflated_monitor_offset() { return byte_offset_of(JavaThread, _unlocked_inflated_monitor); } #if INCLUDE_JVMTI static ByteSize is_in_VTMS_transition_offset() { return byte_offset_of(JavaThread, _is_in_VTMS_transition); } diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 305b48a9dbb6d..088de599399c7 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -808,10 +808,10 @@ void ObjectMonitor::EnterI(JavaThread* current) { // -- the checker -- parked on a timer. if (nxt == nullptr && _EntryList == nullptr - X86_ONLY (&& LockingMode != LM_LIGHTWEIGHT) - RISCV_ONLY (&& LockingMode != LM_LIGHTWEIGHT) - AARCH64_ONLY(&& LockingMode != LM_LIGHTWEIGHT) - PPC_ONLY (&& LockingMode != LM_LIGHTWEIGHT) + X86_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) + RISCV_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) + AARCH64_ONLY( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) + PPC_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) ) { // Try to assume the role of responsible thread for the monitor. // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=current } diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index c27d7b6f67a6c..44a6a64833e47 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -1916,20 +1916,31 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea assert(JavaThread::current() == current, "invariant"); // Exit must be non-blocking, and therefore no exceptions can be thrown. ExceptionMark em(current); - // The object could become unlocked through a JNI call, which we have no other checks for. - // Give a fatal message if CheckJNICalls. Otherwise we ignore it. - markWord mark = obj->mark(); - if (mark.has_monitor()) { - ObjectMonitor* m = mark.monitor(); - assert(m->object()->mark() == mark, "invariant"); - if (!m->is_entered(current)) { - if (m->TryLock(current) != ObjectMonitor::TryLockResult::Success) { - current->inc_held_monitor_count(-1); - return; - } + // Check if C2_MacroAssembler::fast_unlock() or + // C2_MacroAssembler::fast_unlock_lightweight() unlocked an inflated + // monitor before going slow path. + ObjectMonitor* m = current->unlocked_inflated_monitor(); + if (m != nullptr) { + current->clear_unlocked_inflated_monitor(); + // We need to reacquire the lock before we can call ObjectSynchronizer::exit(). + if (m->TryLock(current) != ObjectMonitor::TryLockResult::Success) { + // Some other thread acquired the lock (or the monitor was + // deflated). Either way we are done. + current->inc_held_monitor_count(-1); + return; } +#ifdef ASSERT + markWord mark = obj->mark(); + assert(mark.has_monitor(), "Must have a monitor"); + ObjectMonitor* mon = mark.monitor(); + assert(m == mon, "invariant"); + assert(mon->object()->mark() == mark, "invariant"); +#endif } + + // The object could become unlocked through a JNI call, which we have no other checks for. + // Give a fatal message if CheckJNICalls. Otherwise we ignore it. if (obj->is_unlocked()) { if (CheckJNICalls) { fatal("Object has been unlocked by JNI"); From d0e68f4ad699a11d631d7844b51bfce7cd4bc9b8 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Thu, 8 Aug 2024 16:32:59 +0200 Subject: [PATCH 05/11] Fixed s390 --- src/hotspot/cpu/s390/macroAssembler_s390.cpp | 65 +++++++++++++++----- src/hotspot/share/runtime/objectMonitor.cpp | 1 + 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 66d6f25f0fd4a..57e8072351263 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3361,12 +3361,35 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg bind(not_recursive); + NearLabel check_succ, set_eq_unlocked; + + // Set owner to null. + z_release(); + z_lghi(temp, 0); + z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); + z_fence(); // membar(StoreLoad); + + // Check if the entry lists are empty. load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - z_brne(done); + z_brne(check_succ); load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - z_brne(done); - z_release(); - z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); + z_bre(done); // If so we are done. + + bind(check_succ); + + // Check if there is a successor. + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ))); + z_brne(set_eq_unlocked); // If so we are done. + + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); + + z_cr(currentHeader, Z_thread); // Set flag = NE + z_bru(done); + + bind(set_eq_unlocked); + z_cr(temp, temp); // Set flag = EQ bind(done); @@ -6085,25 +6108,35 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis bind(not_recursive); - NearLabel not_ok; + NearLabel check_succ, set_eq_unlocked; + + // Set owner to null. + z_release(); + z_lghi(tmp2, 0); + z_stg(tmp2, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor); + z_fence(); // membar(StoreLoad); + // Check if the entry lists are empty. load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - z_brne(not_ok); + z_brne(check_succ); load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - z_brne(not_ok); + z_bre(unlocked); // If so we are done. - z_release(); - z_stg(tmp2 /*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor); + bind(check_succ); - z_bru(unlocked); // CC = EQ here + // Check if there is a successor. + load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ))); + z_brne(set_eq_unlocked); // If so we are done. - bind(not_ok); + // Save the monitor pointer in the current thread, so we can try to + // reacquire the lock in SharedRuntime::monitor_exit_helper(). + z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); + + z_cr(monitor, Z_thread); // Set flag = NE + z_bru(slow_path); - // The owner may be anonymous, and we removed the last obj entry in - // the lock-stack. This loses the information about the owner. - // Write the thread to the owner field so the runtime knows the owner. - z_stg(Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor); - z_bru(slow_path); // CC = NE here + bind(set_eq_unlocked); + z_cr(tmp2, tmp2); // Set flag = EQ } bind(unlocked); diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 088de599399c7..b7ccfd7880d37 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -812,6 +812,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { RISCV_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) AARCH64_ONLY( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) PPC_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) + S390_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) ) { // Try to assume the role of responsible thread for the monitor. // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=current } From 98eb18bcfa2d10fa05fb7200eae2d97acbf9a59e Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Tue, 20 Aug 2024 16:09:46 +0200 Subject: [PATCH 06/11] Removed _Responsible --- src/hotspot/share/runtime/objectMonitor.cpp | 86 +-------------------- src/hotspot/share/runtime/objectMonitor.hpp | 2 - src/hotspot/share/runtime/sharedRuntime.cpp | 3 + 3 files changed, 4 insertions(+), 87 deletions(-) diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index b7ccfd7880d37..490921471e51a 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -255,7 +255,6 @@ ObjectMonitor::ObjectMonitor(oop object) : _EntryList(nullptr), _cxq(nullptr), _succ(nullptr), - _Responsible(nullptr), _SpinDuration(ObjectMonitor::Knob_SpinLimit), _contentions(0), _WaitSet(nullptr), @@ -717,8 +716,6 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { return ss->base(); } -#define MAX_RECHECK_INTERVAL 1000 - void ObjectMonitor::EnterI(JavaThread* current) { assert(current->thread_state() == _thread_blocked, "invariant"); @@ -726,7 +723,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { assert(_succ != current, "invariant"); assert(owner_raw() == current, "invariant"); - assert(_Responsible != current, "invariant"); return; } @@ -742,14 +738,12 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TrySpin(current)) { assert(owner_raw() == current, "invariant"); assert(_succ != current, "invariant"); - assert(_Responsible != current, "invariant"); return; } // The Spin failed -- Enqueue and park the thread ... assert(_succ != current, "invariant"); assert(owner_raw() != current, "invariant"); - assert(_Responsible != current, "invariant"); // Enqueue "current" on ObjectMonitor's _cxq. // @@ -779,46 +773,10 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (TryLock(current) == TryLockResult::Success) { assert(_succ != current, "invariant"); assert(owner_raw() == current, "invariant"); - assert(_Responsible != current, "invariant"); return; } } - // Check for cxq|EntryList edge transition to non-null. This indicates - // the onset of contention. While contention persists exiting threads - // will use a ST:MEMBAR:LD 1-1 exit protocol. When contention abates exit - // operations revert to the faster 1-0 mode. This enter operation may interleave - // (race) a concurrent 1-0 exit operation, resulting in stranding, so we - // arrange for one of the contending thread to use a timed park() operations - // to detect and recover from the race. (Stranding is form of progress failure - // where the monitor is unlocked but all the contending threads remain parked). - // That is, at least one of the contended threads will periodically poll _owner. - // One of the contending threads will become the designated "Responsible" thread. - // The Responsible thread uses a timed park instead of a normal indefinite park - // operation -- it periodically wakes and checks for and recovers from potential - // strandings admitted by 1-0 exit operations. We need at most one Responsible - // thread per-monitor at any given moment. Only threads on cxq|EntryList may - // be responsible for a monitor. - // - // Currently, one of the contended threads takes on the added role of "Responsible". - // A viable alternative would be to use a dedicated "stranding checker" thread - // that periodically iterated over all the threads (or active monitors) and unparked - // successors where there was risk of stranding. This would help eliminate the - // timer scalability issues we see on some platforms as we'd only have one thread - // -- the checker -- parked on a timer. - - if (nxt == nullptr && _EntryList == nullptr - X86_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) - RISCV_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) - AARCH64_ONLY( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) - PPC_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) - S390_ONLY ( && (LockingMode != LM_LEGACY) && (LockingMode != LM_LIGHTWEIGHT)) - ) { - // Try to assume the role of responsible thread for the monitor. - // CONSIDER: ST vs CAS vs { if (Responsible==null) Responsible=current } - Atomic::replace_if_null(&_Responsible, current); - } - // The lock might have been released while this thread was occupied queueing // itself onto _cxq. To close the race and avoid "stranding" and // progress-liveness failure we must resample-retry _owner before parking. @@ -830,8 +788,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { // to defer the state transitions until absolutely necessary, // and in doing so avoid some transitions ... - int recheckInterval = 1; - for (;;) { if (TryLock(current) == TryLockResult::Success) { @@ -840,16 +796,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(owner_raw() != current, "invariant"); // park self - if (_Responsible == current) { - current->_ParkEvent->park((jlong) recheckInterval); - // Increase the recheckInterval, but clamp the value. - recheckInterval *= 8; - if (recheckInterval > MAX_RECHECK_INTERVAL) { - recheckInterval = MAX_RECHECK_INTERVAL; - } - } else { - current->_ParkEvent->park(); - } + current->_ParkEvent->park(); if (TryLock(current) == TryLockResult::Success) { break; @@ -899,29 +846,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { if (_succ == current) _succ = nullptr; assert(_succ != current, "invariant"); - if (_Responsible == current) { - _Responsible = nullptr; - OrderAccess::fence(); // Dekker pivot-point - - // We may leave threads on cxq|EntryList without a designated - // "Responsible" thread. This is benign. When this thread subsequently - // exits the monitor it can "see" such preexisting "old" threads -- - // threads that arrived on the cxq|EntryList before the fence, above -- - // by LDing cxq|EntryList. Newly arrived threads -- that is, threads - // that arrive on cxq after the ST:MEMBAR, above -- will set Responsible - // non-null and elect a new "Responsible" timer thread. - // - // This thread executes: - // ST Responsible=null; MEMBAR (in enter epilogue - here) - // LD cxq|EntryList (in subsequent exit) - // - // Entering threads in the slow/contended path execute: - // ST cxq=nonnull; MEMBAR; LD Responsible (in enter prolog) - // The (ST cxq; MEMBAR) is accomplished with CAS(). - // - // The MEMBAR, above, prevents the LD of cxq|EntryList in the subsequent - // exit operation from floating above the ST Responsible=null. - } // We've acquired ownership with CAS(). // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics. @@ -1192,10 +1116,6 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { return; } - // Invariant: after setting Responsible=null a thread must execute - // a MEMBAR or other serializing instruction before fetching EntryList|cxq. - _Responsible = nullptr; - #if INCLUDE_JFR // get the owner's thread id for the MonitorEnter event // if it is enabled and the thread isn't suspended @@ -1551,8 +1471,6 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { AddWaiter(&node); Thread::SpinRelease(&_WaitSetLock); - _Responsible = nullptr; - intx save = _recursions; // record the old recursion count _waiters++; // increment the number of waiters _recursions = 0; // set the recursion level to be 1 @@ -2230,7 +2148,6 @@ void ObjectMonitor::print() const { print_on(tty); } // _EntryList = 0x0000000000000000 // _cxq = 0x0000000000000000 // _succ = 0x0000000000000000 -// _Responsible = 0x0000000000000000 // _SpinDuration = 5000 // _contentions = 0 // _WaitSet = 0x0000700009756248 @@ -2259,7 +2176,6 @@ void ObjectMonitor::print_debug_style_on(outputStream* st) const { st->print_cr(" _EntryList = " INTPTR_FORMAT, p2i(_EntryList)); st->print_cr(" _cxq = " INTPTR_FORMAT, p2i(_cxq)); st->print_cr(" _succ = " INTPTR_FORMAT, p2i(_succ)); - st->print_cr(" _Responsible = " INTPTR_FORMAT, p2i(_Responsible)); st->print_cr(" _SpinDuration = %d", _SpinDuration); st->print_cr(" _contentions = %d", contentions()); st->print_cr(" _WaitSet = " INTPTR_FORMAT, p2i(_WaitSet)); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 42d234aaf916e..8fb3647127cac 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -173,7 +173,6 @@ class ObjectMonitor : public CHeapObj { ObjectWaiter* volatile _cxq; // LL of recently-arrived threads blocked on entry. JavaThread* volatile _succ; // Heir presumptive thread - used for futile wakeup throttling - JavaThread* volatile _Responsible; volatile int _SpinDuration; @@ -338,7 +337,6 @@ class ObjectMonitor : public CHeapObj { void notifyAll(TRAPS); enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; - TryLockResult TryLock(JavaThread* current); void print() const; diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 44a6a64833e47..61f7ee6d1cefc 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -1922,7 +1922,9 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea // monitor before going slow path. ObjectMonitor* m = current->unlocked_inflated_monitor(); if (m != nullptr) { + assert(m->owner_raw() != current, "must be"); current->clear_unlocked_inflated_monitor(); + // We need to reacquire the lock before we can call ObjectSynchronizer::exit(). if (m->TryLock(current) != ObjectMonitor::TryLockResult::Success) { // Some other thread acquired the lock (or the monitor was @@ -1931,6 +1933,7 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea return; } #ifdef ASSERT + assert(m->owner_raw() == current, "invariant"); markWord mark = obj->mark(); assert(mark.has_monitor(), "Must have a monitor"); ObjectMonitor* mon = mark.monitor(); From f9ddfc6fb0780a7d6e933a40ecd3cd458a058f04 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Tue, 3 Sep 2024 16:05:22 +0200 Subject: [PATCH 07/11] Small fixes before the review --- src/hotspot/cpu/s390/macroAssembler_s390.cpp | 2 + src/hotspot/share/runtime/objectMonitor.cpp | 136 ++++++++---------- .../org/openjdk/bench/vm/lang/LockUnlock.java | 2 +- 3 files changed, 61 insertions(+), 79 deletions(-) diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 8bc879665165c..4dc4bc0788bbc 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3679,6 +3679,7 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg // Save the monitor pointer in the current thread, so we can try to // reacquire the lock in SharedRuntime::monitor_exit_helper(). + z_xilf(currentHeader, markWord::monitor_value); z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); z_cr(currentHeader, Z_thread); // Set flag = NE @@ -6440,6 +6441,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis // Save the monitor pointer in the current thread, so we can try to // reacquire the lock in SharedRuntime::monitor_exit_helper(). + z_xilf(monitor, markWord::monitor_value); z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); z_ltgr(obj, obj); // Set flag = NE diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 98c6bb729c621..cc1c62c610266 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -178,7 +178,7 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr; // // Cxq points to the set of Recently Arrived Threads attempting entry. // Because we push threads onto _cxq with CAS, the RATs must take the form of -// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when +// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when // the unlocking thread notices that EntryList is null but _cxq is != null. // // The EntryList is ordered by the prevailing queue discipline and @@ -210,19 +210,6 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr; // unpark the notifyee. Unparking a notifee in notify() is inefficient - // it's likely the notifyee would simply impale itself on the lock held // by the notifier. -// -// * An interesting alternative is to encode cxq as (List,LockByte) where -// the LockByte is 0 iff the monitor is owned. _owner is simply an auxiliary -// variable, like _recursions, in the scheme. The threads or Events that form -// the list would have to be aligned in 256-byte addresses. A thread would -// try to acquire the lock or enqueue itself with CAS, but exiting threads -// could use a 1-0 protocol and simply STB to set the LockByte to 0. -// Note that is is *not* word-tearing, but it does presume that full-word -// CAS operations are coherent with intermix with STB operations. That's true -// on most common processors. -// -// * See also http://blogs.sun.com/dave - // Check that object() and set_object() are called from the right context: static void check_object_context() { @@ -320,7 +307,7 @@ bool ObjectMonitor::enter_is_async_deflating() { } bool ObjectMonitor::enterI_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { - // Used by ObjectSynchronizer::enter_for to enter for another thread. + // Used by ObjectSynchronizer::enter_for() to enter for another thread. // The monitor is private to or already owned by locking_thread which must be suspended. // So this code may only contend with deflation. assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); @@ -340,7 +327,7 @@ bool ObjectMonitor::enterI_with_contention_mark(JavaThread* locking_thread, Obje // Racing with deflation. prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread); if (prev_owner == DEFLATER_MARKER) { - // Canceled deflation. Increment contentions as part of the deflation protocol. + // Cancelled deflation. Increment contentions as part of the deflation protocol. add_to_contentions(1); success = true; } else if (prev_owner == nullptr) { @@ -371,6 +358,7 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O } bool ObjectMonitor::enter_for(JavaThread* locking_thread) { + // Used by ObjectSynchronizer::enter_for() to enter for another thread. // Block out deflation as soon as possible. ObjectMonitorContentionMark contention_mark(this); @@ -652,7 +640,7 @@ bool ObjectMonitor::deflate_monitor(Thread* current) { // it which makes it busy so no deflation. Restore owner to // null if it is still DEFLATER_MARKER. if (try_set_owner_from(DEFLATER_MARKER, nullptr) != DEFLATER_MARKER) { - // Deferred decrement for the JT EnterI() that canceled the async deflation. + // Deferred decrement for the JT EnterI() that cancelled the async deflation. add_to_contentions(-1); } return false; @@ -665,7 +653,7 @@ bool ObjectMonitor::deflate_monitor(Thread* current) { // ObjectMonitor is now busy. Restore owner to null if it is // still DEFLATER_MARKER: if (try_set_owner_from(DEFLATER_MARKER, nullptr) != DEFLATER_MARKER) { - // Deferred decrement for the JT EnterI() that canceled the async deflation. + // Deferred decrement for the JT EnterI() that cancelled the async deflation. add_to_contentions(-1); } return false; @@ -907,21 +895,25 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(owner_raw() == current, "invariant"); UnlinkAfterAcquire(current, &node); - if (_succ == current) _succ = nullptr; + if (_succ == current) { + _succ = nullptr; + // Note that we don't need to do OrderAccess::fence() after clearing + // _succ here, since we own the lock. + } assert(_succ != current, "invariant"); // We've acquired ownership with CAS(). // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics. // But since the CAS() this thread may have also stored into _succ, - // EntryList, cxq or Responsible. These meta-data updates must be + // EntryList or cxq. These meta-data updates must be // visible __before this thread subsequently drops the lock. // Consider what could occur if we didn't enforce this constraint -- // STs to monitor meta-data and user-data could reorder with (become // visible after) the ST in exit that drops ownership of the lock. // Some other thread could then acquire the lock, but observe inconsistent // or old monitor meta-data and heap data. That violates the JMM. - // To that end, the 1-0 exit() operation must have at least STST|LDST + // To that end, the exit() operation must have at least STST|LDST // "release" barrier semantics. Specifically, there must be at least a // STST|LDST barrier in exit() before the ST of null into _owner that drops // the lock. The barrier ensures that changes to monitor meta-data and data @@ -931,8 +923,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { // // Critically, any prior STs to _succ or EntryList must be visible before // the ST of null into _owner in the *subsequent* (following) corresponding - // monitorexit. Recall too, that in 1-0 mode monitorexit does not necessarily - // execute a serializing instruction. + // monitorexit. return; } @@ -1105,39 +1096,32 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // In that case exit() is called with _thread_state == _thread_blocked, // but the monitor's _contentions field is > 0, which inhibits reclamation. // -// 1-0 exit -// ~~~~~~~~ -// ::exit() uses a canonical 1-1 idiom with a MEMBAR although some of -// the fast-path operators have been optimized so the common ::exit() -// operation is 1-0, e.g., see macroAssembler_x86.cpp: fast_unlock(). -// The code emitted by fast_unlock() elides the usual MEMBAR. This -// greatly improves latency -- MEMBAR and CAS having considerable local -// latency on modern processors -- but at the cost of "stranding". Absent the -// MEMBAR, a thread in fast_unlock() can race a thread in the slow -// ::enter() path, resulting in the entering thread being stranding -// and a progress-liveness failure. Stranding is extremely rare. -// We use timers (timed park operations) & periodic polling to detect -// and recover from stranding. Potentially stranded threads periodically -// wake up and poll the lock. See the usage of the _Responsible variable. +// This is the exit part of the locking protocol, often implemented in +// C2_MacroAssembler::fast_unlock() +// +// 1. A release barrier ensures that changes to monitor meta-data +// (_succ, _EntryList, _cxq) and data protected by the lock will be +// visible before we release the lock. +// 2. Release the lock by clearing the owner. +// 3. A storeload MEMBAR is needed between releasing the owner and +// subsequently reading meta-data to safely determine if the lock is +// contended (step 4) without an elected successor (step 5). +// 4. If both _EntryList and _cxq are null, we are done, since there is no +// other thread waiting on the lock to wake up. I.e. there is no +// contention. +// 5. If there is a successor (_succ is non-null), we are done. The +// responsibility for guaranteeing progress-liveness has now implicitly +// been moved from the exiting thread to the successor. +// 6. There are waiters in the entry list (_EntryList and/or cxq are +// non-null), but there is no successor (_succ is null), so we need to +// wake up (unpark) a waiting thread to avoid stranding. // -// The CAS() in enter provides for safety and exclusion, while the CAS or -// MEMBAR in exit provides for progress and avoids stranding. 1-0 locking -// eliminates the CAS/MEMBAR from the exit path, but it admits stranding. -// We detect and recover from stranding with timers. +// Note that since only the current lock owner can manipulate the _EntryList +// or drain _cxq, we need to reacquire the lock before we can wake up +// (unpark) a waiting thread. // -// If a thread transiently strands it'll park until (a) another -// thread acquires the lock and then drops the lock, at which time the -// exiting thread will notice and unpark the stranded thread, or, (b) -// the timer expires. If the lock is high traffic then the stranding latency -// will be low due to (a). If the lock is low traffic then the odds of -// stranding are lower, although the worst-case stranding latency -// is longer. Critically, we don't want to put excessive load in the -// platform's timer subsystem. We want to minimize both the timer injection -// rate (timers created/sec) as well as the number of timers active at -// any one time. (more precisely, we want to minimize timer-seconds, which is -// the integral of the # of active timers at any instant over time). -// Both impinge on OS scalability. Given that, at most one thread parked on -// a monitor will use a timer. +// The CAS() in enter provides for safety and exclusion, while the +// MEMBAR in exit provides for progress and avoids stranding. // // There is also the risk of a futile wake-up. If we drop the lock // another thread can reacquire the lock immediately, and we can @@ -1205,14 +1189,15 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // Other threads are blocked trying to acquire the lock. // Normally the exiting thread is responsible for ensuring succession, - // but if other successors are ready or other entering threads are spinning - // then this thread can simply store null into _owner and exit without - // waking a successor. The existence of spinners or ready successors - // guarantees proper succession (liveness). Responsibility passes to the - // ready or running successors. The exiting thread delegates the duty. - // More precisely, if a successor already exists this thread is absolved - // of the responsibility of waking (unparking) one. - // + // but if this thread observes other successors are ready or other + // entering threads are spinning after it has stored null into _owner + // then it can exit without waking a successor. The existence of + // spinners or ready successors guarantees proper succession (liveness). + // Responsibility passes to the ready or running successors. The exiting + // thread delegates the duty. More precisely, if a successor already + // exists this thread is absolved of the responsibility of waking + // (unparking) one. + // The _succ variable is critical to reducing futile wakeup frequency. // _succ identifies the "heir presumptive" thread that has been made // ready (unparked) but that has not yet run. We need only one such @@ -1223,16 +1208,10 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // Note that spinners in Enter() also set _succ non-null. // In the current implementation spinners opportunistically set // _succ so that exiting threads might avoid waking a successor. - // Another less appealing alternative would be for the exiting thread - // to drop the lock and then spin briefly to see if a spinner managed - // to acquire the lock. If so, the exiting thread could exit - // immediately without waking a successor, otherwise the exiting - // thread would need to dequeue and wake a successor. - // (Note that we'd need to make the post-drop spin short, but no - // shorter than the worst-case round-trip cache-line migration time. - // The dropped lock needs to become visible to the spinner, and then - // the acquisition of the lock by the spinner must become visible to - // the exiting thread). + // Which means that the exiting thread could exit immediately without + // waking a successor, if it observes a successor after it has dropped + // the lock. Note that the dropped lock needs to become visible to the + // spinner. // It appears that an heir-presumptive (successor) must be made ready. // Only the current lock owner can manipulate the EntryList or @@ -1255,9 +1234,10 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // protocol. After EnterI() returns to enter(), contentions is // decremented because the caller now owns the monitor. We need // to increment contentions an extra time here (done by the - // contention_mark constructor) to prevent the deflater thread - // from winning the last part of the 2-part async deflation - // protocol after the regular decrement occurs in enter(). + // ObjectMonitorContentionMark constructor) to prevent the + // deflater thread from winning the last part of the 2-part + // async deflation protocol after the regular decrement occurs + // in enter(). ObjectMonitorContentionMark contention_mark(this); if (contentions() < 0) { @@ -1268,12 +1248,12 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // Now try to take the ownership. if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // We successfully canceled the in-progress async deflation. + // We successfully cancelled the in-progress async deflation. // By extending the lifetime of the contention_mark, we // prevent the destructor from decrementing the contentions // counter when the contention_mark goes out of scope. Instead // ObjectMonitor::deflate_monitor() will decrement contentions - // after it recognizes that the async deflation was canceled. + // after it recognizes that the async deflation was cancelled. contention_mark.extend(); } else { owner = try_set_owner_from(nullptr, current); @@ -1344,7 +1324,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { q = p; } - // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = nullptr + // We need to: ST EntryList; MEMBAR #storestore; ST _owner = nullptr // The MEMBAR is satisfied by the release_store() operation in ExitEpilog(). // See if we can abdicate to a spinner instead of waking a thread. diff --git a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java index 3ed862e8218cd..c43eb05b0fed4 100644 --- a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java +++ b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java @@ -312,7 +312,7 @@ private synchronized int fact(int n) { * With two threads lockObject1 will be contended so should be * inflated. */ - @Threads(2) + @Threads(3) @Benchmark public void testContendedLock() { synchronized (lockObject1) { From d2c6db2b9673ab8381bc7c86ada90da552350cb8 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Fri, 13 Sep 2024 15:07:58 +0200 Subject: [PATCH 08/11] Update one, after the review --- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 5 +- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 6 --- src/hotspot/share/runtime/javaThread.hpp | 2 +- src/hotspot/share/runtime/objectMonitor.cpp | 50 ++++++++++--------- src/hotspot/share/runtime/objectMonitor.hpp | 4 +- 5 files changed, 30 insertions(+), 37 deletions(-) diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index ab274a7bad9f6..f6862ae6a1ef0 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -233,8 +233,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, // Check if the entry lists are empty. ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); - ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); - orr(t0, t0, disp_hdr); + ld(tmp1Reg, Address(tmp, ObjectMonitor::cxq_offset())); + orr(t0, t0, tmp1Reg); beqz(t0, unlocked); // If so we are done. // Check if there is a successor. @@ -550,7 +550,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, bind(not_recursive); - Label release; const Register tmp2_owner_addr = tmp2; // Compute owner address. diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index cdf34d4f5068f..76667ab043a67 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -475,12 +475,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); membar(StoreLoad); - // Memory barrier/fence - // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. - // This is faster on Nehalem and AMD Shanghai/Barcelona. - // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences - lock(); addl(Address(rsp, 0), 0); - // Check if the entry lists are empty. movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index defce64d9a222..fd47d5b78dd04 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -617,7 +617,7 @@ class JavaThread: public Thread { void clear_jni_monitor_count() { _jni_monitor_count = 0; } // Support for SharedRuntime::monitor_exit_helper() - ObjectMonitor* unlocked_inflated_monitor() { return _unlocked_inflated_monitor; } + ObjectMonitor* unlocked_inflated_monitor() const { return _unlocked_inflated_monitor; } void clear_unlocked_inflated_monitor() { _unlocked_inflated_monitor = nullptr; } diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index cc1c62c610266..c5734ccb08ea6 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -306,8 +306,7 @@ bool ObjectMonitor::enter_is_async_deflating() { return false; } -bool ObjectMonitor::enterI_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { - // Used by ObjectSynchronizer::enter_for() to enter for another thread. +bool ObjectMonitor::TryLock_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { // The monitor is private to or already owned by locking_thread which must be suspended. // So this code may only contend with deflation. assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); @@ -327,8 +326,13 @@ bool ObjectMonitor::enterI_with_contention_mark(JavaThread* locking_thread, Obje // Racing with deflation. prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread); if (prev_owner == DEFLATER_MARKER) { - // Cancelled deflation. Increment contentions as part of the deflation protocol. - add_to_contentions(1); + // We successfully cancelled the in-progress async deflation. + // By extending the lifetime of the contention_mark, we + // prevent the destructor from decrementing the contentions + // counter when the contention_mark goes out of scope. Instead + // ObjectMonitor::deflate_monitor() will decrement contentions + // after it recognizes that the async deflation was cancelled. + contention_mark.extend(); success = true; } else if (prev_owner == nullptr) { // At this point we cannot race with deflation as we have both incremented @@ -350,7 +354,8 @@ bool ObjectMonitor::enterI_with_contention_mark(JavaThread* locking_thread, Obje } void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { - DEBUG_ONLY(bool success = ) ObjectMonitor::enterI_with_contention_mark(locking_thread, contention_mark); + // Used by LightweightSynchronizer::inflate_and_enter in deoptimization path to enter for another thread. + bool success = ObjectMonitor::TryLock_with_contention_mark(locking_thread, contention_mark); assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", @@ -368,24 +373,15 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) { return false; } - enter_for_with_contention_mark(locking_thread, contention_mark); + bool success = ObjectMonitor::TryLock_with_contention_mark(locking_thread, contention_mark); + + assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT + ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", + p2i(locking_thread), p2i(this), p2i(owner_raw())); assert(owner_raw() == locking_thread, "must be"); return true; } -bool ObjectMonitor::TryLockI(JavaThread* current) { - assert(current == JavaThread::current(), "must be"); - - // Block out deflation as soon as possible. - ObjectMonitorContentionMark contention_mark(this); - - // Check for deflation. - if (enter_is_async_deflating()) { - return false; - } - return enterI_with_contention_mark(current, contention_mark); -} - bool ObjectMonitor::try_enter(JavaThread* current) { // TryLock avoids the CAS TryLockResult r = TryLock(current); @@ -571,7 +567,15 @@ ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) { for (;;) { if (own == DEFLATER_MARKER) { - if (TryLockI(current)) { + // Block out deflation as soon as possible. + ObjectMonitorContentionMark contention_mark(this); + + // Check for deflation. + if (enter_is_async_deflating()) { + // Treat deflation as interference. + return TryLockResult::Interference; + } + if (TryLock_with_contention_mark(current, contention_mark)) { assert(_recursions == 0, "invariant"); return TryLockResult::Success; } else { @@ -901,8 +905,6 @@ void ObjectMonitor::EnterI(JavaThread* current) { // _succ here, since we own the lock. } - assert(_succ != current, "invariant"); - // We've acquired ownership with CAS(). // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics. // But since the CAS() this thread may have also stored into _succ, @@ -1240,8 +1242,8 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // in enter(). ObjectMonitorContentionMark contention_mark(this); - if (contentions() < 0) { - assert((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != nullptr, ""); + if (is_being_async_deflated()) { + assert((intptr_t(_EntryList) | intptr_t(_cxq)) == 0 || _succ != nullptr, ""); // Mr. Deflator won the race, so we are done. return; } diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 149569a90996b..1285b44d06b52 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -373,12 +373,10 @@ class ObjectMonitor : public CHeapObj { void INotify(JavaThread* current); ObjectWaiter* DequeueWaiter(); void DequeueSpecificWaiter(ObjectWaiter* waiter); - bool enterI_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); - bool TryLockI(JavaThread* current); + bool TryLock_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); void EnterI(JavaThread* current); void ReenterI(JavaThread* current, ObjectWaiter* current_node); void UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* current_node); - bool TrySpin(JavaThread* current); bool short_fixed_spin(JavaThread* current, int spin_count, bool adapt); void ExitEpilog(JavaThread* current, ObjectWaiter* Wakee); From ef5d1683fbca9ba6401a0cb610e887a1ad74e394 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Wed, 18 Sep 2024 07:33:28 +0200 Subject: [PATCH 09/11] Update two, after the review --- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 4 + src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 4 + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 4 + src/hotspot/cpu/s390/macroAssembler_s390.cpp | 8 +- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 4 + .../share/runtime/lightweightSynchronizer.cpp | 2 +- src/hotspot/share/runtime/objectMonitor.cpp | 86 ++++++------------- src/hotspot/share/runtime/objectMonitor.hpp | 13 +-- src/hotspot/share/runtime/sharedRuntime.cpp | 4 +- 9 files changed, 57 insertions(+), 72 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 25b804cd4af51..7e63a1b0acfbf 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -213,6 +213,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe // Set owner to null. // Release to satisfy the JMM stlr(zr, owner_addr); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. @@ -532,6 +534,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi // Set owner to null. // Release to satisfy the JMM stlr(zr, t2_owner_addr); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index fe492e6d81e52..c5e10392aeb1e 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2729,6 +2729,8 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe release(); li(temp, 0); std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. @@ -3004,6 +3006,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f release(); li(t, 0); std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index f6862ae6a1ef0..290f9d2cc502f 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -229,6 +229,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, // Set owner to null. membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); sd(zr, Address(owner_addr)); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. @@ -558,6 +560,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, // Set owner to null. membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); sd(zr, Address(tmp2_owner_addr)); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 4dc4bc0788bbc..a0a26e41bbb93 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3663,7 +3663,8 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg z_release(); z_lghi(temp, 0); z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); - z_fence(); // membar(StoreLoad); + // We need a full fence after clearing owner to avoid stranding. + z_fence(); // Check if the entry lists are empty. load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); @@ -3682,7 +3683,7 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg z_xilf(currentHeader, markWord::monitor_value); z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset())); - z_cr(currentHeader, Z_thread); // Set flag = NE + z_ltgr(oop, oop); // Set flag = NE z_bru(done); bind(set_eq_unlocked); @@ -6425,7 +6426,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis z_release(); z_lghi(tmp2, 0); z_stg(tmp2, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor); - z_fence(); // membar(StoreLoad); + // We need a full fence after clearing owner to avoid stranding. + z_fence(); // Check if the entry lists are empty. load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 76667ab043a67..bb176150f0281 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -473,6 +473,8 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // Release lock. movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. @@ -802,6 +804,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Release lock. movptr(owner_address, NULL_WORD); + // We need a full fence after clearing owner to avoid stranding. + // StoreLoad achieves this. membar(StoreLoad); // Check if the entry lists are empty. diff --git a/src/hotspot/share/runtime/lightweightSynchronizer.cpp b/src/hotspot/share/runtime/lightweightSynchronizer.cpp index 424fa742f2c05..0e360dba97bdd 100644 --- a/src/hotspot/share/runtime/lightweightSynchronizer.cpp +++ b/src/hotspot/share/runtime/lightweightSynchronizer.cpp @@ -711,7 +711,7 @@ void LightweightSynchronizer::exit(oop object, JavaThread* current) { assert(current == Thread::current(), "must be"); markWord mark = object->mark(); - assert(mark.is_locked(), "must be"); + assert(!mark.is_unlocked(), "must be"); LockStack& lock_stack = current->lock_stack(); if (mark.is_fast_locked()) { diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index c5734ccb08ea6..6a178f159b8f7 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -306,7 +306,7 @@ bool ObjectMonitor::enter_is_async_deflating() { return false; } -bool ObjectMonitor::TryLock_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { +bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { // The monitor is private to or already owned by locking_thread which must be suspended. // So this code may only contend with deflation. assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); @@ -355,7 +355,7 @@ bool ObjectMonitor::TryLock_with_contention_mark(JavaThread* locking_thread, Obj void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { // Used by LightweightSynchronizer::inflate_and_enter in deoptimization path to enter for another thread. - bool success = ObjectMonitor::TryLock_with_contention_mark(locking_thread, contention_mark); + bool success = TryLockWithContentionMark(locking_thread, contention_mark); assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", @@ -373,7 +373,7 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) { return false; } - bool success = ObjectMonitor::TryLock_with_contention_mark(locking_thread, contention_mark); + bool success = TryLockWithContentionMark(locking_thread, contention_mark); assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}", @@ -382,27 +382,31 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) { return true; } -bool ObjectMonitor::try_enter(JavaThread* current) { - // TryLock avoids the CAS +bool ObjectMonitor::try_enter(JavaThread* current, bool check_owner) { + // TryLock avoids the CAS and handles deflation. TryLockResult r = TryLock(current); if (r == TryLockResult::Success) { assert(_recursions == 0, "invariant"); return true; } - if (r == TryLockResult::HasOwner && owner() == current) { - _recursions++; - return true; - } + // Set check_owner to false (it's default value is true) if you want + // to use ObjectMonitor::try_enter() as a public way of doing TryLock(). + // Used this way in SharedRuntime::monitor_exit_helper(). + if (check_owner) { + if (r == TryLockResult::HasOwner && owner() == current) { + _recursions++; + return true; + } - void* cur = owner_raw(); - if (LockingMode == LM_LEGACY && current->is_lock_owned((address)cur)) { - assert(_recursions == 0, "internal state error"); - _recursions = 1; - set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. - return true; + void* cur = owner_raw(); + if (LockingMode == LM_LEGACY && current->is_lock_owned((address)cur)) { + assert(_recursions == 0, "internal state error"); + _recursions = 1; + set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. + return true; + } } - return false; } @@ -575,7 +579,7 @@ ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) { // Treat deflation as interference. return TryLockResult::Interference; } - if (TryLock_with_contention_mark(current, contention_mark)) { + if (TryLockWithContentionMark(current, contention_mark)) { assert(_recursions == 0, "invariant"); return TryLockResult::Success; } else { @@ -1220,51 +1224,11 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // drain _cxq, so we need to reacquire the lock. If we fail // to reacquire the lock the responsibility for ensuring succession // falls to the new owner. - // - void* owner = try_set_owner_from(nullptr, current); - if (owner != nullptr) { - if (owner != DEFLATER_MARKER) { - // Owned by another thread, so we are done. - return; - } - - // The deflator owns the lock. Now try to cancel the async - // deflation. As part of the contended enter protocol, - // contentions was incremented to a positive value before - // EnterI() was called and that prevents the deflater thread - // from winning the last part of the 2-part async deflation - // protocol. After EnterI() returns to enter(), contentions is - // decremented because the caller now owns the monitor. We need - // to increment contentions an extra time here (done by the - // ObjectMonitorContentionMark constructor) to prevent the - // deflater thread from winning the last part of the 2-part - // async deflation protocol after the regular decrement occurs - // in enter(). - ObjectMonitorContentionMark contention_mark(this); - if (is_being_async_deflated()) { - assert((intptr_t(_EntryList) | intptr_t(_cxq)) == 0 || _succ != nullptr, ""); - // Mr. Deflator won the race, so we are done. - return; - } - - // Now try to take the ownership. - if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) { - // We successfully cancelled the in-progress async deflation. - // By extending the lifetime of the contention_mark, we - // prevent the destructor from decrementing the contentions - // counter when the contention_mark goes out of scope. Instead - // ObjectMonitor::deflate_monitor() will decrement contentions - // after it recognizes that the async deflation was cancelled. - contention_mark.extend(); - } else { - owner = try_set_owner_from(nullptr, current); - if (owner != nullptr) { - // The lock is owned by another thread, who is now - // responsible for ensuring succession, so we are done. - return; - } - } + if (TryLock(current) != TryLockResult::Success) { + // Some other thread acquired the lock (or the monitor was + // deflated). Either way we are done. + return; } guarantee(owner_raw() == current, "invariant"); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 1285b44d06b52..1c4074dd094b3 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -348,7 +348,7 @@ class ObjectMonitor : public CHeapObj { void enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); bool enter_for(JavaThread* locking_thread); bool enter(JavaThread* current); - bool try_enter(JavaThread* current); + bool try_enter(JavaThread* current, bool check_owner = true); bool spin_enter(JavaThread* current); void enter_with_contention_mark(JavaThread* current, ObjectMonitorContentionMark& contention_mark); void exit(JavaThread* current, bool not_suspended = true); @@ -356,9 +356,6 @@ class ObjectMonitor : public CHeapObj { void notify(TRAPS); void notifyAll(TRAPS); - enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; - TryLockResult TryLock(JavaThread* current); - void print() const; #ifdef ASSERT void print_debug_style_on(outputStream* st) const; @@ -373,10 +370,16 @@ class ObjectMonitor : public CHeapObj { void INotify(JavaThread* current); ObjectWaiter* DequeueWaiter(); void DequeueSpecificWaiter(ObjectWaiter* waiter); - bool TryLock_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); void EnterI(JavaThread* current); void ReenterI(JavaThread* current, ObjectWaiter* current_node); void UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* current_node); + + + enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; + + bool TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); + TryLockResult TryLock(JavaThread* current); + bool TrySpin(JavaThread* current); bool short_fixed_spin(JavaThread* current, int spin_count, bool adapt); void ExitEpilog(JavaThread* current, ObjectWaiter* Wakee); diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index f66fb89c1707a..893f33439089b 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -1967,10 +1967,10 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea current->clear_unlocked_inflated_monitor(); // We need to reacquire the lock before we can call ObjectSynchronizer::exit(). - if (m->TryLock(current) != ObjectMonitor::TryLockResult::Success) { + if (!m->try_enter(current, false)) { // Some other thread acquired the lock (or the monitor was // deflated). Either way we are done. - current->inc_held_monitor_count(-1); + current->dec_held_monitor_count(); return; } } From 8140570fe12c875df6c8e0c2f7a6e98b7215a45a Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Wed, 25 Sep 2024 14:20:18 +0200 Subject: [PATCH 10/11] Update three, after the review --- src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 2 + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 2 + src/hotspot/cpu/s390/macroAssembler_s390.cpp | 2 + src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 8 ++-- src/hotspot/share/runtime/objectMonitor.cpp | 48 ++++++++++--------- src/hotspot/share/runtime/objectMonitor.hpp | 3 +- .../share/runtime/objectMonitor.inline.hpp | 16 +++++-- src/hotspot/share/runtime/sharedRuntime.cpp | 6 ++- .../org/openjdk/bench/vm/lang/LockUnlock.java | 5 +- 9 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index c1eabccd9af0b..8d8e39b8bbc00 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2717,6 +2717,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe bind(notRecursive); // Set owner to null. + // Release to satisfy the JMM release(); li(temp, 0); std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header); @@ -3052,6 +3053,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f const Register t2 = tmp2; // Set owner to null. + // Release to satisfy the JMM release(); li(t, 0); std(t, in_bytes(ObjectMonitor::owner_offset()), monitor); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 55a99e4f039c0..75f87e35adf41 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -227,6 +227,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, la(owner_addr, Address(tmp, ObjectMonitor::owner_offset())); // Set owner to null. + // Release to satisfy the JMM membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); sd(zr, Address(owner_addr)); // We need a full fence after clearing owner to avoid stranding. @@ -558,6 +559,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset())); // Set owner to null. + // Release to satisfy the JMM membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); sd(zr, Address(tmp2_owner_addr)); // We need a full fence after clearing owner to avoid stranding. diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index d8580faa128c4..af281345b1477 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3658,6 +3658,7 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg NearLabel check_succ, set_eq_unlocked; // Set owner to null. + // Release to satisfy the JMM z_release(); z_lghi(temp, 0); z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); @@ -6500,6 +6501,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis NearLabel check_succ, set_eq_unlocked; // Set owner to null. + // Release to satisfy the JMM z_release(); z_lghi(tmp2, 0); z_stg(tmp2 /*=0*/, owner_address); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 30ce3de3af440..839745f76ec6a 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -470,7 +470,8 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t bind(LNotRecursive); - // Release lock. + // Set owner to null. + // Release to satisfy the JMM movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); // We need a full fence after clearing owner to avoid stranding. // StoreLoad achieves this. @@ -495,8 +496,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t movptr(Address(r15_thread, JavaThread::unlocked_inflated_monitor_offset()), tmpReg); #endif - // Intentional fall-through into slow path - orl (boxReg, 1); // set ICC.ZF=0 to indicate failure jmpb (DONE_LABEL); @@ -800,7 +799,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, cmpptr(recursions_address, 0); jccb(Assembler::notZero, recursive); - // Release lock. + // Set owner to null. + // Release to satisfy the JMM movptr(owner_address, NULL_WORD); // We need a full fence after clearing owner to avoid stranding. // StoreLoad achieves this. diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 6a178f159b8f7..c1e276a25046f 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -326,12 +326,15 @@ bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, Object // Racing with deflation. prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread); if (prev_owner == DEFLATER_MARKER) { - // We successfully cancelled the in-progress async deflation. - // By extending the lifetime of the contention_mark, we - // prevent the destructor from decrementing the contentions - // counter when the contention_mark goes out of scope. Instead - // ObjectMonitor::deflate_monitor() will decrement contentions - // after it recognizes that the async deflation was cancelled. + // We successfully cancelled the in-progress async deflation by + // changing owner from DEFLATER_MARKER to current. We now extend + // the lifetime of the contention_mark (e.g. contentions++) here + // to prevent the deflater thread from winning the last part of + // the 2-part async deflation protocol after the regular + // decrement occurs when the contention_mark goes out of + // scope. ObjectMonitor::deflate_monitor() which is called by + // the deflater thread who will decrement contentions after it + // recognizes that the async deflation was cancelled. contention_mark.extend(); success = true; } else if (prev_owner == nullptr) { @@ -382,7 +385,7 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) { return true; } -bool ObjectMonitor::try_enter(JavaThread* current, bool check_owner) { +bool ObjectMonitor::try_enter(JavaThread* current, bool check_for_recursion) { // TryLock avoids the CAS and handles deflation. TryLockResult r = TryLock(current); if (r == TryLockResult::Success) { @@ -390,22 +393,23 @@ bool ObjectMonitor::try_enter(JavaThread* current, bool check_owner) { return true; } - // Set check_owner to false (it's default value is true) if you want - // to use ObjectMonitor::try_enter() as a public way of doing TryLock(). - // Used this way in SharedRuntime::monitor_exit_helper(). - if (check_owner) { - if (r == TryLockResult::HasOwner && owner() == current) { - _recursions++; - return true; - } + // If called from SharedRuntime::monitor_exit_helper(), we know that + // this thread doesn't already own the lock. + if (!check_for_recursion) { + return false; + } - void* cur = owner_raw(); - if (LockingMode == LM_LEGACY && current->is_lock_owned((address)cur)) { - assert(_recursions == 0, "internal state error"); - _recursions = 1; - set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. - return true; - } + if (r == TryLockResult::HasOwner && owner() == current) { + _recursions++; + return true; + } + + void* cur = owner_raw(); + if (LockingMode == LM_LEGACY && current->is_lock_owned((address)cur)) { + assert(_recursions == 0, "internal state error"); + _recursions = 1; + set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. + return true; } return false; } diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 1c4074dd094b3..30d2e5094162d 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -223,7 +223,6 @@ class ObjectMonitor : public CHeapObj { static ByteSize cxq_offset() { return byte_offset_of(ObjectMonitor, _cxq); } static ByteSize succ_offset() { return byte_offset_of(ObjectMonitor, _succ); } static ByteSize EntryList_offset() { return byte_offset_of(ObjectMonitor, _EntryList); } - static ByteSize contentions_offset() { return byte_offset_of(ObjectMonitor, _contentions); } // ObjectMonitor references can be ORed with markWord::monitor_value // as part of the ObjectMonitor tagging mechanism. When we combine an @@ -348,7 +347,7 @@ class ObjectMonitor : public CHeapObj { void enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); bool enter_for(JavaThread* locking_thread); bool enter(JavaThread* current); - bool try_enter(JavaThread* current, bool check_owner = true); + bool try_enter(JavaThread* current, bool check_for_recursion = true); bool spin_enter(JavaThread* current); void enter_with_contention_mark(JavaThread* current, ObjectMonitorContentionMark& contention_mark); void exit(JavaThread* current, bool not_suspended = true); diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp index def2cc7e332be..6d3c6ff24c38b 100644 --- a/src/hotspot/share/runtime/objectMonitor.inline.hpp +++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp @@ -206,19 +206,29 @@ inline void ObjectMonitor::set_next_om(ObjectMonitor* new_value) { Atomic::store(&_next_om, new_value); } +// Block out deflation. inline ObjectMonitorContentionMark::ObjectMonitorContentionMark(ObjectMonitor* monitor) : _monitor(monitor), _extended(false) { + // Contentions is incremented to a positive value as part of the + // contended enter protocol, which prevents the deflater thread from + // winning the last part of the 2-part async deflation + // protocol. See: ObjectMonitor::deflate_monitor() and + // ObjectMonitor::TryLockWithContentionMark(). _monitor->add_to_contentions(1); } inline ObjectMonitorContentionMark::~ObjectMonitorContentionMark() { - if (!_extended) { - _monitor->add_to_contentions(-1); - } + // Decrement contentions when the contention mark goes out of + // scope. This opens up for deflation, if the contention mark + // hasn't been extended. + _monitor->add_to_contentions(-1); } inline void ObjectMonitorContentionMark::extend() { + // Used by ObjectMonitor::TryLockWithContentionMark() to "extend the + // lifetime" of the contention mark. assert(!_extended, "extending twice is probably a bad design"); + _monitor->add_to_contentions(1); _extended = true; } diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 64bbdfdbc7ae5..9ac7c97319f2e 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -1966,14 +1966,16 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea // Check if C2_MacroAssembler::fast_unlock() or // C2_MacroAssembler::fast_unlock_lightweight() unlocked an inflated - // monitor before going slow path. + // monitor before going slow path. Since there is no safepoint + // polling when calling into the VM, we can be sure that the monitor + // hasn't been deallocated. ObjectMonitor* m = current->unlocked_inflated_monitor(); if (m != nullptr) { assert(m->owner_raw() != current, "must be"); current->clear_unlocked_inflated_monitor(); // We need to reacquire the lock before we can call ObjectSynchronizer::exit(). - if (!m->try_enter(current, false)) { + if (!m->try_enter(current, /*check_for_recursion*/ false)) { // Some other thread acquired the lock (or the monitor was // deflated). Either way we are done. current->dec_held_monitor_count(); diff --git a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java index c43eb05b0fed4..39c8569532ec4 100644 --- a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java +++ b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java @@ -309,8 +309,9 @@ private synchronized int fact(int n) { } /** - * With two threads lockObject1 will be contended so should be - * inflated. + * With three threads lockObject1 will be contended so should be + * inflated. Three threads is also needed to ensure a high level + * of code coverage in the locking code. */ @Threads(3) @Benchmark From 1dcdc176afa272b7d116e0ac160b1c988d91f865 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Fri, 27 Sep 2024 09:33:46 +0200 Subject: [PATCH 11/11] Update four, after the review --- src/hotspot/share/runtime/objectMonitor.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index c1e276a25046f..755d49d2c6c58 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -307,9 +307,6 @@ bool ObjectMonitor::enter_is_async_deflating() { } bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { - // The monitor is private to or already owned by locking_thread which must be suspended. - // So this code may only contend with deflation. - assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); assert(contention_mark._monitor == this, "must be"); assert(!is_being_async_deflated(), "must be"); @@ -333,7 +330,7 @@ bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, Object // the 2-part async deflation protocol after the regular // decrement occurs when the contention_mark goes out of // scope. ObjectMonitor::deflate_monitor() which is called by - // the deflater thread who will decrement contentions after it + // the deflater thread will decrement contentions after it // recognizes that the async deflation was cancelled. contention_mark.extend(); success = true; @@ -358,6 +355,9 @@ bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, Object void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { // Used by LightweightSynchronizer::inflate_and_enter in deoptimization path to enter for another thread. + // The monitor is private to or already owned by locking_thread which must be suspended. + // So this code may only contend with deflation. + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); bool success = TryLockWithContentionMark(locking_thread, contention_mark); assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT @@ -367,6 +367,9 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O bool ObjectMonitor::enter_for(JavaThread* locking_thread) { // Used by ObjectSynchronizer::enter_for() to enter for another thread. + // The monitor is private to or already owned by locking_thread which must be suspended. + // So this code may only contend with deflation. + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); // Block out deflation as soon as possible. ObjectMonitorContentionMark contention_mark(this);