From e9f447bf1b9d67033aaa7605d7b8482d1e1f3493 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Mon, 3 Feb 2025 17:21:02 +0100 Subject: [PATCH 1/8] 8343840: Rewrite the ObjectMonitor lists --- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 8 +- src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 12 +- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 8 +- src/hotspot/cpu/s390/macroAssembler_s390.cpp | 15 +- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 13 +- src/hotspot/share/jvmci/vmStructs_jvmci.cpp | 2 +- src/hotspot/share/runtime/objectMonitor.cpp | 377 +++++++++--------- src/hotspot/share/runtime/objectMonitor.hpp | 10 +- 8 files changed, 212 insertions(+), 233 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index e3d197a457215..88205a3dda679 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -312,10 +312,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). + // Check if the EntryList is empty. ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset())); - ldr(tmpReg, Address(tmp, ObjectMonitor::cxq_offset())); - orr(rscratch1, rscratch1, tmpReg); cmp(rscratch1, zr); br(Assembler::EQ, cont); // If so we are done. @@ -635,10 +633,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). + // Check if the EntryList is empty. ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset())); - ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset())); - orr(rscratch1, rscratch1, t3_t); cmp(rscratch1, zr); br(Assembler::EQ, unlocked); // If so we are done. diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 1267fa0e5166e..4c74e6bfb73a1 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2957,10 +2957,8 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). - ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header); - ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header); - orr(temp, temp, displaced_header); // Will be 0 if both are 0. + // Check if the EntryList is empty. + ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header); cmpdi(flag, temp, 0); beq(flag, success); // If so we are done. @@ -3298,8 +3296,6 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f bind(not_recursive); - const Register t2 = tmp2; - // Set owner to null. // Release to satisfy the JMM release(); @@ -3309,10 +3305,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). + // Check if the EntryList is empty. ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor); - ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor); - orr(t, t, t2); cmpdi(CR0, t, 0); beq(CR0, unlocked); // If so we are done. 
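The aarch64 and ppc stubs above (and the riscv, s390 and x86 ones below) all emit the same delicate sequence, which is easier to follow in plain C++ than in assembler. Here is a minimal standalone model of the 1-0 exit protocol they implement, written with std::atomic in place of HotSpot's Atomic/OrderAccess primitives; Monitor and Node are illustrative stand-ins, not the VM's types.

#include <atomic>

struct Node { Node* next = nullptr; };

struct Monitor {
  std::atomic<void*> owner{nullptr};
  std::atomic<Node*> entry_list{nullptr};

  void exit() {
    // ST owner = null, with release semantics so the critical section
    // cannot float below the unlock.
    owner.store(nullptr, std::memory_order_release);
    // StoreLoad fence: the owner store must become visible before the
    // entry_list load. Without it, a thread that enqueued itself between
    // our two accesses could park with no thread left responsible for
    // waking it -- the "stranding" the comments above refer to.
    std::atomic_thread_fence(std::memory_order_seq_cst);
    if (entry_list.load() == nullptr) {
      return;  // The 1-0 exit: no waiters, so no CAS and no wakeup.
    }
    // Waiters exist: fall into the slow path, which re-locks and
    // unparks a successor (elided here; see ObjectMonitor::exit()).
  }
};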
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 7564df6b861cf..248cb7b286937 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -233,10 +233,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). + // Check if the EntryList is empty. ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); - ld(tmp1Reg, Address(tmp, ObjectMonitor::cxq_offset())); - orr(t0, t0, tmp1Reg); beqz(t0, unlocked); // If so we are done. // Check if there is a successor. @@ -569,10 +567,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). + // Check if the EntryList is empty. ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset())); - ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset())); - orr(t0, t0, tmp3_t); beqz(t0, unlocked); // If so we are done. // Check if there is a successor. diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 83a5c61bfc6c1..7d667f22bb5a7 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3931,7 +3931,7 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg bind(not_recursive); - NearLabel check_succ, set_eq_unlocked; + NearLabel set_eq_unlocked; // Set owner to null. // Release to satisfy the JMM @@ -3941,14 +3941,10 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg // We need a full fence after clearing owner to avoid stranding. z_fence(); - // Check if the entry lists are empty (EntryList first - by convention). + // Check if the EntryList is empty. load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - z_brne(check_succ); - load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); z_bre(done); // If so we are done. - bind(check_succ); - // Check if there is a successor. load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ))); z_brne(set_eq_unlocked); // If so we are done. @@ -6794,7 +6790,6 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; - const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag}; const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag}; const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag}; const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; @@ -6813,7 +6808,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis bind(not_recursive); - NearLabel check_succ, set_eq_unlocked; + NearLabel set_eq_unlocked; // Set owner to null. // Release to satisfy the JMM @@ -6825,12 +6820,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis // Check if the entry lists are empty (EntryList first - by convention). 
load_and_test_long(tmp2, EntryList_address); - z_brne(check_succ); - load_and_test_long(tmp2, cxq_address); z_bre(unlocked); // If so we are done. - bind(check_succ); - // Check if there is a successor. load_and_test_long(tmp2, succ_address); z_brne(set_eq_unlocked); // If so we are done. diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 87583ddabd5e9..a01300498fe0e 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -415,7 +415,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // as java routines or native JNI code called by this thread might // have released the lock. // Refer to the comments in synchronizer.cpp for how we might encode extra - // state in _succ so we can avoid fetching EntryList|cxq. + // state in _succ so we can avoid fetching EntryList. // // If there's no contention try a 1-0 exit. That is, exit without // a costly MEMBAR or CAS. See synchronizer.cpp for details on how @@ -447,9 +447,8 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); - orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); + // Check if the EntryList is empty. + cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), NULL_WORD); jccb(Assembler::zero, LSuccess); // If so we are done. // Check if there is a successor. @@ -767,7 +766,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, } const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; - const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag}; const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag}; const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag}; const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; @@ -785,9 +783,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // StoreLoad achieves this. membar(StoreLoad); - // Check if the entry lists are empty (EntryList first - by convention). - movptr(reg_rax, EntryList_address); - orptr(reg_rax, cxq_address); + // Check if the EntryList is empty. + cmpptr(EntryList_address, NULL_WORD); jccb(Assembler::zero, unlocked); // If so we are done. // Check if there is a successor. 
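Across aarch64, ppc, riscv, s390 and x86 the structural change is identical: the old fast path had to load both list heads and OR them before branching, while the new one tests a single field. In C++ terms (illustrative helpers under simplified types, not VM code; the real stubs operate on raw monitor offsets):

#include <cstdint>

// Old sequence: two loads plus an OR (EntryList first - by convention).
static inline bool has_queued_waiters_old(const void* entry_list,
                                          const void* cxq) {
  return (reinterpret_cast<uintptr_t>(entry_list) |
          reinterpret_cast<uintptr_t>(cxq)) != 0;
}

// New sequence: one load and a compare against null, since cxq has
// been folded into the single entry list.
static inline bool has_queued_waiters_new(const void* entry_list) {
  return entry_list != nullptr;
}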
diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index 96d369678f792..a4dbb14104fa9 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -329,7 +329,7 @@ \ volatile_nonstatic_field(ObjectMonitor, _owner, int64_t) \ volatile_nonstatic_field(ObjectMonitor, _recursions, intptr_t) \ - volatile_nonstatic_field(ObjectMonitor, _cxq, ObjectWaiter*) \ + volatile_nonstatic_field(ObjectMonitor, _EntryListTail, ObjectWaiter*) \ volatile_nonstatic_field(ObjectMonitor, _EntryList, ObjectWaiter*) \ volatile_nonstatic_field(ObjectMonitor, _succ, int64_t) \ volatile_nonstatic_field(ObjectMonitor, _stack_locker, BasicLock*) \ diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 1399b1e561203..2eeea7d1b8f41 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -256,7 +256,7 @@ ObjectMonitor::ObjectMonitor(oop object) : _next_om(nullptr), _recursions(0), _EntryList(nullptr), - _cxq(nullptr), + _EntryListTail(nullptr), _succ(NO_OWNER), _SpinDuration(ObjectMonitor::Knob_SpinLimit), _contentions(0), @@ -657,6 +657,49 @@ ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) { return first_own == own ? TryLockResult::HasOwner : TryLockResult::Interference; } +// Push "current" onto the front of the _EntryList. Once on _EntryList, +// current stays on-queue until it acquires the lock. +void ObjectMonitor::AddToEntryList(JavaThread* current, ObjectWaiter* node) { + node->_prev = nullptr; + node->TState = ObjectWaiter::TS_ENTER; + + for (;;) { + ObjectWaiter* front = _EntryList; + + node->_next = front; + if (Atomic::cmpxchg(&_EntryList, front, node) == front) { + return; + } + } +} + +// Push "current" onto the front of the EntryList. +// If the _EntryList was changed during our push operation, we try to +// lock the monitor. Returns true if we locked the monitor, and false +// if we added current to _EntryList. Once on _EntryList, current +// stays on-queue until it acquires the lock. +bool ObjectMonitor::TryLockOrAddToEntryList(JavaThread* current, ObjectWaiter* node) { + node->_prev = nullptr; + node->TState = ObjectWaiter::TS_ENTER; + + for (;;) { + ObjectWaiter* front = _EntryList; + + node->_next = front; + if (Atomic::cmpxchg(&_EntryList, front, node) == front) { + return false; + } + + // Interference - the CAS failed because _EntryList changed. Just retry. + // As an optional optimization we retry the lock. + if (TryLock(current) == TryLockResult::Success) { + assert(!has_successor(current), "invariant"); + assert(has_owner(current), "invariant"); + return true; + } + } +} + // Deflate the specified ObjectMonitor if not in-use. Returns true if it // was deflated and false otherwise. // @@ -727,8 +770,6 @@ bool ObjectMonitor::deflate_monitor(Thread* current) { guarantee(contentions() < 0, "must be negative: contentions=%d", contentions()); guarantee(_waiters == 0, "must be 0: waiters=%d", _waiters); - guarantee(_cxq == nullptr, "must be no contending threads: cxq=" - INTPTR_FORMAT, p2i(_cxq)); guarantee(_EntryList == nullptr, "must be no entering threads: EntryList=" INTPTR_FORMAT, p2i(_EntryList)); @@ -816,7 +857,6 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { ss->print("is_busy: waiters=%d" ", contentions=%d" ", owner=" INT64_FORMAT - ", cxq=" PTR_FORMAT ", EntryList=" PTR_FORMAT, _waiters, (contentions() > 0 ? 
contentions() : 0), @@ -825,7 +865,6 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { // ignores DEFLATER_MARKER values. ? NO_OWNER : owner_raw(), - p2i(_cxq), p2i(_EntryList)); return ss->base(); } @@ -859,7 +898,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(!has_successor(current), "invariant"); assert(!has_owner(current), "invariant"); - // Enqueue "current" on ObjectMonitor's _cxq. + // Enqueue "current" on ObjectMonitor's _EntryList. // // Node acts as a proxy for current. // As an aside, if were to ever rewrite the synchronization code mostly @@ -870,31 +909,16 @@ void ObjectMonitor::EnterI(JavaThread* current) { ObjectWaiter node(current); current->_ParkEvent->reset(); - node._prev = (ObjectWaiter*) 0xBAD; - node.TState = ObjectWaiter::TS_CXQ; - - // Push "current" onto the front of the _cxq. - // Once on cxq/EntryList, current stays on-queue until it acquires the lock. - // Note that spinning tends to reduce the rate at which threads - // enqueue and dequeue on EntryList|cxq. - ObjectWaiter* nxt; - for (;;) { - node._next = nxt = _cxq; - if (Atomic::cmpxchg(&_cxq, nxt, &node) == nxt) break; - // Interference - the CAS failed because _cxq changed. Just retry. - // As an optional optimization we retry the lock. - if (TryLock(current) == TryLockResult::Success) { - assert(!has_successor(current), "invariant"); - assert(has_owner(current), "invariant"); - return; - } + if (TryLockOrAddToEntryList(current, &node)) { + return; // We got the lock. } + // This thread is now added to the _EntryList. // The lock might have been released while this thread was occupied queueing - // itself onto _cxq. To close the race and avoid "stranding" and + // itself onto _EntryList. To close the race and avoid "stranding" and // progress-liveness failure we must resample-retry _owner before parking. - // Note the Dekker/Lamport duality: ST cxq; MEMBAR; LD Owner. + // Note the Dekker/Lamport duality: ST _EntryList; MEMBAR; LD Owner. // In this case the ST-MEMBAR is accomplished with CAS(). // // TODO: Defer all thread state transitions until park-time. @@ -970,15 +994,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { } // Egress : - // current has acquired the lock -- Unlink current from the cxq or EntryList. - // Normally we'll find current on the EntryList . - // From the perspective of the lock owner (this thread), the - // EntryList is stable and cxq is prepend-only. - // The head of cxq is volatile but the interior is stable. - // In addition, current.TState is stable. - - assert(has_owner(current), "invariant"); - + // Current has acquired the lock -- Unlink current from the _EntryList. UnlinkAfterAcquire(current, &node); if (has_successor(current)) { clear_successor(); @@ -1027,7 +1043,7 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) { for (;;) { ObjectWaiter::TStates v = currentNode->TState; - guarantee(v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant"); + guarantee(v == ObjectWaiter::TS_ENTER, "invariant"); assert(!has_owner(current), "invariant"); // This thread has been notified so try to reacquire the lock. @@ -1077,14 +1093,7 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) { OM_PERFDATA_OP(FutileWakeups, inc()); } - // current has acquired the lock -- Unlink current from the cxq or EntryList . - // Normally we'll find current on the EntryList. - // Unlinking from the EntryList is constant-time and atomic-free. 
- // From the perspective of the lock owner (this thread), the - // EntryList is stable and cxq is prepend-only. - // The head of cxq is volatile but the interior is stable. - // In addition, current.TState is stable. - + // Current has acquired the lock -- Unlink current from the _EntryList. assert(has_owner(current), "invariant"); assert_mark_word_consistency(); UnlinkAfterAcquire(current, currentNode); @@ -1109,27 +1118,15 @@ bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current, ObjectWaiter* waite oop vthread = current->vthread(); ObjectWaiter* node = waiter != nullptr ? waiter : new ObjectWaiter(vthread, this); - node->_prev = (ObjectWaiter*) 0xBAD; - node->TState = ObjectWaiter::TS_CXQ; - - // Push node associated with vthread onto the front of the _cxq. - ObjectWaiter* nxt; - for (;;) { - node->_next = nxt = _cxq; - if (Atomic::cmpxchg(&_cxq, nxt, node) == nxt) break; - - // Interference - the CAS failed because _cxq changed. Just retry. - // As an optional optimization we retry the lock. - if (TryLock(current) == TryLockResult::Success) { - assert(has_owner(current), "invariant"); - assert(!has_successor(current), "invariant"); - if (waiter == nullptr) delete node; // for Object.wait() don't delete yet - return true; - } + if (TryLockOrAddToEntryList(current, node)) { + // We got the lock. + if (waiter == nullptr) delete node; // for Object.wait() don't delete yet + return true; } + // This thread is now added to the EntryList. // We have to try once more since owner could have exited monitor and checked - // _cxq before we added the node to the queue. + // _EntryList before we added the node to the queue. if (TryLock(current) == TryLockResult::Success) { assert(has_owner(current), "invariant"); UnlinkAfterAcquire(current, node); @@ -1163,7 +1160,7 @@ bool ObjectMonitor::resume_operation(JavaThread* current, ObjectWaiter* node, Co // Retry acquiring monitor... int state = node->TState; - guarantee(state == ObjectWaiter::TS_ENTER || state == ObjectWaiter::TS_CXQ, "invariant"); + guarantee(state == ObjectWaiter::TS_ENTER, "invariant"); if (TryLock(current) == TryLockResult::Success) { VThreadEpilog(current, node); @@ -1218,71 +1215,151 @@ void ObjectMonitor::VThreadEpilog(JavaThread* current, ObjectWaiter* node) { } } -// By convention we unlink a contending thread from EntryList|cxq immediately +// Return the tail of the _EntryList. If the tail is currently not +// known, find it by walking from the head of _EntryList, and while +// doing so assign the _prev pointers to create a DLL list. +ObjectWaiter *ObjectMonitor::EntryListTail(JavaThread* current) { + assert(has_owner(current), "invariant"); + ObjectWaiter *w = _EntryListTail; + if (w != nullptr) { + return w; + } + w = _EntryList; + assert(w != nullptr, "invariant"); + if (w->_next == nullptr) { + _EntryListTail = w; + return w; + } + ObjectWaiter *prev = nullptr; + while (w != nullptr) { + assert(w->TState == ObjectWaiter::TS_ENTER, "invariant"); + w->_prev = prev; + prev = w; + w = w->_next; + } + _EntryListTail = prev; + return prev; +} + +static void set_bad_pointers(ObjectWaiter* currentNode) +{ +#ifdef ASSERT + // Diagnostic hygiene ... + currentNode->_prev = (ObjectWaiter*) 0xBAD; + currentNode->_next = (ObjectWaiter*) 0xBAD; + currentNode->TState = ObjectWaiter::TS_RUN; +#endif +} + +// By convention we unlink a contending thread from _EntryList immediately // after the thread acquires the lock in ::enter(). Equally, we could defer // unlinking the thread until ::exit()-time. 
+// The head of _EntryList is volatile but the interior is stable.
+// In addition, current.TState is stable.
 void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* currentNode) {
   assert(has_owner(current), "invariant");
   assert((!currentNode->is_vthread() && currentNode->thread() == current) ||
          (currentNode->is_vthread() && currentNode->vthread() == current->vthread()), "invariant");

-  if (currentNode->TState == ObjectWaiter::TS_ENTER) {
-    // Normal case: remove current from the DLL EntryList .
-    // This is a constant-time operation.
-    ObjectWaiter* nxt = currentNode->_next;
-    ObjectWaiter* prv = currentNode->_prev;
-    if (nxt != nullptr) nxt->_prev = prv;
-    if (prv != nullptr) prv->_next = nxt;
-    if (currentNode == _EntryList) _EntryList = nxt;
-    assert(nxt == nullptr || nxt->TState == ObjectWaiter::TS_ENTER, "invariant");
-    assert(prv == nullptr || prv->TState == ObjectWaiter::TS_ENTER, "invariant");
-  } else {
-    assert(currentNode->TState == ObjectWaiter::TS_CXQ, "invariant");
-    // Inopportune interleaving -- current is still on the cxq.
-    // This usually means the enqueue of self raced an exiting thread.
-    // Normally we'll find current near the front of the cxq, so
-    // dequeueing is typically fast. If needbe we can accelerate
-    // this with some MCS/CHL-like bidirectional list hints and advisory
-    // back-links so dequeueing from the interior will normally operate
-    // in constant-time.
-    // Dequeue current from either the head (with CAS) or from the interior
-    // with a linear-time scan and normal non-atomic memory operations.
-    // CONSIDER: if current is on the cxq then simply drain cxq into EntryList
-    // and then unlink current from EntryList. We have to drain eventually,
-    // so it might as well be now.
-
-    ObjectWaiter* v = _cxq;
+  // Check if we are unlinking the last element in the _EntryList.
+  // This is by far the most common case.
+  if (currentNode->_next == nullptr) {
+    assert(_EntryListTail == nullptr || _EntryListTail == currentNode, "invariant");
+
+    ObjectWaiter *v = _EntryList;
+    if (v == currentNode) {
+      // The currentNode is the only element in _EntryList.
+      if (Atomic::cmpxchg(&_EntryList, v, (ObjectWaiter*)nullptr) == v) {
+        _EntryListTail = nullptr;
+        set_bad_pointers(currentNode);
+        return;
+      }
+      // The CAS above can fail from interference IFF a new element
+      // was added to the head of _EntryList. In that case currentNode
+      // must be in the interior and can no longer be at the head of
+      // _EntryList.
+      v = _EntryList;
+      assert(v != currentNode, "invariant");
+      if (v->_next == currentNode) {
+        // The currentNode is located just after the head of _EntryList.
+        v->_next = nullptr;
+        _EntryListTail = v;
+        set_bad_pointers(currentNode);
+        return;
+      }
+    }
+    if (currentNode->_prev != nullptr) {
+      // The currentNode is the last element in _EntryList and we know
+      // which element is the previous one.
+      assert(_EntryList != currentNode, "invariant");
+      _EntryListTail = currentNode->_prev;
+      _EntryListTail->_next = nullptr;
+      set_bad_pointers(currentNode);
+      return;
+    }
+    // Here we know that the currentNode is the last element in
+    // _EntryList but we don't know which element is the previous
+    // one.
+  }
+
+  if (currentNode->_prev == nullptr) {
+    // We don't know which element is the previous one, so we need to find it.
+    ObjectWaiter *v = _EntryList;
     assert(v != nullptr, "invariant");
-    if (v != currentNode || Atomic::cmpxchg(&_cxq, v, currentNode->_next) != v) {
-      // The CAS above can fail from interference IFF a "RAT" arrived.
-      // In that case current must be in the interior and can no longer be
-      // at the head of cxq.
+    if (v != currentNode || Atomic::cmpxchg(&_EntryList, v, currentNode->_next) != v) {
+      // The CAS above can fail from interference IFF a new element
+      // was added to the head of _EntryList. In that case currentNode
+      // must be in the interior and can no longer be at the head of
+      // _EntryList.
       if (v == currentNode) {
-        assert(_cxq != v, "invariant");
-        v = _cxq;          // CAS above failed - start scan at head of list
+        assert(_EntryList != v, "invariant");
+        v = _EntryList;          // CAS above failed - start scan at head of list
       }
       ObjectWaiter* p;
       ObjectWaiter* q = nullptr;
       for (p = v; p != nullptr && p != currentNode; p = p->_next) {
+        assert(p->TState == ObjectWaiter::TS_ENTER, "invariant");
+        p->_prev = q;  // We might as well assign the DLL _prev-pointer.
         q = p;
-        assert(p->TState == ObjectWaiter::TS_CXQ, "invariant");
       }
       assert(v != currentNode, "invariant");
-      assert(p == currentNode, "Node not found on cxq");
-      assert(p != _cxq, "invariant");
+      assert(p == currentNode, "Node not found on EntryList");
+      assert(p != _EntryList, "invariant");
       assert(q != nullptr, "invariant");
       assert(q->_next == p, "invariant");
       q->_next = p->_next;
+      if (p->_next != nullptr) {
+        p->_next->_prev = q;
+      } else {
+        assert(_EntryListTail == nullptr || currentNode == _EntryListTail, "invariant");
+        _EntryListTail = currentNode->_prev;
+      }
+    } else if (v == currentNode) {
+      // The CAS above successfully unlinked currentNode from the head of the _EntryList.
+      assert(_EntryList != v, "invariant");
+      if (currentNode->_next != nullptr) {
+        currentNode->_next->_prev = nullptr;
+      } else {
+        assert(_EntryListTail == nullptr || _EntryListTail == currentNode, "invariant");
+        _EntryListTail = nullptr;
+      }
     }
+  } else {
+    assert(currentNode->_next != nullptr, "");
+    assert(currentNode->_prev != nullptr, "");
+    assert(currentNode != _EntryList, "");
+    assert(currentNode != _EntryListTail, "");
+
+    ObjectWaiter* nxt = currentNode->_next;
+    ObjectWaiter* prv = currentNode->_prev;
+    nxt->_prev = prv;
+    prv->_next = nxt;
+    assert(nxt->TState == ObjectWaiter::TS_ENTER, "invariant");
+    assert(prv->TState == ObjectWaiter::TS_ENTER, "invariant");
   }

-#ifdef ASSERT
-  // Diagnostic hygiene ...
-  currentNode->_prev = (ObjectWaiter*) 0xBAD;
-  currentNode->_next = (ObjectWaiter*) 0xBAD;
-  currentNode->TState = ObjectWaiter::TS_RUN;
-#endif
+  set_bad_pointers(currentNode);
 }

 // -----------------------------------------------------------------------------
@@ -1390,7 +1467,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) {
     release_clear_owner(current);
     OrderAccess::storeload();

-    if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || has_successor()) {
+    if (_EntryList == nullptr || has_successor()) {
       return;
     }
     // Other threads are blocked trying to acquire the lock.
@@ -1438,6 +1515,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) {
     w = _EntryList;
     if (w != nullptr) {
+      w = EntryListTail(current);
       // I'd like to write: guarantee (w->_thread != current).
       // But in practice an exiting thread may find itself on the EntryList.
       // Let's say thread T1 calls O.wait(). Wait() enqueues T1 on O's waitset and
       ExitEpilog(current, w);
       return;
     }
-
-    // If we find that both _cxq and EntryList are null then just
-    // re-run the exit protocol from the top.
- w = _cxq; - if (w == nullptr) continue; - - // Drain _cxq into EntryList - bulk transfer. - // First, detach _cxq. - // The following loop is tantamount to: w = swap(&cxq, nullptr) - for (;;) { - assert(w != nullptr, "Invariant"); - ObjectWaiter* u = Atomic::cmpxchg(&_cxq, w, (ObjectWaiter*)nullptr); - if (u == w) break; - w = u; - } - - assert(w != nullptr, "invariant"); - assert(_EntryList == nullptr, "invariant"); - - // Convert the LIFO SLL anchored by _cxq into a DLL. - // The list reorganization step operates in O(LENGTH(w)) time. - // It's critical that this step operate quickly as - // "current" still holds the outer-lock, restricting parallelism - // and effectively lengthening the critical section. - // Invariant: s chases t chases u. - // TODO-FIXME: consider changing EntryList from a DLL to a CDLL so - // we have faster access to the tail. - - _EntryList = w; - ObjectWaiter* q = nullptr; - ObjectWaiter* p; - for (p = w; p != nullptr; p = p->_next) { - guarantee(p->TState == ObjectWaiter::TS_CXQ, "Invariant"); - p->TState = ObjectWaiter::TS_ENTER; - p->_prev = q; - q = p; - } - - // We need to: ST EntryList; MEMBAR #storestore; ST _owner = nullptr - // The MEMBAR is satisfied by the release_store() operation in ExitEpilog(). - - // See if we can abdicate to a spinner instead of waking a thread. - // A primary goal of the implementation is to reduce the - // context-switch rate. - if (has_successor()) continue; - - w = _EntryList; - if (w != nullptr) { - guarantee(w->TState == ObjectWaiter::TS_ENTER, "invariant"); - ExitEpilog(current, w); - return; - } } } @@ -1784,7 +1810,7 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { } // The thread is now either on off-list (TS_RUN), - // on the EntryList (TS_ENTER), or on the cxq (TS_CXQ). + // or on the EntryList (TS_ENTER). // The Node's TState variable is stable from the perspective of this thread. // No other threads will asynchronously modify TState. guarantee(node.TState != ObjectWaiter::TS_WAIT, "invariant"); @@ -1838,7 +1864,7 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { NoPreemptMark npm(current); enter(current); } else { - guarantee(v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant"); + guarantee(v == ObjectWaiter::TS_ENTER, "invariant"); ReenterI(current, &node); node.wait_reenter_end(this); } @@ -1916,32 +1942,9 @@ void ObjectMonitor::INotify(JavaThread* current) { } } - iterator->TState = ObjectWaiter::TS_ENTER; - iterator->_notified = true; iterator->_notifier_tid = JFR_THREAD_ID(current); - - ObjectWaiter* list = _EntryList; - if (list != nullptr) { - assert(list->_prev == nullptr, "invariant"); - assert(list->TState == ObjectWaiter::TS_ENTER, "invariant"); - assert(list != iterator, "invariant"); - } - - // prepend to cxq - if (list == nullptr) { - iterator->_next = iterator->_prev = nullptr; - _EntryList = iterator; - } else { - iterator->TState = ObjectWaiter::TS_CXQ; - for (;;) { - ObjectWaiter* front = _cxq; - iterator->_next = front; - if (Atomic::cmpxchg(&_cxq, front, iterator) == front) { - break; - } - } - } + AddToEntryList(current, iterator); // _WaitSetLock protects the wait queue, not the EntryList. We could // move the add-to-EntryList operation, above, outside the critical section @@ -2053,7 +2056,7 @@ bool ObjectMonitor::VThreadWaitReenter(JavaThread* current, ObjectWaiter* node, // If this was an interrupted case, set the _interrupted boolean so that // once we re-acquire the monitor we know if we need to throw IE or not. 
ObjectWaiter::TStates state = node->TState; - bool was_notified = state == ObjectWaiter::TS_ENTER || state == ObjectWaiter::TS_CXQ; + bool was_notified = state == ObjectWaiter::TS_ENTER; assert(was_notified || state == ObjectWaiter::TS_RUN, ""); node->_interrupted = !was_notified && current->is_interrupted(false); @@ -2531,7 +2534,7 @@ void ObjectMonitor::print() const { print_on(tty); } // _next_om = 0x0000000000000000 // _recursions = 0 // _EntryList = 0x0000000000000000 -// _cxq = 0x0000000000000000 +// _EntryListTail = 0x0000000000000000 // _succ = 0x0000000000000000 // _SpinDuration = 5000 // _contentions = 0 @@ -2559,7 +2562,7 @@ void ObjectMonitor::print_debug_style_on(outputStream* st) const { st->print_cr(" _next_om = " INTPTR_FORMAT, p2i(next_om())); st->print_cr(" _recursions = %zd", _recursions); st->print_cr(" _EntryList = " INTPTR_FORMAT, p2i(_EntryList)); - st->print_cr(" _cxq = " INTPTR_FORMAT, p2i(_cxq)); + st->print_cr(" _EntryListTail = " INTPTR_FORMAT, p2i(_EntryListTail)); st->print_cr(" _succ = " INT64_FORMAT, successor()); st->print_cr(" _SpinDuration = %d", _SpinDuration); st->print_cr(" _contentions = %d", contentions()); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index 94c4c242f8271..a95605e3a26c5 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -43,7 +43,7 @@ class ContinuationWrapper; class ObjectWaiter : public CHeapObj { public: - enum TStates : uint8_t { TS_UNDEF, TS_READY, TS_RUN, TS_WAIT, TS_ENTER, TS_CXQ }; + enum TStates : uint8_t { TS_UNDEF, TS_READY, TS_RUN, TS_WAIT, TS_ENTER }; ObjectWaiter* volatile _next; ObjectWaiter* volatile _prev; JavaThread* _thread; @@ -177,7 +177,7 @@ class ObjectMonitor : public CHeapObj { // The list is actually composed of WaitNodes, // acting as proxies for Threads. - ObjectWaiter* volatile _cxq; // LL of recently-arrived threads blocked on entry. + ObjectWaiter* volatile _EntryListTail; // LL of recently-arrived threads blocked on entry. 
int64_t volatile _succ; // Heir presumptive thread - used for futile wakeup throttling volatile int _SpinDuration; @@ -245,7 +245,6 @@ class ObjectMonitor : public CHeapObj { static ByteSize metadata_offset() { return byte_offset_of(ObjectMonitor, _metadata); } static ByteSize owner_offset() { return byte_offset_of(ObjectMonitor, _owner); } static ByteSize recursions_offset() { return byte_offset_of(ObjectMonitor, _recursions); } - static ByteSize cxq_offset() { return byte_offset_of(ObjectMonitor, _cxq); } static ByteSize succ_offset() { return byte_offset_of(ObjectMonitor, _succ); } static ByteSize EntryList_offset() { return byte_offset_of(ObjectMonitor, _EntryList); } @@ -275,7 +274,7 @@ class ObjectMonitor : public CHeapObj { bool is_busy() const { // TODO-FIXME: assert _owner == NO_OWNER implies _recursions = 0 - intptr_t ret_code = intptr_t(_waiters) | intptr_t(_cxq) | intptr_t(_EntryList); + intptr_t ret_code = intptr_t(_waiters) | intptr_t(_EntryList); int cnts = contentions(); // read once if (cnts > 0) { ret_code |= intptr_t(cnts); @@ -419,6 +418,7 @@ class ObjectMonitor : public CHeapObj { intx complete_exit(JavaThread* current); private: + void AddToEntryList(JavaThread* current, ObjectWaiter* node); void AddWaiter(ObjectWaiter* waiter); void INotify(JavaThread* current); ObjectWaiter* DequeueWaiter(); @@ -426,6 +426,7 @@ class ObjectMonitor : public CHeapObj { void EnterI(JavaThread* current); void ReenterI(JavaThread* current, ObjectWaiter* current_node); void UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* current_node); + ObjectWaiter* EntryListTail(JavaThread* current); bool VThreadMonitorEnter(JavaThread* current, ObjectWaiter* node = nullptr); void VThreadWait(JavaThread* current, jlong millis); @@ -435,6 +436,7 @@ class ObjectMonitor : public CHeapObj { enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; bool TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); + bool TryLockOrAddToEntryList(JavaThread* current, ObjectWaiter* node); TryLockResult TryLock(JavaThread* current); bool TrySpin(JavaThread* current); From 755e4a4a3f46fb7387199850a3fdbd2df1dff7c6 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Tue, 11 Feb 2025 14:47:48 +0100 Subject: [PATCH 2/8] Updated theory of operations comment --- src/hotspot/share/runtime/objectMonitor.cpp | 133 +++++++++++++------- 1 file changed, 85 insertions(+), 48 deletions(-) diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 2eeea7d1b8f41..f7b0c256968c0 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -127,19 +127,19 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // its owner_id (return value from owner_id_from()). // // * Invariant: A thread appears on at most one monitor list -- -// cxq, EntryList or WaitSet -- at any one time. +// EntryList or WaitSet -- at any one time. // -// * Contending threads "push" themselves onto the cxq with CAS +// * Contending threads "push" themselves onto the EntryList with CAS // and then spin/park. // If the thread is a virtual thread it will first attempt to // unmount itself. The virtual thread will first try to freeze // all frames in the heap. If the operation fails it will just // follow the regular path for platform threads. 
If the operation -// succeeds, it will push itself onto the cxq with CAS and then +// succeeds, it will push itself onto the EntryList with CAS and then // return back to Java to continue the unmount logic. // // * After a contending thread eventually acquires the lock it must -// dequeue itself from either the EntryList or the cxq. +// dequeue itself from the EntryList. // // * The exiting thread identifies and unparks an "heir presumptive" // tentative successor thread on the EntryList. In case the successor @@ -154,61 +154,98 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // // Succession is provided for by a policy of competitive handoff. // The exiting thread does _not_ grant or pass ownership to the -// successor thread. (This is also referred to as "handoff" succession"). +// successor thread. (This is also referred to as "handoff succession"). // Instead the exiting thread releases ownership and possibly wakes // a successor, so the successor can (re)compete for ownership of the lock. -// If the EntryList is empty but the cxq is populated the exiting -// thread will drain the cxq into the EntryList. It does so by -// by detaching the cxq (installing null with CAS) and folding -// the threads from the cxq into the EntryList. The EntryList is -// doubly linked, while the cxq is singly linked because of the -// CAS-based "push" used to enqueue recently arrived threads (RATs). +// +// The EntryList is a linked list, the first contending thread that +// "pushed" itself onto EntryList, will be the last thread in the +// list. Each newly pushed thread in EntryList will be linked trough +// its next pointer, and have its prev pointer set to null. Thus +// pushing six threads A-F (in that order) onto EntryList, will +// form a singly-linked list, see 1) below. +// +// Since the successor is chosen in FIFO order, the exiting thread +// needs to find the tail of the EntryList. This is done by walking +// from the EntryList head. While walking the list we also assign +// the prev pointers of each thread, essentially forming a doubly- +// linked list, see 2) below. +// +// Once we have formed a doubly-linked list it's easy to find the +// successor, wake it up, have it remove itself, and update the +// tail pointer, as seen in 2) and 3) below. +// +// At any time new threads can add themselves to the EntryList, see +// 4) and 5). +// +// If the thread that removes itself from the end of the list hasn't +// got any prev pointer, we just set the tail pointer to null, see +// 5) and 6). +// +// Next time we need to find the successor and the tail is null, we +// just start walking from the EntryList head again forming a new +// doubly-linked list, see 6) and 7) below. +// +// 1) EntryList ->F->E->D->C->B->A->null +// EntryListTail ->null +// +// 2) EntryList ->F<=>E<=>D<=>C<=>B<=>A->null +// EntryListTail ----------------------^ +// +// 3) EntryList ->F<=>E<=>D<=>C<=>B->null +// EntryListTail ------------------^ +// +// 4) EntryList ->F->null +// EntryListTail --^ +// +// 5) EntryList ->I->H->G->F->null +// EntryListTail -----------^ +// +// 6) EntryList ->I->H->G->null +// EntryListTail ->null +// +// 7) EntryList ->I<=>H<=>G->null +// EntryListTail ----------^ // // * Concurrency invariants: // -// -- only the monitor owner may access or mutate the EntryList. +// -- only the monitor owner may assign prev pointers in EntryList. // The mutex property of the monitor itself protects the EntryList // from concurrent interference. -// -- Only the monitor owner may detach the cxq. 
+// -- Only the monitor owner may detach nodes from the EntryList. // // * The monitor entry list operations avoid locks, but strictly speaking // they're not lock-free. Enter is lock-free, exit is not. // For a description of 'Methods and apparatus providing non-blocking access // to a resource,' see U.S. Pat. No. 7844973. // -// * The cxq can have multiple concurrent "pushers" but only one concurrent -// detaching thread. This mechanism is immune from the ABA corruption. -// More precisely, the CAS-based "push" onto cxq is ABA-oblivious. -// -// * Taken together, the cxq and the EntryList constitute or form a -// single logical queue of threads stalled trying to acquire the lock. -// We use two distinct lists to improve the odds of a constant-time -// dequeue operation after acquisition (in the ::enter() epilogue) and -// to reduce heat on the list ends. (c.f. Michael Scott's "2Q" algorithm). -// A key desideratum is to minimize queue & monitor metadata manipulation -// that occurs while holding the monitor lock -- that is, we want to -// minimize monitor lock holds times. Note that even a small amount of -// fixed spinning will greatly reduce the # of enqueue-dequeue operations -// on EntryList|cxq. That is, spinning relieves contention on the "inner" -// locks and monitor metadata. +// * The EntryList can have multiple concurrent "pushers" but only one +// concurrent detaching thread. This mechanism is immune from the +// ABA corruption. More precisely, the CAS-based "push" onto +// EntryList is ABA-oblivious. // -// Cxq points to the set of Recently Arrived Threads attempting entry. -// Because we push threads onto _cxq with CAS, the RATs must take the form of -// a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when -// the unlocking thread notices that EntryList is null but _cxq is != null. +// * The EntryList form a queue of threads stalled trying to acquire +// the lock. Within the EntryList the next pointers always form a +// consistent singly-linked list. At unlock-time when the unlocking +// thread notices that the tail of the EntryList is not known, we +// convert the singly-linked EntryList into a doubly-linked list by +// assigning the prev pointers and the EntryListTail pointer. // -// The EntryList is ordered by the prevailing queue discipline and -// can be organized in any convenient fashion, such as a doubly-linked list or -// a circular doubly-linked list. Critically, we want insert and delete operations -// to operate in constant-time. If we need a priority queue then something akin -// to Solaris' sleepq would work nicely. Viz., -// http://agg.eng/ws/on10_nightly/source/usr/src/uts/common/os/sleepq.c. -// Queue discipline is enforced at ::exit() time, when the unlocking thread -// drains the cxq into the EntryList, and orders or reorders the threads on the -// EntryList accordingly. +// As long as the EntryListTail is known the odds are good that we +// should be able to dequeue after acquisition (in the ::enter() +// epilogue) in constant-time. This is good since a key desideratum +// is to minimize queue & monitor metadata manipulation that occurs +// while holding the monitor lock -- that is, we want to minimize +// monitor lock holds times. Note that even a small amount of fixed +// spinning will greatly reduce the # of enqueue-dequeue operations +// on EntryList. That is, spinning relieves contention on the +// "inner" locks and monitor metadata. 
// -// Barring "lock barging", this mechanism provides fair cyclic ordering, -// somewhat similar to an elevator-scan. +// Insert and delete operations may not operate in constant-time if +// we have interference because some other thread is adding or +// removing the head element of EntryList or if we need to convert +// the singly-linked EntryList into a doubly-linked list to find the +// tail. // // * The monitor synchronization subsystem avoids the use of native // synchronization primitives except for the narrow platform-specific @@ -219,11 +256,11 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // * Waiting threads reside on the WaitSet list -- wait() puts // the caller onto the WaitSet. // -// * notify() or notifyAll() simply transfers threads from the WaitSet to -// either the EntryList or cxq. Subsequent exit() operations will -// unpark/re-schedule the notifyee. Unparking/re-scheduling a notifyee in -// notify() is inefficient - it's likely the notifyee would simply impale -// itself on the lock held by the notifier. +// * notify() or notifyAll() simply transfers threads from the WaitSet +// to either the EntryList. Subsequent exit() operations will +// unpark/re-schedule the notifyee. Unparking/re-scheduling a +// notifyee in notify() is inefficient - it's likely the notifyee +// would simply impale itself on the lock held by the notifier. // Check that object() and set_object() are called from the right context: static void check_object_context() { From d9b9713cbbb5da4e6f3464f3d55e1a427f65f142 Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Fri, 14 Feb 2025 15:56:35 +0100 Subject: [PATCH 3/8] General cleanup --- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 8 +- src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 8 +- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 8 +- src/hotspot/cpu/s390/macroAssembler_s390.cpp | 10 +- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 12 +- src/hotspot/share/jvmci/vmStructs_jvmci.cpp | 3 +- src/hotspot/share/runtime/objectMonitor.cpp | 428 ++++++++---------- src/hotspot/share/runtime/objectMonitor.hpp | 23 +- src/hotspot/share/runtime/synchronizer.cpp | 2 +- 9 files changed, 229 insertions(+), 273 deletions(-) diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 88205a3dda679..97cd00e652279 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -312,8 +312,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset())); + // Check if the entry_list is empty. + ldr(rscratch1, Address(tmp, ObjectMonitor::entry_list_offset())); cmp(rscratch1, zr); br(Assembler::EQ, cont); // If so we are done. @@ -633,8 +633,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset())); + // Check if the entry_list is empty. + ldr(rscratch1, Address(t1_monitor, ObjectMonitor::entry_list_offset())); cmp(rscratch1, zr); br(Assembler::EQ, unlocked); // If so we are done. 
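The CAS-based "push" that the theory-of-operations comment above keeps referring to is small enough to model on its own. A sketch using std::atomic instead of Atomic::cmpxchg, with Waiter as a stand-in for the real ObjectWaiter:

#include <atomic>

struct Waiter { Waiter* next = nullptr; Waiter* prev = nullptr; };

// Prepend node to the head of the list. Any number of threads may race
// here; each CAS only ever swings the head pointer, which is why the
// push is ABA-oblivious: a stale head simply makes the CAS fail and the
// loop retry.
inline void push_front(std::atomic<Waiter*>& entry_list, Waiter* node) {
  node->prev = nullptr;
  Waiter* front = entry_list.load();
  do {
    node->next = front;  // pushing A..F in order yields F->E->D->C->B->A
  } while (!entry_list.compare_exchange_weak(front, node));
}

compare_exchange_weak reloads the observed head into front on failure, so each retry re-links node->next against the fresh head, mirroring the retry loop in AddToEntryList()/add_to_entry_list().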
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 4c74e6bfb73a1..9c1b321270f7d 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2957,8 +2957,8 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header); + // Check if the entry_list is empty. + ld(temp, in_bytes(ObjectMonitor::entry_list_offset()), current_header); cmpdi(flag, temp, 0); beq(flag, success); // If so we are done. @@ -3305,8 +3305,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor); + // Check if the entry_list is empty. + ld(t, in_bytes(ObjectMonitor::entry_list_offset()), monitor); cmpdi(CR0, t, 0); beq(CR0, unlocked); // If so we are done. diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 248cb7b286937..2c9dea0140ec0 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -233,8 +233,8 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); + // Check if the entry_list is empty. + ld(t0, Address(tmp, ObjectMonitor::entry_list_offset())); beqz(t0, unlocked); // If so we are done. // Check if there is a successor. @@ -567,8 +567,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset())); + // Check if the entry_list is empty. + ld(t0, Address(tmp1_monitor, ObjectMonitor::entry_list_offset())); beqz(t0, unlocked); // If so we are done. // Check if there is a successor. diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 7d667f22bb5a7..3f61561f5b827 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3941,8 +3941,8 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg // We need a full fence after clearing owner to avoid stranding. z_fence(); - // Check if the EntryList is empty. - load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); + // Check if the entry_list is empty. + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(entry_list))); z_bre(done); // If so we are done. // Check if there is a successor. @@ -6791,7 +6791,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 
0 : checked_cast(markWord::monitor_value)); const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag}; - const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag}; + const Address entry_list_address{monitor, ObjectMonitor::entry_list_offset() - monitor_tag}; const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; NearLabel not_recursive; @@ -6818,8 +6818,8 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis // We need a full fence after clearing owner to avoid stranding. z_fence(); - // Check if the entry lists are empty (EntryList first - by convention). - load_and_test_long(tmp2, EntryList_address); + // Check if the entry_list is empty. + load_and_test_long(tmp2, entry_list_address); z_bre(unlocked); // If so we are done. // Check if there is a successor. diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index a01300498fe0e..cc52e4e76129a 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -415,7 +415,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // as java routines or native JNI code called by this thread might // have released the lock. // Refer to the comments in synchronizer.cpp for how we might encode extra - // state in _succ so we can avoid fetching EntryList. + // state in _succ so we can avoid fetching entry_list. // // If there's no contention try a 1-0 exit. That is, exit without // a costly MEMBAR or CAS. See synchronizer.cpp for details on how @@ -447,8 +447,8 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), NULL_WORD); + // Check if the entry_list is empty. + cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(entry_list)), NULL_WORD); jccb(Assembler::zero, LSuccess); // If so we are done. // Check if there is a successor. @@ -767,7 +767,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag}; - const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag}; + const Address entry_list_address{monitor, ObjectMonitor::entry_list_offset() - monitor_tag}; const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; Label recursive; @@ -783,8 +783,8 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // StoreLoad achieves this. membar(StoreLoad); - // Check if the EntryList is empty. - cmpptr(EntryList_address, NULL_WORD); + // Check if the entry_list is empty. + cmpptr(entry_list_address, NULL_WORD); jccb(Assembler::zero, unlocked); // If so we are done. // Check if there is a successor. 
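Once the entry list is seen to be non-empty, every port makes the same two-way decision before abandoning its fast path. Roughly, in C++ (a simplified, illustrative signature; the real stubs read the fields at fixed offsets and communicate through condition flags):

#include <cstdint>

enum class ExitPath { FastDone, WakeSuccessor };

// After the fence: if nobody is queued, or an heir presumptive (_succ)
// already exists and will retry the lock on its own, the exiting thread
// can simply leave; otherwise it must take the slow path and unpark a
// successor.
inline ExitPath choose_exit_path(const void* entry_list,
                                 int64_t succ, int64_t no_owner) {
  if (entry_list == nullptr) return ExitPath::FastDone;
  if (succ != no_owner)      return ExitPath::FastDone;
  return ExitPath::WakeSuccessor;
}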
diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index a4dbb14104fa9..c8963f757a7c7 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -329,8 +329,7 @@ \ volatile_nonstatic_field(ObjectMonitor, _owner, int64_t) \ volatile_nonstatic_field(ObjectMonitor, _recursions, intptr_t) \ - volatile_nonstatic_field(ObjectMonitor, _EntryListTail, ObjectWaiter*) \ - volatile_nonstatic_field(ObjectMonitor, _EntryList, ObjectWaiter*) \ + volatile_nonstatic_field(ObjectMonitor, _entry_list, ObjectWaiter*) \ volatile_nonstatic_field(ObjectMonitor, _succ, int64_t) \ volatile_nonstatic_field(ObjectMonitor, _stack_locker, BasicLock*) \ \ diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index f7b0c256968c0..fe1652929b125 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -127,28 +127,28 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // its owner_id (return value from owner_id_from()). // // * Invariant: A thread appears on at most one monitor list -- -// EntryList or WaitSet -- at any one time. +// entry_list or WaitSet -- at any one time. // -// * Contending threads "push" themselves onto the EntryList with CAS +// * Contending threads "push" themselves onto the entry_list with CAS // and then spin/park. // If the thread is a virtual thread it will first attempt to // unmount itself. The virtual thread will first try to freeze // all frames in the heap. If the operation fails it will just // follow the regular path for platform threads. If the operation -// succeeds, it will push itself onto the EntryList with CAS and then +// succeeds, it will push itself onto the entry_list with CAS and then // return back to Java to continue the unmount logic. // // * After a contending thread eventually acquires the lock it must -// dequeue itself from the EntryList. +// dequeue itself from the entry_list. // // * The exiting thread identifies and unparks an "heir presumptive" -// tentative successor thread on the EntryList. In case the successor +// tentative successor thread on the entry_list. In case the successor // is an unmounted virtual thread, the exiting thread will first try // to add it to the list of vthreads waiting to be unblocked, and on // success it will unpark the special unblocker thread instead, which // will be in charge of submitting the vthread back to the scheduler // queue. Critically, the exiting thread doesn't unlink the successor -// thread from the EntryList. After having been unparked/re-scheduled, +// thread from the entry_list. After having been unparked/re-scheduled, // the wakee will recontend for ownership of the monitor. The successor // (wakee) will either acquire the lock or re-park/unmount itself. // @@ -158,24 +158,24 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // Instead the exiting thread releases ownership and possibly wakes // a successor, so the successor can (re)compete for ownership of the lock. // -// The EntryList is a linked list, the first contending thread that -// "pushed" itself onto EntryList, will be the last thread in the -// list. Each newly pushed thread in EntryList will be linked trough +// The entry_list is a linked list, the first contending thread that +// "pushed" itself onto entry_list, will be the last thread in the +// list. 
Each newly pushed thread in entry_list will be linked trough // its next pointer, and have its prev pointer set to null. Thus -// pushing six threads A-F (in that order) onto EntryList, will +// pushing six threads A-F (in that order) onto entry_list, will // form a singly-linked list, see 1) below. // // Since the successor is chosen in FIFO order, the exiting thread -// needs to find the tail of the EntryList. This is done by walking -// from the EntryList head. While walking the list we also assign -// the prev pointers of each thread, essentially forming a doubly- +// needs to find the tail of the entry_list. This is done by walking +// from the entry_list head. While walking the list we also assign +// the prev pointers of each thread, essentially forming a doubly // linked list, see 2) below. // -// Once we have formed a doubly-linked list it's easy to find the +// Once we have formed a doubly linked list it's easy to find the // successor, wake it up, have it remove itself, and update the // tail pointer, as seen in 2) and 3) below. // -// At any time new threads can add themselves to the EntryList, see +// At any time new threads can add themselves to the entry_list, see // 4) and 5). // // If the thread that removes itself from the end of the list hasn't @@ -183,68 +183,68 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // 5) and 6). // // Next time we need to find the successor and the tail is null, we -// just start walking from the EntryList head again forming a new -// doubly-linked list, see 6) and 7) below. +// just start walking from the entry_list head again forming a new +// doubly linked list, see 6) and 7) below. // -// 1) EntryList ->F->E->D->C->B->A->null -// EntryListTail ->null +// 1) entry_list ->F->E->D->C->B->A->null +// entry_list_tail ->null // -// 2) EntryList ->F<=>E<=>D<=>C<=>B<=>A->null -// EntryListTail ----------------------^ +// 2) entry_list ->F<=>E<=>D<=>C<=>B<=>A->null +// entry_list_tail ----------------------^ // -// 3) EntryList ->F<=>E<=>D<=>C<=>B->null -// EntryListTail ------------------^ +// 3) entry_list ->F<=>E<=>D<=>C<=>B->null +// entry_list_tail ------------------^ // -// 4) EntryList ->F->null -// EntryListTail --^ +// 4) entry_list ->F->null +// entry_list_tail --^ // -// 5) EntryList ->I->H->G->F->null -// EntryListTail -----------^ +// 5) entry_list ->I->H->G->F->null +// entry_list_tail -----------^ // -// 6) EntryList ->I->H->G->null -// EntryListTail ->null +// 6) entry_list ->I->H->G->null +// entry_list_tail ->null // -// 7) EntryList ->I<=>H<=>G->null -// EntryListTail ----------^ +// 7) entry_list ->I<=>H<=>G->null +// entry_list_tail ----------^ // // * Concurrency invariants: // -// -- only the monitor owner may assign prev pointers in EntryList. -// The mutex property of the monitor itself protects the EntryList +// -- only the monitor owner may assign prev pointers in entry_list. +// The mutex property of the monitor itself protects the entry_list // from concurrent interference. -// -- Only the monitor owner may detach nodes from the EntryList. +// -- Only the monitor owner may detach nodes from the entry_list. // // * The monitor entry list operations avoid locks, but strictly speaking // they're not lock-free. Enter is lock-free, exit is not. // For a description of 'Methods and apparatus providing non-blocking access // to a resource,' see U.S. Pat. No. 7844973. 
// -// * The EntryList can have multiple concurrent "pushers" but only one +// * The entry_list can have multiple concurrent "pushers" but only one // concurrent detaching thread. This mechanism is immune from the // ABA corruption. More precisely, the CAS-based "push" onto -// EntryList is ABA-oblivious. +// entry_list is ABA-oblivious. // -// * The EntryList form a queue of threads stalled trying to acquire -// the lock. Within the EntryList the next pointers always form a +// * The entry_list form a queue of threads stalled trying to acquire +// the lock. Within the entry_list the next pointers always form a // consistent singly-linked list. At unlock-time when the unlocking -// thread notices that the tail of the EntryList is not known, we -// convert the singly-linked EntryList into a doubly-linked list by -// assigning the prev pointers and the EntryListTail pointer. +// thread notices that the tail of the entry_list is not known, we +// convert the singly-linked entry_list into a doubly linked list by +// assigning the prev pointers and the entry_list_tail pointer. // -// As long as the EntryListTail is known the odds are good that we +// As long as the entry_list_tail is known the odds are good that we // should be able to dequeue after acquisition (in the ::enter() // epilogue) in constant-time. This is good since a key desideratum // is to minimize queue & monitor metadata manipulation that occurs // while holding the monitor lock -- that is, we want to minimize // monitor lock holds times. Note that even a small amount of fixed // spinning will greatly reduce the # of enqueue-dequeue operations -// on EntryList. That is, spinning relieves contention on the +// on entry_list. That is, spinning relieves contention on the // "inner" locks and monitor metadata. // // Insert and delete operations may not operate in constant-time if // we have interference because some other thread is adding or -// removing the head element of EntryList or if we need to convert -// the singly-linked EntryList into a doubly-linked list to find the +// removing the head element of entry_list or if we need to convert +// the singly-linked entry_list into a doubly linked list to find the // tail. // // * The monitor synchronization subsystem avoids the use of native @@ -257,7 +257,7 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // the caller onto the WaitSet. // // * notify() or notifyAll() simply transfers threads from the WaitSet -// to either the EntryList. Subsequent exit() operations will +// to either the entry_list. Subsequent exit() operations will // unpark/re-schedule the notifyee. Unparking/re-scheduling a // notifyee in notify() is inefficient - it's likely the notifyee // would simply impale itself on the lock held by the notifier. 
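To make the queue discipline described in the comment block above concrete, here is a minimal standalone C++ sketch of the two core operations: the lock-free push of a contending thread onto the head, and the owner-only tail walk that converts the singly linked form 1) into the doubly linked form 2). This is not the HotSpot code: std::atomic stands in for the Atomic:: wrappers, and Node, list_head, list_tail, push and find_tail are illustrative names, not HotSpot identifiers.

  #include <atomic>

  struct Node {
    Node* prev = nullptr;  // written only by the monitor owner
    Node* next = nullptr;  // written by the pushing thread before its CAS
  };

  std::atomic<Node*> list_head{nullptr};  // models _entry_list
  Node* list_tail = nullptr;              // models _entry_list_tail (owner-only)

  // Any contending thread may push itself onto the head; the CAS retry
  // loop tolerates concurrent pushers.
  void push(Node* n) {
    Node* head = list_head.load(std::memory_order_relaxed);
    do {
      n->next = head;  // head is refreshed by a failed CAS
    } while (!list_head.compare_exchange_weak(head, n,
                                              std::memory_order_release,
                                              std::memory_order_relaxed));
  }

  // Owner-only: walk from the head assigning prev pointers, turning the
  // singly linked form 1) into the doubly linked form 2), then cache the
  // tail so later dequeues are constant-time.
  Node* find_tail() {
    if (list_tail != nullptr) return list_tail;
    Node* prev = nullptr;
    for (Node* w = list_head.load(std::memory_order_acquire);
         w != nullptr; w = w->next) {
      w->prev = prev;
      prev = w;
    }
    return list_tail = prev;
  }

Note that push tolerates any number of concurrent pushers and is ABA-oblivious, since it only requires that the head be unchanged at the instant of the CAS, while find_tail may only ever be called by the monitor owner, which is what makes the plain prev writes and the cached tail safe.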
@@ -292,8 +292,8 @@ ObjectMonitor::ObjectMonitor(oop object) : _previous_owner_tid(0), _next_om(nullptr), _recursions(0), - _EntryList(nullptr), - _EntryListTail(nullptr), + _entry_list(nullptr), + _entry_list_tail(nullptr), _succ(NO_OWNER), _SpinDuration(ObjectMonitor::Knob_SpinLimit), _contentions(0), @@ -510,7 +510,7 @@ bool ObjectMonitor::enter(JavaThread* current) { return true; } -void ObjectMonitor::notify_contended_enter(JavaThread *current) { +void ObjectMonitor::notify_contended_enter(JavaThread* current) { current->set_current_pending_monitor(this); DTRACE_MONITOR_PROBE(contended__enter, this, object(), current); @@ -525,7 +525,7 @@ void ObjectMonitor::notify_contended_enter(JavaThread *current) { } } -void ObjectMonitor::enter_with_contention_mark(JavaThread *current, ObjectMonitorContentionMark &cm) { +void ObjectMonitor::enter_with_contention_mark(JavaThread* current, ObjectMonitorContentionMark &cm) { assert(current == JavaThread::current(), "must be"); assert(!has_owner(current), "must be"); assert(cm._monitor == this, "must be"); @@ -555,7 +555,7 @@ void ObjectMonitor::enter_with_contention_mark(JavaThread *current, ObjectMonito bool acquired = VThreadMonitorEnter(current); if (acquired) { // We actually acquired the monitor while trying to add the vthread to the - // _cxq so cancel preemption. We will still go through the preempt stub + // _entry_list so cancel preemption. We will still go through the preempt stub // but instead of unmounting we will call thaw to continue execution. current->set_preemption_cancelled(true); if (JvmtiExport::should_post_monitor_contended_entered()) { @@ -694,40 +694,40 @@ ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) { return first_own == own ? TryLockResult::HasOwner : TryLockResult::Interference; } -// Push "current" onto the front of the _EntryList. Once on _EntryList, +// Push "current" onto the front of the _entry_list. Once on _entry_list, // current stays on-queue until it acquires the lock. -void ObjectMonitor::AddToEntryList(JavaThread* current, ObjectWaiter* node) { +void ObjectMonitor::add_to_entry_list(JavaThread* current, ObjectWaiter* node) { node->_prev = nullptr; node->TState = ObjectWaiter::TS_ENTER; for (;;) { - ObjectWaiter* front = _EntryList; + ObjectWaiter* front = Atomic::load_acquire(&_entry_list); node->_next = front; - if (Atomic::cmpxchg(&_EntryList, front, node) == front) { + if (Atomic::cmpxchg(&_entry_list, front, node) == front) { return; } } } -// Push "current" onto the front of the EntryList. -// If the _EntryList was changed during our push operation, we try to +// Push "current" onto the front of the entry_list. +// If the _entry_list was changed during our push operation, we try to // lock the monitor. Returns true if we locked the monitor, and false -// if we added current to _EntryList. Once on _EntryList, current +// if we added current to _entry_list. Once on _entry_list, current // stays on-queue until it acquires the lock. 
-bool ObjectMonitor::TryLockOrAddToEntryList(JavaThread* current, ObjectWaiter* node) { +bool ObjectMonitor::try_lock_or_add_to_entry_list(JavaThread* current, ObjectWaiter* node) { node->_prev = nullptr; node->TState = ObjectWaiter::TS_ENTER; for (;;) { - ObjectWaiter* front = _EntryList; + ObjectWaiter* front = Atomic::load_acquire(&_entry_list); node->_next = front; - if (Atomic::cmpxchg(&_EntryList, front, node) == front) { + if (Atomic::cmpxchg(&_entry_list, front, node) == front) { return false; } - // Interference - the CAS failed because _EntryList changed. Just retry. + // Interference - the CAS failed because _entry_list changed. Just retry. // As an optional optimization we retry the lock. if (TryLock(current) == TryLockResult::Success) { assert(!has_successor(current), "invariant"); @@ -807,9 +807,9 @@ bool ObjectMonitor::deflate_monitor(Thread* current) { guarantee(contentions() < 0, "must be negative: contentions=%d", contentions()); guarantee(_waiters == 0, "must be 0: waiters=%d", _waiters); - guarantee(_EntryList == nullptr, - "must be no entering threads: EntryList=" INTPTR_FORMAT, - p2i(_EntryList)); + guarantee(_entry_list == nullptr, + "must be no entering threads: entry_list=" INTPTR_FORMAT, + p2i(_entry_list)); if (obj != nullptr) { if (log_is_enabled(Trace, monitorinflation)) { @@ -894,7 +894,7 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { ss->print("is_busy: waiters=%d" ", contentions=%d" ", owner=" INT64_FORMAT - ", EntryList=" PTR_FORMAT, + ", entry_list=" PTR_FORMAT, _waiters, (contentions() > 0 ? contentions() : 0), owner_is_DEFLATER_MARKER() @@ -902,7 +902,7 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { // ignores DEFLATER_MARKER values. ? NO_OWNER : owner_raw(), - p2i(_EntryList)); + p2i(_entry_list)); return ss->base(); } @@ -935,7 +935,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { assert(!has_successor(current), "invariant"); assert(!has_owner(current), "invariant"); - // Enqueue "current" on ObjectMonitor's _EntryList. + // Enqueue "current" on ObjectMonitor's _entry_list. // // Node acts as a proxy for current. // As an aside, if were to ever rewrite the synchronization code mostly @@ -947,15 +947,15 @@ void ObjectMonitor::EnterI(JavaThread* current) { ObjectWaiter node(current); current->_ParkEvent->reset(); - if (TryLockOrAddToEntryList(current, &node)) { + if (try_lock_or_add_to_entry_list(current, &node)) { return; // We got the lock. } - // This thread is now added to the _EntryList. + // This thread is now added to the _entry_list. // The lock might have been released while this thread was occupied queueing - // itself onto _EntryList. To close the race and avoid "stranding" and + // itself onto _entry_list. To close the race and avoid "stranding" and // progress-liveness failure we must resample-retry _owner before parking. - // Note the Dekker/Lamport duality: ST _EntryList; MEMBAR; LD Owner. + // Note the Dekker/Lamport duality: ST _entry_list; MEMBAR; LD Owner. // In this case the ST-MEMBAR is accomplished with CAS(). // // TODO: Defer all thread state transitions until park-time. @@ -1031,7 +1031,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { } // Egress : - // Current has acquired the lock -- Unlink current from the _EntryList. + // Current has acquired the lock -- Unlink current from the _entry_list. 
UnlinkAfterAcquire(current, &node); if (has_successor(current)) { clear_successor(); @@ -1041,9 +1041,9 @@ void ObjectMonitor::EnterI(JavaThread* current) { // We've acquired ownership with CAS(). // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics. - // But since the CAS() this thread may have also stored into _succ, - // EntryList or cxq. These meta-data updates must be - // visible __before this thread subsequently drops the lock. + // But since the CAS() this thread may have also stored into _succ + // or entry_list. These meta-data updates must be visible __before + // this thread subsequently drops the lock. // Consider what could occur if we didn't enforce this constraint -- // STs to monitor meta-data and user-data could reorder with (become // visible after) the ST in exit that drops ownership of the lock. @@ -1057,7 +1057,7 @@ void ObjectMonitor::EnterI(JavaThread* current) { // therefore before some other thread (CPU) has a chance to acquire the lock. // See also: http://gee.cs.oswego.edu/dl/jmm/cookbook.html. // - // Critically, any prior STs to _succ or EntryList must be visible before + // Critically, any prior STs to _succ or entry_list must be visible before // the ST of null into _owner in the *subsequent* (following) corresponding // monitorexit. @@ -1130,7 +1130,7 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) { OM_PERFDATA_OP(FutileWakeups, inc()); } - // Current has acquired the lock -- Unlink current from the _EntryList. + // Current has acquired the lock -- Unlink current from the _entry_list. assert(has_owner(current), "invariant"); assert_mark_word_consistency(); UnlinkAfterAcquire(current, currentNode); @@ -1155,15 +1155,15 @@ bool ObjectMonitor::VThreadMonitorEnter(JavaThread* current, ObjectWaiter* waite oop vthread = current->vthread(); ObjectWaiter* node = waiter != nullptr ? waiter : new ObjectWaiter(vthread, this); - if (TryLockOrAddToEntryList(current, node)) { + if (try_lock_or_add_to_entry_list(current, node)) { // We got the lock. if (waiter == nullptr) delete node; // for Object.wait() don't delete yet return true; } - // This thread is now added to the EntryList. + // This thread is now added to the entry_list. // We have to try once more since owner could have exited monitor and checked - // _EntryList before we added the node to the queue. + // _entry_list before we added the node to the queue. if (TryLock(current) == TryLockResult::Success) { assert(has_owner(current), "invariant"); UnlinkAfterAcquire(current, node); @@ -1252,34 +1252,33 @@ void ObjectMonitor::VThreadEpilog(JavaThread* current, ObjectWaiter* node) { } } -// Return the tail of the _EntryList. If the tail is currently not -// known, find it by walking from the head of _EntryList, and while -// doing so assign the _prev pointers to create a DLL list. -ObjectWaiter *ObjectMonitor::EntryListTail(JavaThread* current) { +// Return the tail of the _entry_list. If the tail is currently not +// known, find it by walking from the head of _entry_list, and while +// doing so assign the _prev pointers to create a doubly linked list. 
+ObjectWaiter* ObjectMonitor::entry_list_tail(JavaThread* current) { assert(has_owner(current), "invariant"); - ObjectWaiter *w = _EntryListTail; + ObjectWaiter* w = _entry_list_tail; if (w != nullptr) { return w; } - w = _EntryList; + w = _entry_list; assert(w != nullptr, "invariant"); if (w->_next == nullptr) { - _EntryListTail = w; + _entry_list_tail = w; return w; } - ObjectWaiter *prev = nullptr; + ObjectWaiter* prev = nullptr; while (w != nullptr) { assert(w->TState == ObjectWaiter::TS_ENTER, "invariant"); w->_prev = prev; prev = w; w = w->_next; } - _EntryListTail = prev; + _entry_list_tail = prev; return prev; } -static void set_bad_pointers(ObjectWaiter* currentNode) -{ +static void set_bad_pointers(ObjectWaiter* currentNode) { #ifdef ASSERT // Diagnostic hygiene ... currentNode->_prev = (ObjectWaiter*) 0xBAD; @@ -1288,10 +1287,10 @@ static void set_bad_pointers(ObjectWaiter* currentNode) #endif } -// By convention we unlink a contending thread from _EntryList immediately +// By convention we unlink a contending thread from _entry_list immediately // after the thread acquires the lock in ::enter(). Equally, we could defer // unlinking the thread until ::exit()-time. -// The head of _EntryList is volatile but the interior is stable. +// The head of _entry_list is volatile but the interior is stable. // In addition, current.TState is stable. void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* currentNode) { @@ -1299,103 +1298,81 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren assert((!currentNode->is_vthread() && currentNode->thread() == current) || (currentNode->is_vthread() && currentNode->vthread() == current->vthread()), "invariant"); - // Check if we are unlinking the last element in the _EntryList. + // Check if we are unlinking the last element in the _entry_list. // This is by far the most common case. if (currentNode->_next == nullptr) { - assert(_EntryListTail == nullptr || _EntryListTail == currentNode, "invariant"); + assert(_entry_list_tail == nullptr || _entry_list_tail == currentNode, "invariant"); - ObjectWaiter *v = _EntryList; + ObjectWaiter* v = Atomic::load_acquire(&_entry_list); if (v == currentNode) { - // The currentNode is the only element in _EntryList. - if (Atomic::cmpxchg(&_EntryList, v, (ObjectWaiter*)nullptr) == v) { - _EntryListTail = nullptr; - set_bad_pointers(currentNode); - return; - } - // The CAS above can fail from interference IFF a new element - // was added to the head of _EntryList. In that case currentNode - // must be in the interior and can no longer be at the head of - // _EntryList. - v = _EntryList; - assert(v != currentNode, "invariant"); - if (v->_next == currentNode) { - // The currentNode is located just after the head of _EntryList. - v->_next= nullptr; - _EntryListTail = v; + // The currentNode is the only element in _entry_list. + if (Atomic::cmpxchg(&_entry_list, v, (ObjectWaiter*)nullptr) == v) { + _entry_list_tail = nullptr; set_bad_pointers(currentNode); return; } + // The CAS above can fail from interference IFF a contending + // thread "pushed" itself onto entry_list. } - if (currentNode->_prev != nullptr) { - // The currentNode is the last element in _EntryList and we know - // which element is the previous one. 
-    assert(_EntryList != currentNode, "invariant");
-    _EntryListTail = currentNode->_prev;
-    _EntryListTail->_next = nullptr;
-    set_bad_pointers(currentNode);
-    return;
+    if (currentNode->_prev == nullptr) {
+      // Build the doubly linked list to get hold of
+      // currentNode->_prev.
+      _entry_list_tail = nullptr;
+      entry_list_tail(current);
+      assert(currentNode->_prev != nullptr, "must be");
     }
-    // Here we know that the currentNode is the last element in
-    // _EntryList but we don't we know which element is the previous
-    // one.
+    // The currentNode is the last element in _entry_list and we know
+    // which element is the previous one.
+    assert(_entry_list != currentNode, "invariant");
+    _entry_list_tail = currentNode->_prev;
+    _entry_list_tail->_next = nullptr;
+    set_bad_pointers(currentNode);
+    return;
   }
+
+  assert(currentNode->_next != nullptr, "invariant");
+  assert(currentNode != _entry_list_tail, "invariant");
+
   if (currentNode->_prev == nullptr) {
-    // We don't know which element is the previous one, so we need to find it.
-    ObjectWaiter *v = _EntryList;
+    ObjectWaiter* v = Atomic::load_acquire(&_entry_list);
+
     assert(v != nullptr, "invariant");
-    if (v != currentNode || Atomic::cmpxchg(&_EntryList, v, currentNode->_next) != v) {
-      // The CAS above can fail from interference IFF a new element
-      // was added to the head of _EntryList. In that case currentNode
-      // must be in the interior and can no longer be at the head of
-      // _EntryList.
-      if (v == currentNode) {
-        assert(_EntryList != v, "invariant");
-        v = _EntryList; // CAS above failed - start scan at head of list
-      }
-      ObjectWaiter* p;
-      ObjectWaiter* q = nullptr;
-      for (p = v; p != nullptr && p != currentNode; p = p->_next) {
-        assert(p->TState == ObjectWaiter::TS_ENTER, "invariant");
-        p->_prev = q; // We might as well assign the DLL _prev-pointer.
-        q = p;
-      }
-      assert(v != currentNode, "invariant");
-      assert(p == currentNode, "Node not found on EntryList");
-      assert(p != _EntryList, "invariant");
-      assert(q != nullptr, "invariant");
-      assert(q->_next == p, "invariant");
-      q->_next = p->_next;
-      if (p->_next != nullptr) {
-        p->_next->_prev = q;
-      } else {
-        assert(_EntryListTail == nullptr || currentNode == _EntryListTail, "invariant");
-        _EntryListTail = currentNode->_prev;
-      }
-    } else if (v == currentNode) {
-      // The CAS above sucsessfully unlinked currentNode from the head of the _EntryList.
-      assert(_EntryList != v, "invariant");
-      if (currentNode->_next != nullptr) {
-        currentNode->_next->_prev = nullptr;
+    if (v == currentNode) {
+      // currentNode is at the head of _entry_list.
+      if (Atomic::cmpxchg(&_entry_list, v, currentNode->_next) == v) {
+        // The CAS above successfully unlinked currentNode from the head of the _entry_list.
+        assert(_entry_list != v, "invariant");
+        _entry_list->_prev = nullptr;
+        set_bad_pointers(currentNode);
+        return;
       } else {
-        assert(_EntryListTail == nullptr || _EntryListTail == currentNode, "invariant");
-        _EntryListTail = nullptr;
+        // The CAS above can fail from interference IFF a contending
+        // thread "pushed" itself onto entry_list, in which case
+        // currentNode must now be in the interior of the list.
+ assert(_entry_list != currentNode, "invariant"); } } - } else { - assert(currentNode->_next != nullptr, ""); - assert(currentNode->_prev != nullptr, ""); - assert(currentNode != _EntryList, ""); - assert(currentNode != _EntryListTail, ""); - - ObjectWaiter* nxt = currentNode->_next; - ObjectWaiter* prv = currentNode->_prev; - nxt->_prev = prv; - prv->_next = nxt; - assert(nxt->TState == ObjectWaiter::TS_ENTER, "invariant"); - assert(prv->TState == ObjectWaiter::TS_ENTER, "invariant"); - } - + // Build the doubly linked list to get hold of currentNode->_prev. + _entry_list_tail = nullptr; + entry_list_tail(current); + assert(currentNode->_prev != nullptr, "must be"); + } + + // We now assume we are unlinking currentNode from the interior of a + // doubly linked list. + assert(currentNode->_next != nullptr, ""); + assert(currentNode->_prev != nullptr, ""); + assert(currentNode != _entry_list, ""); + assert(currentNode != _entry_list_tail, ""); + + ObjectWaiter* nxt = currentNode->_next; + ObjectWaiter* prv = currentNode->_prev; + assert(nxt->TState == ObjectWaiter::TS_ENTER, "invariant"); + assert(prv->TState == ObjectWaiter::TS_ENTER, "invariant"); + + nxt->_prev = prv; + prv->_next = nxt; set_bad_pointers(currentNode); } @@ -1420,33 +1397,25 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // C2_MacroAssembler::fast_unlock() // // 1. A release barrier ensures that changes to monitor meta-data -// (_succ, _EntryList, _cxq) and data protected by the lock will be +// (_succ, _entry_list) and data protected by the lock will be // visible before we release the lock. // 2. Release the lock by clearing the owner. // 3. A storeload MEMBAR is needed between releasing the owner and // subsequently reading meta-data to safely determine if the lock is // contended (step 4) without an elected successor (step 5). -// 4. If both _EntryList and _cxq are null, we are done, since there is no +// 4. If _entry_list is null, we are done, since there is no // other thread waiting on the lock to wake up. I.e. there is no // contention. // 5. If there is a successor (_succ is non-null), we are done. The // responsibility for guaranteeing progress-liveness has now implicitly // been moved from the exiting thread to the successor. -// 6. There are waiters in the entry list (_EntryList and/or cxq are -// non-null), but there is no successor (_succ is null), so we need to +// 6. There are waiters in the entry list (_entry_list is non-null), +// but there is no successor (_succ is null), so we need to // wake up (unpark) a waiting thread to avoid stranding. // -// Note that since only the current lock owner can manipulate the _EntryList -// or drain _cxq, we need to reacquire the lock before we can wake up -// (unpark) a waiting thread. -// -// Note that we read the EntryList and then the cxq after dropping the -// lock, so the values need not form a stable snapshot. In particular, -// after reading the (empty) EntryList, another thread could acquire -// and release the lock, moving any entries in the cxq to the -// EntryList, causing the current thread to see an empty cxq and -// conclude there are no waiters. But this is okay as the thread that -// moved the cxq is responsible for waking the successor. +// Note that since only the current lock owner can manipulate the +// _entry_list (except for pushing new threads to the head), we need to +// reacquire the lock before we can wake up (unpark) a waiting thread. 
// // The CAS() in enter provides for safety and exclusion, while the // MEMBAR in exit provides for progress and avoids stranding. @@ -1504,11 +1473,6 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { release_clear_owner(current); OrderAccess::storeload(); - if (_EntryList == nullptr || has_successor()) { - return; - } - // Other threads are blocked trying to acquire the lock. - // Normally the exiting thread is responsible for ensuring succession, // but if this thread observes other successors are ready or other // entering threads are spinning after it has stored null into _owner @@ -1534,11 +1498,16 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { // the lock. Note that the dropped lock needs to become visible to the // spinner. - // It appears that an heir-presumptive (successor) must be made ready. - // Only the current lock owner can manipulate the EntryList or - // drain _cxq, so we need to reacquire the lock. If we fail - // to reacquire the lock the responsibility for ensuring succession - // falls to the new owner. + if (_entry_list == nullptr || has_successor()) { + return; + } + + // Other threads are blocked trying to acquire the lock and there + // is no successor, so it appears that an heir-presumptive + // (successor) must be made ready. Only the current lock owner can + // detach threads from the entry_list, therefore we need to + // reacquire the lock. If we fail to reacquire the lock the + // responsibility for ensuring succession falls to the new owner. if (TryLock(current) != TryLockResult::Success) { // Some other thread acquired the lock (or the monitor was @@ -1550,18 +1519,18 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { ObjectWaiter* w = nullptr; - w = _EntryList; + w = _entry_list; if (w != nullptr) { - w = EntryListTail(current); + w = entry_list_tail(current); // I'd like to write: guarantee (w->_thread != current). - // But in practice an exiting thread may find itself on the EntryList. + // But in practice an exiting thread may find itself on the entry_list. // Let's say thread T1 calls O.wait(). Wait() enqueues T1 on O's waitset and // then calls exit(). Exit release the lock by setting O._owner to null. // Let's say T1 then stalls. T2 acquires O and calls O.notify(). The - // notify() operation moves T1 from O's waitset to O's EntryList. T2 then + // notify() operation moves T1 from O's waitset to O's entry_list. T2 then // release the lock "O". T2 resumes immediately after the ST of null into - // _owner, above. T2 notices that the EntryList is populated, so it - // reacquires the lock and then finds itself on the EntryList. + // _owner, above. T2 notices that the entry_list is populated, so it + // reacquires the lock and then finds itself on the entry_list. // Given all that, we have to tolerate the circumstance where "w" is // associated with current. assert(w->TState == ObjectWaiter::TS_ENTER, "invariant"); @@ -1686,7 +1655,7 @@ static void post_monitor_wait_event(EventJavaMonitorWait* event, event->commit(); } -static void vthread_monitor_waited_event(JavaThread *current, ObjectWaiter* node, ContinuationWrapper& cont, EventJavaMonitorWait* event, jboolean timed_out) { +static void vthread_monitor_waited_event(JavaThread* current, ObjectWaiter* node, ContinuationWrapper& cont, EventJavaMonitorWait* event, jboolean timed_out) { // Since we might safepoint set the anchor so that the stack can we walked. 
assert(current->last_continuation() != nullptr, ""); JavaFrameAnchor* anchor = current->frame_anchor(); @@ -1821,8 +1790,8 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { } } - // Node may be on the WaitSet, the EntryList (or cxq), or in transition - // from the WaitSet to the EntryList. + // Node may be on the WaitSet, or on the entry_list, or in transition + // from the WaitSet to the entry_list. // See if we need to remove Node from the WaitSet. // We use double-checked locking to avoid grabbing _WaitSetLock // if the thread is not on the wait queue. @@ -1847,7 +1816,7 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { } // The thread is now either on off-list (TS_RUN), - // or on the EntryList (TS_ENTER). + // or on the entry_list (TS_ENTER). // The Node's TState variable is stable from the perspective of this thread. // No other threads will asynchronously modify TState. guarantee(node.TState != ObjectWaiter::TS_WAIT, "invariant"); @@ -1947,8 +1916,8 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { } // Consider: -// If the lock is cool (cxq == null && succ == null) and we're on an MP system -// then instead of transferring a thread from the WaitSet to the EntryList +// If the lock is cool (entry_list == null && succ == null) and we're on an MP system +// then instead of transferring a thread from the WaitSet to the entry_list // we might just dequeue a thread from the WaitSet and directly unpark() it. void ObjectMonitor::INotify(JavaThread* current) { @@ -1957,11 +1926,6 @@ void ObjectMonitor::INotify(JavaThread* current) { if (iterator != nullptr) { guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant"); guarantee(!iterator->_notified, "invariant"); - // Disposition - what might we do with iterator ? - // a. add it directly to the EntryList - either tail (policy == 1) - // or head (policy == 0). - // b. push it onto the front of the _cxq (policy == 2). - // For now we use (b). if (iterator->is_vthread()) { oop vthread = iterator->vthread(); @@ -1981,10 +1945,10 @@ void ObjectMonitor::INotify(JavaThread* current) { iterator->_notified = true; iterator->_notifier_tid = JFR_THREAD_ID(current); - AddToEntryList(current, iterator); + add_to_entry_list(current, iterator); - // _WaitSetLock protects the wait queue, not the EntryList. We could - // move the add-to-EntryList operation, above, outside the critical section + // _WaitSetLock protects the wait queue, not the entry_list. We could + // move the add-to-entry_list operation, above, outside the critical section // protected by _WaitSetLock. In practice that's not useful. With the // exception of wait() timeouts and interrupts the monitor owner // is the only thread that grabs _WaitSetLock. There's almost no contention @@ -2021,11 +1985,11 @@ void ObjectMonitor::notify(TRAPS) { // The current implementation of notifyAll() transfers the waiters one-at-a-time -// from the waitset to the EntryList. This could be done more efficiently with a +// from the waitset to the entry_list. This could be done more efficiently with a // single bulk transfer but in practice it's not time-critical. Beware too, // that in prepend-mode we invert the order of the waiters. Let's say that the -// waitset is "ABCD" and the EntryList is "XYZ". After a notifyAll() in prepend -// mode the waitset will be empty and the EntryList will be "DCBAXYZ". +// waitset is "ABCD" and the entry_list is "XYZ". 
After a notifyAll() in prepend +// mode the waitset will be empty and the entry_list will be "DCBAXYZ". void ObjectMonitor::notifyAll(TRAPS) { JavaThread* current = THREAD; @@ -2124,7 +2088,7 @@ bool ObjectMonitor::VThreadWaitReenter(JavaThread* current, ObjectWaiter* node, return true; } } else { - // Already moved to _cxq or _EntryList by notifier, so just add to contentions. + // Already moved to _entry_list by notifier, so just add to contentions. add_to_contentions(1); } return false; @@ -2136,13 +2100,7 @@ bool ObjectMonitor::VThreadWaitReenter(JavaThread* current, ObjectWaiter* node, // Adaptive spin-then-block - rational spinning // // Note that we spin "globally" on _owner with a classic SMP-polite TATAS -// algorithm. On high order SMP systems it would be better to start with -// a brief global spin and then revert to spinning locally. In the spirit of MCS/CLH, -// a contending thread could enqueue itself on the cxq and then spin locally -// on a thread-specific variable such as its ParkEvent._Event flag. -// That's left as an exercise for the reader. Note that global spinning is -// not problematic on Niagara, as the L2 cache serves the interconnect and -// has both low latency and massive bandwidth. +// algorithm. // // Broadly, we can fix the spin frequency -- that is, the % of contended lock // acquisition attempts where we opt to spin -- at 100% and vary the spin count @@ -2570,8 +2528,8 @@ void ObjectMonitor::print() const { print_on(tty); } // } // _next_om = 0x0000000000000000 // _recursions = 0 -// _EntryList = 0x0000000000000000 -// _EntryListTail = 0x0000000000000000 +// _entry_list = 0x0000000000000000 +// _entry_list_tail = 0x0000000000000000 // _succ = 0x0000000000000000 // _SpinDuration = 5000 // _contentions = 0 @@ -2598,8 +2556,8 @@ void ObjectMonitor::print_debug_style_on(outputStream* st) const { st->print_cr(" }"); st->print_cr(" _next_om = " INTPTR_FORMAT, p2i(next_om())); st->print_cr(" _recursions = %zd", _recursions); - st->print_cr(" _EntryList = " INTPTR_FORMAT, p2i(_EntryList)); - st->print_cr(" _EntryListTail = " INTPTR_FORMAT, p2i(_EntryListTail)); + st->print_cr(" _entry_list = " INTPTR_FORMAT, p2i(_entry_list)); + st->print_cr(" _entry_list_tail = " INTPTR_FORMAT, p2i(_entry_list_tail)); st->print_cr(" _succ = " INT64_FORMAT, successor()); st->print_cr(" _SpinDuration = %d", _SpinDuration); st->print_cr(" _contentions = %d", contentions()); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index a95605e3a26c5..5911dc33f052f 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -120,7 +120,7 @@ class ObjectWaiter : public CHeapObj { // monitorenter will invalidate the line underlying _owner. We want // to avoid an L1 data cache miss on that same line for monitorexit. // Putting these : -// _recursions, _EntryList, _cxq, and _succ, all of which may be +// _recursions, _entry_list and _succ, all of which may be // fetched in the inflated unlock path, on a different cache line // would make them immune to CAS-based invalidation from the _owner // field. @@ -173,11 +173,10 @@ class ObjectMonitor : public CHeapObj { sizeof(volatile uint64_t)); ObjectMonitor* _next_om; // Next ObjectMonitor* linkage volatile intx _recursions; // recursion count, 0 for first entry - ObjectWaiter* volatile _EntryList; // Threads blocked on entry or reentry. - // The list is actually composed of WaitNodes, - // acting as proxies for Threads. 
- - ObjectWaiter* volatile _EntryListTail; // LL of recently-arrived threads blocked on entry. + ObjectWaiter* volatile _entry_list; // Threads blocked on entry or reentry. + // The list is actually composed of WaitNodes, + // acting as proxies for Threads. + ObjectWaiter* volatile _entry_list_tail; // _entry_list is the head, this is the tail. int64_t volatile _succ; // Heir presumptive thread - used for futile wakeup throttling volatile int _SpinDuration; @@ -246,7 +245,7 @@ class ObjectMonitor : public CHeapObj { static ByteSize owner_offset() { return byte_offset_of(ObjectMonitor, _owner); } static ByteSize recursions_offset() { return byte_offset_of(ObjectMonitor, _recursions); } static ByteSize succ_offset() { return byte_offset_of(ObjectMonitor, _succ); } - static ByteSize EntryList_offset() { return byte_offset_of(ObjectMonitor, _EntryList); } + static ByteSize entry_list_offset() { return byte_offset_of(ObjectMonitor, _entry_list); } // ObjectMonitor references can be ORed with markWord::monitor_value // as part of the ObjectMonitor tagging mechanism. When we combine an @@ -274,7 +273,7 @@ class ObjectMonitor : public CHeapObj { bool is_busy() const { // TODO-FIXME: assert _owner == NO_OWNER implies _recursions = 0 - intptr_t ret_code = intptr_t(_waiters) | intptr_t(_EntryList); + intptr_t ret_code = intptr_t(_waiters) | intptr_t(_entry_list); int cnts = contentions(); // read once if (cnts > 0) { ret_code |= intptr_t(cnts); @@ -316,7 +315,7 @@ class ObjectMonitor : public CHeapObj { int64_t try_set_owner_from(int64_t old_value, JavaThread* current); // Methods to check and set _succ. The successor is the thread selected - // from _cxq/_EntryList by the current owner when releasing the monitor, + // from _entry_list by the current owner when releasing the monitor, // to run again and re-try acquiring the monitor. It is used to avoid // unnecessary wake-ups if there is already a successor set. 
bool has_successor() const; @@ -418,7 +417,7 @@ class ObjectMonitor : public CHeapObj { intx complete_exit(JavaThread* current); private: - void AddToEntryList(JavaThread* current, ObjectWaiter* node); + void add_to_entry_list(JavaThread* current, ObjectWaiter* node); void AddWaiter(ObjectWaiter* waiter); void INotify(JavaThread* current); ObjectWaiter* DequeueWaiter(); @@ -426,7 +425,7 @@ class ObjectMonitor : public CHeapObj { void EnterI(JavaThread* current); void ReenterI(JavaThread* current, ObjectWaiter* current_node); void UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* current_node); - ObjectWaiter* EntryListTail(JavaThread* current); + ObjectWaiter* entry_list_tail(JavaThread* current); bool VThreadMonitorEnter(JavaThread* current, ObjectWaiter* node = nullptr); void VThreadWait(JavaThread* current, jlong millis); @@ -436,7 +435,7 @@ class ObjectMonitor : public CHeapObj { enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 }; bool TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); - bool TryLockOrAddToEntryList(JavaThread* current, ObjectWaiter* node); + bool try_lock_or_add_to_entry_list(JavaThread* current, ObjectWaiter* node); TryLockResult TryLock(JavaThread* current); bool TrySpin(JavaThread* current); diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp index 0ee3e24760d18..f48035db2e866 100644 --- a/src/hotspot/share/runtime/synchronizer.cpp +++ b/src/hotspot/share/runtime/synchronizer.cpp @@ -366,7 +366,7 @@ bool ObjectSynchronizer::quick_notify(oopDesc* obj, JavaThread* current, bool al if (mon->first_waiter() != nullptr) { // We have one or more waiters. Since this is an inflated monitor // that we own, we can transfer one or more threads from the waitset - // to the entrylist here and now, avoiding the slow-path. + // to the entry_list here and now, avoiding the slow-path. if (all) { DTRACE_MONITOR_PROBE(notifyAll, mon, obj, current); } else { From f8e8e10a4980e24bf32d84cab42160b1fd90d78b Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Wed, 19 Feb 2025 13:44:58 +0100 Subject: [PATCH 4/8] Fixed a bug in UnlinkAfterAcquire --- src/hotspot/share/runtime/objectMonitor.cpp | 28 +++++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index fe1652929b125..a63f19a743362 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -1261,7 +1261,7 @@ ObjectWaiter* ObjectMonitor::entry_list_tail(JavaThread* current) { if (w != nullptr) { return w; } - w = _entry_list; + w = Atomic::load_acquire(&_entry_list); assert(w != nullptr, "invariant"); if (w->_next == nullptr) { _entry_list_tail = w; @@ -1312,7 +1312,9 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren return; } // The CAS above can fail from interference IFF a contending - // thread "pushed" itself onto entry_list. + // thread "pushed" itself onto entry_list. So fall-through to + // building the doubly-linked list. 
+      assert(currentNode->_prev == nullptr, "invariant");
     }
     if (currentNode->_prev == nullptr) {
       // Build the doubly linked list to get hold of
       // currentNode->_prev.
       _entry_list_tail = nullptr;
       entry_list_tail(current);
       assert(currentNode->_prev != nullptr, "must be");
-
     }
@@ -1331,25 +1333,35 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren
     return;
   }

+  // If we get here it means the current thread enqueued itself on the
+  // _entry_list but was then able to "steal" the lock before the
+  // chosen successor was able to. Consequently currentNode must be an
+  // interior node in the _entry_list, or the head.
   assert(currentNode->_next != nullptr, "invariant");
   assert(currentNode != _entry_list_tail, "invariant");

+  // Check if we are in the singly-linked portion of the
+  // _entry_list. If we are the head then we try to remove ourselves,
+  // else we convert to the doubly-linked list.
   if (currentNode->_prev == nullptr) {
     ObjectWaiter* v = Atomic::load_acquire(&_entry_list);

     assert(v != nullptr, "invariant");

     if (v == currentNode) {
+      ObjectWaiter* next = currentNode->_next;
       // currentNode is at the head of _entry_list.
-      if (Atomic::cmpxchg(&_entry_list, v, currentNode->_next) == v) {
-        // The CAS above successfully unlinked currentNode from the head of the _entry_list.
+      if (Atomic::cmpxchg(&_entry_list, v, next) == v) {
+        // The CAS above successfully unlinked currentNode from the
+        // head of the _entry_list.
         assert(_entry_list != v, "invariant");
-        _entry_list->_prev = nullptr;
+        next->_prev = nullptr;
         set_bad_pointers(currentNode);
         return;
       } else {
         // The CAS above can fail from interference IFF a contending
-        // thread "pushed" itself onto entry_list, in which case
-        // currentNode must now be in the interior of the list.
+        // thread "pushed" itself onto _entry_list, in which case
+        // currentNode must now be in the interior of the
+        // list. Fall-through to building the doubly-linked list.
         assert(_entry_list != currentNode, "invariant");
       }
     }
@@ -1359,7 +1371,7 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren
     assert(currentNode->_prev != nullptr, "must be");
   }

-  // We now assume we are unlinking currentNode from the interior of a
+  // We now know we are unlinking currentNode from the interior of a
   // doubly linked list.
   assert(currentNode->_next != nullptr, "");
   assert(currentNode->_prev != nullptr, "");

From b4fb7fd30a216e5bf41eca05a2264e7c2be92f72 Mon Sep 17 00:00:00 2001
From: Fredrik Bredberg
Date: Thu, 20 Feb 2025 09:58:37 +0100
Subject: [PATCH 5/8] Atomic hygiene

---
 src/hotspot/share/runtime/objectMonitor.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp
index a63f19a743362..bfbc881e191a6 100644
--- a/src/hotspot/share/runtime/objectMonitor.cpp
+++ b/src/hotspot/share/runtime/objectMonitor.cpp
@@ -168,7 +168,7 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr;
 // Since the successor is chosen in FIFO order, the exiting thread
 // needs to find the tail of the entry_list. This is done by walking
 // from the entry_list head. While walking the list we also assign
-// the prev pointers of each thread, essentially forming a doubly 
+// the prev pointers of each thread, essentially forming a doubly
 // linked list, see 2) below.
// // Once we have formed a doubly linked list it's easy to find the @@ -701,7 +701,7 @@ void ObjectMonitor::add_to_entry_list(JavaThread* current, ObjectWaiter* node) { node->TState = ObjectWaiter::TS_ENTER; for (;;) { - ObjectWaiter* front = Atomic::load_acquire(&_entry_list); + ObjectWaiter* front = Atomic::load(&_entry_list); node->_next = front; if (Atomic::cmpxchg(&_entry_list, front, node) == front) { @@ -720,7 +720,7 @@ bool ObjectMonitor::try_lock_or_add_to_entry_list(JavaThread* current, ObjectWai node->TState = ObjectWaiter::TS_ENTER; for (;;) { - ObjectWaiter* front = Atomic::load_acquire(&_entry_list); + ObjectWaiter* front = Atomic::load(&_entry_list); node->_next = front; if (Atomic::cmpxchg(&_entry_list, front, node) == front) { @@ -1261,6 +1261,8 @@ ObjectWaiter* ObjectMonitor::entry_list_tail(JavaThread* current) { if (w != nullptr) { return w; } + // Need acquire here to match the implicit release of the cmpxchg + // that updated _entry_list, so we can access w->_next. w = Atomic::load_acquire(&_entry_list); assert(w != nullptr, "invariant"); if (w->_next == nullptr) { @@ -1303,7 +1305,7 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren if (currentNode->_next == nullptr) { assert(_entry_list_tail == nullptr || _entry_list_tail == currentNode, "invariant"); - ObjectWaiter* v = Atomic::load_acquire(&_entry_list); + ObjectWaiter* v = Atomic::load(&_entry_list); if (v == currentNode) { // The currentNode is the only element in _entry_list. if (Atomic::cmpxchg(&_entry_list, v, (ObjectWaiter*)nullptr) == v) { @@ -1322,7 +1324,6 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren _entry_list_tail = nullptr; entry_list_tail(current); assert(currentNode->_prev != nullptr, "must be"); - } // The currentNode is the last element in _entry_list and we know // which element is the previous one. @@ -1344,7 +1345,7 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // _entry_list. If we are the head then we try to remove ourselves, // else we convert to the doubly-linked list. if (currentNode->_prev == nullptr) { - ObjectWaiter* v = Atomic::load_acquire(&_entry_list); + ObjectWaiter* v = Atomic::load(&_entry_list); assert(v != nullptr, "invariant"); if (v == currentNode) { @@ -1531,7 +1532,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { ObjectWaiter* w = nullptr; - w = _entry_list; + w = Atomic::load(&_entry_list); if (w != nullptr) { w = entry_list_tail(current); // I'd like to write: guarantee (w->_thread != current). From e1d4fac67a49adc8b21c7f080f8d6f4190a6c94a Mon Sep 17 00:00:00 2001 From: Fredrik Bredberg Date: Mon, 24 Feb 2025 15:39:11 +0100 Subject: [PATCH 6/8] Moved set_bad_pointers() and added accessors. --- src/hotspot/share/runtime/objectMonitor.cpp | 51 +++++++++------------ src/hotspot/share/runtime/objectMonitor.hpp | 18 ++++++++ 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 7072905a65c88..ce704248e70d6 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -1265,7 +1265,7 @@ ObjectWaiter* ObjectMonitor::entry_list_tail(JavaThread* current) { // that updated _entry_list, so we can access w->_next. 
w = Atomic::load_acquire(&_entry_list); assert(w != nullptr, "invariant"); - if (w->_next == nullptr) { + if (w->next() == nullptr) { _entry_list_tail = w; return w; } @@ -1274,21 +1274,12 @@ ObjectWaiter* ObjectMonitor::entry_list_tail(JavaThread* current) { assert(w->TState == ObjectWaiter::TS_ENTER, "invariant"); w->_prev = prev; prev = w; - w = w->_next; + w = w->next(); } _entry_list_tail = prev; return prev; } -static void set_bad_pointers(ObjectWaiter* currentNode) { -#ifdef ASSERT - // Diagnostic hygiene ... - currentNode->_prev = (ObjectWaiter*) 0xBAD; - currentNode->_next = (ObjectWaiter*) 0xBAD; - currentNode->TState = ObjectWaiter::TS_RUN; -#endif -} - // By convention we unlink a contending thread from _entry_list immediately // after the thread acquires the lock in ::enter(). Equally, we could defer // unlinking the thread until ::exit()-time. @@ -1302,7 +1293,7 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // Check if we are unlinking the last element in the _entry_list. // This is by far the most common case. - if (currentNode->_next == nullptr) { + if (currentNode->next() == nullptr) { assert(_entry_list_tail == nullptr || _entry_list_tail == currentNode, "invariant"); ObjectWaiter* v = Atomic::load(&_entry_list); @@ -1310,27 +1301,27 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // The currentNode is the only element in _entry_list. if (Atomic::cmpxchg(&_entry_list, v, (ObjectWaiter*)nullptr) == v) { _entry_list_tail = nullptr; - set_bad_pointers(currentNode); + currentNode->set_bad_pointers(); return; } // The CAS above can fail from interference IFF a contending // thread "pushed" itself onto entry_list. So fall-through to // building the doubly-linked list. - assert(currentNode->_prev == nullptr, "invariant"); + assert(currentNode->prev() == nullptr, "invariant"); } - if (currentNode->_prev == nullptr) { + if (currentNode->prev() == nullptr) { // Build the doubly linked list to get hold of - // currentNode->_prev. + // currentNode->prev(). _entry_list_tail = nullptr; entry_list_tail(current); - assert(currentNode->_prev != nullptr, "must be"); + assert(currentNode->prev() != nullptr, "must be"); } // The currentNode is the last element in _entry_list and we know // which element is the previous one. assert(_entry_list != currentNode, "invariant"); - _entry_list_tail = currentNode->_prev; + _entry_list_tail = currentNode->prev(); _entry_list_tail->_next = nullptr; - set_bad_pointers(currentNode); + currentNode->set_bad_pointers(); return; } @@ -1338,25 +1329,25 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren // _entry_list but was then able to "steal" the lock before the // chosen successor was able to. Consequently currentNode must be an // interior node in the _entry_list, or the head. - assert(currentNode->_next != nullptr, "invariant"); + assert(currentNode->next() != nullptr, "invariant"); assert(currentNode != _entry_list_tail, "invariant"); // Check if we are in the singly-linked portion of the // _entry_list. If we are the head then we try to remove ourselves, // else we convert to the doubly-linked list. - if (currentNode->_prev == nullptr) { + if (currentNode->prev() == nullptr) { ObjectWaiter* v = Atomic::load(&_entry_list); assert(v != nullptr, "invariant"); if (v == currentNode) { - ObjectWaiter* next = currentNode->_next; + ObjectWaiter* next = currentNode->next(); // currentNode is at the head of _entry_list. 
if (Atomic::cmpxchg(&_entry_list, v, next) == v) {
         // The CAS above successfully unlinked currentNode from the
         // head of the _entry_list.
         assert(_entry_list != v, "invariant");
         next->_prev = nullptr;
-        set_bad_pointers(currentNode);
+        currentNode->set_bad_pointers();
         return;
       } else {
         // The CAS above can fail from interference IFF a contending
@@ -1366,27 +1357,27 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren
-    // Build the doubly linked list to get hold of currentNode->_prev.
+    // Build the doubly linked list to get hold of currentNode->prev().
     _entry_list_tail = nullptr;
     entry_list_tail(current);
-    assert(currentNode->_prev != nullptr, "must be");
+    assert(currentNode->prev() != nullptr, "must be");
   }

   // We now know we are unlinking currentNode from the interior of a
   // doubly linked list.
-  assert(currentNode->_next != nullptr, "");
-  assert(currentNode->_prev != nullptr, "");
+  assert(currentNode->next() != nullptr, "");
+  assert(currentNode->prev() != nullptr, "");
   assert(currentNode != _entry_list, "");
   assert(currentNode != _entry_list_tail, "");

-  ObjectWaiter* nxt = currentNode->_next;
-  ObjectWaiter* prv = currentNode->_prev;
+  ObjectWaiter* nxt = currentNode->next();
+  ObjectWaiter* prv = currentNode->prev();
   assert(nxt->TState == ObjectWaiter::TS_ENTER, "invariant");
   assert(prv->TState == ObjectWaiter::TS_ENTER, "invariant");

   nxt->_prev = prv;
   prv->_next = nxt;
-  set_bad_pointers(currentNode);
+  currentNode->set_bad_pointers();
 }

 // -----------------------------------------------------------------------------
diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp
index 5911dc33f052f..f9fc460cc3399 100644
--- a/src/hotspot/share/runtime/objectMonitor.hpp
+++ b/src/hotspot/share/runtime/objectMonitor.hpp
@@ -72,6 +72,24 @@ class ObjectWaiter : public CHeapObj {
   oop vthread() const;
   void wait_reenter_begin(ObjectMonitor *mon);
   void wait_reenter_end(ObjectMonitor *mon);
+
+  ObjectWaiter* const badObjectWaiterPtr = (ObjectWaiter*) 0xBAD;
+  void set_bad_pointers() {
+#ifdef ASSERT
+    // Diagnostic hygiene ...
+    this->_prev = badObjectWaiterPtr;
+    this->_next = badObjectWaiterPtr;
+    this->TState = ObjectWaiter::TS_RUN;
+#endif
+  }
+  ObjectWaiter* next() {
+    assert (_next != badObjectWaiterPtr, "corrupted list!");
+    return _next;
+  }
+  ObjectWaiter* prev() {
+    assert (_prev != badObjectWaiterPtr, "corrupted list!");
+    return _prev;
+  }
 };

 // The ObjectMonitor class implements the heavyweight version of a

From 283c2431ec64b0865d4e678913c636732d01658f Mon Sep 17 00:00:00 2001
From: Fredrik Bredberg
Date: Thu, 27 Feb 2025 16:36:39 +0100
Subject: [PATCH 7/8] Update after review by David and Coleen.
--- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 2 - src/hotspot/share/jvmci/vmStructs_jvmci.cpp | 2 +- src/hotspot/share/prims/jvm.cpp | 2 +- src/hotspot/share/runtime/objectMonitor.cpp | 171 +++++++++--------- src/hotspot/share/runtime/objectMonitor.hpp | 7 +- 5 files changed, 88 insertions(+), 96 deletions(-) diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index c4303da8a1178..783dbf546cf8f 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -414,8 +414,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // Despite our balanced locking property we still check that m->_owner == Self // as java routines or native JNI code called by this thread might // have released the lock. - // Refer to the comments in synchronizer.cpp for how we might encode extra - // state in _succ so we can avoid fetching entry_list. // // If there's no contention try a 1-0 exit. That is, exit without // a costly MEMBAR or CAS. See synchronizer.cpp for details on how diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index cf25f44fd29c1..a49ccfe6e0780 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -328,7 +328,7 @@ \ volatile_nonstatic_field(ObjectMonitor, _owner, int64_t) \ volatile_nonstatic_field(ObjectMonitor, _recursions, intptr_t) \ - volatile_nonstatic_field(ObjectMonitor, _entry_list, ObjectWaiter*) \ + volatile_nonstatic_field(ObjectMonitor, _entry_list, ObjectWaiter*) \ volatile_nonstatic_field(ObjectMonitor, _succ, int64_t) \ volatile_nonstatic_field(ObjectMonitor, _stack_locker, BasicLock*) \ \ diff --git a/src/hotspot/share/prims/jvm.cpp b/src/hotspot/share/prims/jvm.cpp index 33d82045b6eae..d2a5b09ba5fde 100644 --- a/src/hotspot/share/prims/jvm.cpp +++ b/src/hotspot/share/prims/jvm.cpp @@ -3776,7 +3776,7 @@ JVM_ENTRY(jobject, JVM_TakeVirtualThreadListToUnblock(JNIEnv* env, jclass ignore ParkEvent* parkEvent = ObjectMonitor::vthread_unparker_ParkEvent(); assert(parkEvent != nullptr, "not initialized"); - OopHandle& list_head = ObjectMonitor::vthread_cxq_head(); + OopHandle& list_head = ObjectMonitor::vthread_list_head(); oop vthread_head = nullptr; while (true) { if (list_head.peek() != nullptr) { diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index ce704248e70d6..3aba3e95cd1b0 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -116,7 +116,7 @@ DEBUG_ONLY(static volatile bool InitDone = false;) OopStorage* ObjectMonitor::_oop_storage = nullptr; -OopHandle ObjectMonitor::_vthread_cxq_head; +OopHandle ObjectMonitor::_vthread_list_head; ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // ----------------------------------------------------------------------------- @@ -158,12 +158,24 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // Instead the exiting thread releases ownership and possibly wakes // a successor, so the successor can (re)compete for ownership of the lock. // -// The entry_list is a linked list, the first contending thread that -// "pushed" itself onto entry_list, will be the last thread in the -// list. Each newly pushed thread in entry_list will be linked trough -// its next pointer, and have its prev pointer set to null. 
Thus -// pushing six threads A-F (in that order) onto entry_list, will -// form a singly-linked list, see 1) below. +// * The entry_list forms a queue of threads stalled trying to acquire +// the lock. Within the entry_list the next pointers always form a +// consistent singly linked list. At unlock-time when the unlocking +// thread notices that the tail of the entry_list is not known, we +// convert the singly linked entry_list into a doubly linked list by +// assigning the prev pointers and the entry_list_tail pointer. +// +// Example: +// +// The first contending thread that "pushed" itself onto entry_list, +// will be the last thread in the list. Each newly pushed thread in +// entry_list will be linked through its next pointer, and have its +// prev pointer set to null. Thus pushing six threads A-F (in that +// order) onto entry_list, will form a singly linked list, see 1) +// below. +// +// 1) entry_list ->F->E->D->C->B->A->null +// entry_list_tail ->null // // Since the successor is chosen in FIFO order, the exiting thread // needs to find the tail of the entry_list. This is done by walking @@ -171,32 +183,25 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // the prev pointers of each thread, essentially forming a doubly // linked list, see 2) below. // -// Once we have formed a doubly linked list it's easy to find the -// successor, wake it up, have it remove itself, and update the -// tail pointer, as seen in 2) and 3) below. -// -// At any time new threads can add themselves to the entry_list, see -// 4) and 5). -// -// If the thread that removes itself from the end of the list hasn't -// got any prev pointer, we just set the tail pointer to null, see -// 5) and 6). -// -// Next time we need to find the successor and the tail is null, we -// just start walking from the entry_list head again forming a new -// doubly linked list, see 6) and 7) below. -// -// 1) entry_list ->F->E->D->C->B->A->null -// entry_list_tail ->null -// // 2) entry_list ->F<=>E<=>D<=>C<=>B<=>A->null // entry_list_tail ----------------------^ // +// Once we have formed a doubly linked list it's easy to find the +// successor (A), wake it up, have it remove itself, and update the +// tail pointer, as seen in and 3) below. +// // 3) entry_list ->F<=>E<=>D<=>C<=>B->null // entry_list_tail ------------------^ // -// 4) entry_list ->F->null -// entry_list_tail --^ +// At any time new threads can add themselves to the entry_list, see +// 4) below. +// +// 4) entry_list ->I->H->G->F<=>E<=>D->null +// entry_list_tail -------------------^ +// +// If the thread (F) that removes itself from the end of the list +// hasn't got any prev pointer, we just set the tail pointer to +// null, see 5) and 6) below. // // 5) entry_list ->I->H->G->F->null // entry_list_tail -----------^ @@ -204,39 +209,33 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr; // 6) entry_list ->I->H->G->null // entry_list_tail ->null // +// Next time we need to find the successor and the tail is null, we +// just start walking from the entry_list head again, forming a new +// doubly linked list, see 7) below. +// // 7) entry_list ->I<=>H<=>G->null // entry_list_tail ----------^ // -// * Concurrency invariants: -// -// -- only the monitor owner may assign prev pointers in entry_list. -// The mutex property of the monitor itself protects the entry_list -// from concurrent interference. -// -- Only the monitor owner may detach nodes from the entry_list. 
+// * The monitor itself protects all of the operations on the
+// entry_list except for the CAS of a new arrival to the head. Only
+// the monitor owner can read or write the prev links (e.g. to
+// remove itself) or update the tail.
//
// * The monitor entry list operations avoid locks, but strictly speaking
// they're not lock-free. Enter is lock-free, exit is not.
// For a description of 'Methods and apparatus providing non-blocking access
// to a resource,' see U.S. Pat. No. 7844973.
//
-// * The entry_list can have multiple concurrent "pushers" but only one
-// concurrent detaching thread. This mechanism is immune from the
-// ABA corruption. More precisely, the CAS-based "push" onto
-// entry_list is ABA-oblivious.
-//
-// * The entry_list form a queue of threads stalled trying to acquire
-// the lock. Within the entry_list the next pointers always form a
-// consistent singly-linked list. At unlock-time when the unlocking
-// thread notices that the tail of the entry_list is not known, we
-// convert the singly-linked entry_list into a doubly linked list by
-// assigning the prev pointers and the entry_list_tail pointer.
+// * The entry_list can have multiple concurrent "pushers" but only
+// one concurrent detaching thread. There is no ABA problem with
+// this usage of CAS.
//
-// As long as the entry_list_tail is known the odds are good that we
+// * As long as the entry_list_tail is known the odds are good that we
// should be able to dequeue after acquisition (in the ::enter()
// epilogue) in constant-time. This is good since a key desideratum
// is to minimize queue & monitor metadata manipulation that occurs
// while holding the monitor lock -- that is, we want to minimize
-// monitor lock holds times. Note that even a small amount of fixed
+// monitor lock hold times. Note that even a small amount of fixed
// spinning will greatly reduce the # of enqueue-dequeue operations
// on entry_list. That is, spinning relieves contention on the
// "inner" locks and monitor metadata.
@@ -244,20 +243,20 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr;
// Insert and delete operations may not operate in constant-time if
// we have interference because some other thread is adding or
// removing the head element of entry_list or if we need to convert
-// the singly-linked entry_list into a doubly linked list to find the
+// the singly linked entry_list into a doubly linked list to find the
// tail.
//
// * The monitor synchronization subsystem avoids the use of native
// synchronization primitives except for the narrow platform-specific
-// park-unpark abstraction. See the comments in os_posix.cpp regarding
-// the semantics of park-unpark. Put another way, this monitor implementation
+// park-unpark abstraction. See the comments in os_posix.cpp regarding
+// the semantics of park-unpark. Put another way, this monitor implementation
// depends only on atomic operations and park-unpark.
//
// * Waiting threads reside on the WaitSet list -- wait() puts
// the caller onto the WaitSet.
//
// * notify() or notifyAll() simply transfers threads from the WaitSet
-// to either the entry_list. Subsequent exit() operations will
+// to the entry_list. Subsequent exit() operations will
// unpark/re-schedule the notifyee. Unparking/re-scheduling a
// notifyee in notify() is inefficient - it's likely the notifyee
// would simply impale itself on the lock held by the notifier.
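
An illustrative aside for readers of this series (not part of the patch): the list discipline the comment above describes can be modeled with a small standalone C++ sketch. ToyMonitor, Waiter, push() and successor() below are assumed stand-in names, and std::atomic plays the role of HotSpot's Atomic:: wrappers; this is a simplified model, not the HotSpot code.

#include <atomic>

// Toy stand-ins for ObjectMonitor/ObjectWaiter.
struct Waiter {
  Waiter* next = nullptr;   // singly linked part; always consistent
  Waiter* prev = nullptr;   // assigned lazily, and only by the owner
};

struct ToyMonitor {
  std::atomic<Waiter*> entry_list{nullptr};
  Waiter* entry_list_tail = nullptr;   // owner-only; null when unknown

  // Lock-free push at the head; any contending thread may do this
  // concurrently. Mirrors the shape of add_to_entry_list().
  void push(Waiter* node) {
    Waiter* head = entry_list.load();
    do {
      node->next = head;
    } while (!entry_list.compare_exchange_weak(head, node));
  }

  // Owner-only. If the tail is unknown, convert to a doubly linked
  // list by walking from the head and assigning prev pointers; the
  // last node reached is the FIFO successor.
  Waiter* successor() {
    if (entry_list_tail != nullptr) {
      return entry_list_tail;
    }
    Waiter* prev = nullptr;
    for (Waiter* w = entry_list.load(); w != nullptr; w = w->next) {
      w->prev = prev;
      prev = w;
    }
    entry_list_tail = prev;
    return prev;
  }
};

The split of responsibilities is the point: arriving threads touch only the head, with a single CAS, while all prev/tail maintenance is deferred to the one thread that owns the monitor, so the conversion walk needs no further synchronization.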
@@ -694,23 +693,22 @@ ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) {
return first_own == own ? TryLockResult::HasOwner : TryLockResult::Interference;
}
-// Push "current" onto the front of the _entry_list. Once on _entry_list,
+// Push "current" onto the head of the _entry_list. Once on _entry_list,
// current stays on-queue until it acquires the lock.
void ObjectMonitor::add_to_entry_list(JavaThread* current, ObjectWaiter* node) {
node->_prev = nullptr;
node->TState = ObjectWaiter::TS_ENTER;
for (;;) {
- ObjectWaiter* front = Atomic::load(&_entry_list);
-
- node->_next = front;
- if (Atomic::cmpxchg(&_entry_list, front, node) == front) {
+ ObjectWaiter* head = Atomic::load(&_entry_list);
+ node->_next = head;
+ if (Atomic::cmpxchg(&_entry_list, head, node) == head) {
return;
}
}
}
-// Push "current" onto the front of the entry_list.
+// Push "current" onto the head of the entry_list.
// If the _entry_list was changed during our push operation, we try to
// lock the monitor. Returns true if we locked the monitor, and false
// if we added current to _entry_list. Once on _entry_list, current
@@ -720,15 +718,14 @@ bool ObjectMonitor::try_lock_or_add_to_entry_list(JavaThread* current, ObjectWai
node->TState = ObjectWaiter::TS_ENTER;
for (;;) {
- ObjectWaiter* front = Atomic::load(&_entry_list);
-
- node->_next = front;
- if (Atomic::cmpxchg(&_entry_list, front, node) == front) {
+ ObjectWaiter* head = Atomic::load(&_entry_list);
+ node->_next = head;
+ if (Atomic::cmpxchg(&_entry_list, head, node) == head) {
return false;
}
- // Interference - the CAS failed because _entry_list changed. Just retry.
- // As an optional optimization we retry the lock.
+ // Interference - the CAS failed because _entry_list changed. Before
+ // retrying the CAS, retry taking the lock as it may now be free.
if (TryLock(current) == TryLockResult::Success) {
assert(!has_successor(current), "invariant");
assert(has_owner(current), "invariant");
@@ -807,9 +804,10 @@ bool ObjectMonitor::deflate_monitor(Thread* current) {
guarantee(contentions() < 0, "must be negative: contentions=%d",
contentions());
guarantee(_waiters == 0, "must be 0: waiters=%d", _waiters);
- guarantee(_entry_list == nullptr,
+ ObjectWaiter* w = Atomic::load(&_entry_list);
+ guarantee(w == nullptr,
"must be no entering threads: entry_list=" INTPTR_FORMAT,
- p2i(_entry_list));
+ p2i(w));
if (obj != nullptr) {
if (log_is_enabled(Trace, monitorinflation)) {
@@ -1280,9 +1278,8 @@ ObjectWaiter* ObjectMonitor::entry_list_tail(JavaThread* current) {
return prev;
}
-// By convention we unlink a contending thread from _entry_list immediately
-// after the thread acquires the lock in ::enter(). Equally, we could defer
-// unlinking the thread until ::exit()-time.
+// By convention we unlink a contending thread from _entry_list
+// immediately after the thread acquires the lock in ::enter().
// The head of _entry_list is volatile but the interior is stable.
// In addition, current.TState is stable.
@@ -1296,17 +1293,17 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren
if (currentNode->next() == nullptr) {
assert(_entry_list_tail == nullptr || _entry_list_tail == currentNode, "invariant");
- ObjectWaiter* v = Atomic::load(&_entry_list);
- if (v == currentNode) {
+ ObjectWaiter* w = Atomic::load(&_entry_list);
+ if (w == currentNode) {
// The currentNode is the only element in _entry_list.
- if (Atomic::cmpxchg(&_entry_list, v, (ObjectWaiter*)nullptr) == v) {
+ if (Atomic::cmpxchg(&_entry_list, w, (ObjectWaiter*)nullptr) == w) {
_entry_list_tail = nullptr;
currentNode->set_bad_pointers();
return;
}
// The CAS above can fail from interference IFF a contending
// thread "pushed" itself onto entry_list. So fall-through to
- // building the doubly-linked list.
+ // building the doubly linked list.
assert(currentNode->prev() == nullptr, "invariant");
}
if (currentNode->prev() == nullptr) {
@@ -1332,20 +1329,20 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren
assert(currentNode->next() != nullptr, "invariant");
assert(currentNode != _entry_list_tail, "invariant");
- // Check if we are in the singly-linked portion of the
+ // Check if we are in the singly linked portion of the
// _entry_list. If we are the head then we try to remove ourselves,
- // else we convert to the doubly-linked list.
+ // else we convert to the doubly linked list.
if (currentNode->prev() == nullptr) {
- ObjectWaiter* v = Atomic::load(&_entry_list);
+ ObjectWaiter* w = Atomic::load(&_entry_list);
- assert(v != nullptr, "invariant");
- if (v == currentNode) {
+ assert(w != nullptr, "invariant");
+ if (w == currentNode) {
ObjectWaiter* next = currentNode->next();
// currentNode is at the head of _entry_list.
- if (Atomic::cmpxchg(&_entry_list, v, next) == v) {
+ if (Atomic::cmpxchg(&_entry_list, w, next) == w) {
// The CAS above successfully unlinked currentNode from the
// head of the _entry_list.
- assert(_entry_list != v, "invariant");
+ assert(_entry_list != w, "invariant");
next->_prev = nullptr;
currentNode->set_bad_pointers();
return;
@@ -1353,7 +1350,7 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren
// The CAS above can fail from interference IFF a contending
// thread "pushed" itself onto _entry_list, in which case
// currentNode must now be in the interior of the
- // list. Fall-through to building the doubly-linked list.
+ // list. Fall-through to building the doubly linked list.
assert(_entry_list != currentNode, "invariant");
}
}
@@ -1582,7 +1579,7 @@ void ObjectMonitor::ExitEpilog(JavaThread* current, ObjectWaiter* Wakee) {
if (vthread == nullptr) {
// Platform thread case.
Trigger->unpark();
- } else if (java_lang_VirtualThread::set_onWaitingList(vthread, vthread_cxq_head())) {
+ } else if (java_lang_VirtualThread::set_onWaitingList(vthread, vthread_list_head())) {
// Virtual thread case.
Trigger->unpark();
}
@@ -2009,13 +2006,11 @@ void ObjectMonitor::notify(TRAPS) {
OM_PERFDATA_OP(Notifications, inc(1));
}
-
-// The current implementation of notifyAll() transfers the waiters one-at-a-time
-// from the waitset to the entry_list. This could be done more efficiently with a
-// single bulk transfer but in practice it's not time-critical. Beware too,
-// that in prepend-mode we invert the order of the waiters. Let's say that the
-// waitset is "ABCD" and the entry_list is "XYZ". After a notifyAll() in prepend
-// mode the waitset will be empty and the entry_list will be "DCBAXYZ".
+// notifyAll() transfers the waiters one-at-a-time from the waitset to
+// the entry_list. If the waitset is "ABCD" (where A was added first
+// and D last) and the entry_list is ->X->Y->Z, then after a notifyAll()
+// the waitset will be empty and the entry_list will be
+// ->D->C->B->A->X->Y->Z, and the next chosen successor will be Z.
void ObjectMonitor::notifyAll(TRAPS) {
JavaThread* current = THREAD;
@@ -2521,7 +2516,7 @@ void ObjectMonitor::Initialize() {
// We can't call this during Initialize() because BarrierSet needs to be set.
void ObjectMonitor::Initialize2() {
- _vthread_cxq_head = OopHandle(JavaThread::thread_oop_storage(), nullptr);
+ _vthread_list_head = OopHandle(JavaThread::thread_oop_storage(), nullptr);
_vthread_unparker_ParkEvent = ParkEvent::Allocate(nullptr);
}
diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp
index f9fc460cc3399..919bbd7d007f4 100644
--- a/src/hotspot/share/runtime/objectMonitor.hpp
+++ b/src/hotspot/share/runtime/objectMonitor.hpp
@@ -76,7 +76,6 @@ class ObjectWaiter : public CHeapObj {
ObjectWaiter* const badObjectWaiterPtr = (ObjectWaiter*) 0xBAD;
void set_bad_pointers() {
#ifdef ASSERT
- // Diagnostic hygiene ...
this->_prev = badObjectWaiterPtr;
this->_next = badObjectWaiterPtr;
this->TState = ObjectWaiter::TS_RUN;
@@ -158,7 +157,7 @@ class ObjectMonitor : public CHeapObj {
static OopStorage* _oop_storage;
// List of j.l.VirtualThread waiting to be unblocked by unblocker thread.
- static OopHandle _vthread_cxq_head;
+ static OopHandle _vthread_list_head;
// ParkEvent of unblocker thread.
static ParkEvent* _vthread_unparker_ParkEvent;
@@ -192,7 +191,7 @@ class ObjectMonitor : public CHeapObj {
ObjectMonitor* _next_om; // Next ObjectMonitor* linkage
volatile intx _recursions; // recursion count, 0 for first entry
ObjectWaiter* volatile _entry_list; // Threads blocked on entry or reentry.
- // The list is actually composed of WaitNodes,
+ // The list is actually composed of wait-nodes,
// acting as proxies for Threads.
ObjectWaiter* volatile _entry_list_tail; // _entry_list is the head, this is the tail.
int64_t volatile _succ; // Heir presumptive thread - used for futile wakeup throttling
@@ -216,7 +215,7 @@ class ObjectMonitor : public CHeapObj {
static void Initialize();
static void Initialize2();
- static OopHandle& vthread_cxq_head() { return _vthread_cxq_head; }
+ static OopHandle& vthread_list_head() { return _vthread_list_head; }
static ParkEvent* vthread_unparker_ParkEvent() { return _vthread_unparker_ParkEvent; }
// Only perform a PerfData operation if the PerfData object has been
From 0d2d6c34f4ba1a38dfb4a11b3d585c39921bc846 Mon Sep 17 00:00:00 2001
From: Fredrik Bredberg
Date: Wed, 5 Mar 2025 13:24:56 +0100
Subject: [PATCH 8/8] Updated comments after review by Patricio.
---
src/hotspot/share/runtime/objectMonitor.cpp | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp
index 3aba3e95cd1b0..6885220c979a1 100644
--- a/src/hotspot/share/runtime/objectMonitor.cpp
+++ b/src/hotspot/share/runtime/objectMonitor.cpp
@@ -199,19 +199,19 @@ ParkEvent* ObjectMonitor::_vthread_unparker_ParkEvent = nullptr;
// 4) entry_list ->I->H->G->F<=>E<=>D->null
// entry_list_tail -------------------^
//
-// If the thread (F) that removes itself from the end of the list
-// hasn't got any prev pointer, we just set the tail pointer to
-// null, see 5) and 6) below.
+// At some point in time the thread (F) that wants to remove itself
+// from the end of the list will not have any prev pointer, see 5)
+// below.
//
// 5) entry_list ->I->H->G->F->null
// entry_list_tail -----------^
//
-// 6) entry_list ->I->H->G->null
-// entry_list_tail ->null
+// To resolve this we just start walking from the entry_list head
+// again, forming a new doubly linked list, before removing the
+// thread (F), see 6) and 7) below.
//
-// Next time we need to find the successor and the tail is null, we
-// just start walking from the entry_list head again, forming a new
-// doubly linked list, see 7) below.
+// 6) entry_list ->I<=>H<=>G<=>F->null
+// entry_list_tail --------------^
//
// 7) entry_list ->I<=>H<=>G->null
// entry_list_tail ----------^
@@ -1529,8 +1529,8 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) {
// then calls exit(). Exit releases the lock by setting O._owner to null.
// Let's say T1 then stalls. T2 acquires O and calls O.notify(). The
// notify() operation moves T1 from O's waitset to O's entry_list. T2 then
- // release the lock "O". T2 resumes immediately after the ST of null into
- // _owner, above. T2 notices that the entry_list is populated, so it
+ // releases the lock "O". T1 resumes immediately after the ST of null into
+ // _owner, above. T1 notices that the entry_list is populated, so it
// reacquires the lock and then finds itself on the entry_list.
// Given all that, we have to tolerate the circumstance where "w" is
// associated with current.
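
To make the unlink paths in UnlinkAfterAcquire() concrete, here is a matching sketch of the fast path (one CAS at the head) and the slow path (rebuild the doubly linked list, then splice), reusing the ToyMonitor/Waiter stand-ins from the sketch earlier in this series. Again, this is an illustrative model under the same assumptions, not the HotSpot code.

// Owner-only: detach our own node after acquiring the lock.
void unlink_after_acquire(ToyMonitor* m, Waiter* node) {
  Waiter* w = m->entry_list.load();
  if (w == node) {
    // Fast path: we are at the head, so one CAS detaches us.
    if (m->entry_list.compare_exchange_strong(w, node->next)) {
      if (node->next == nullptr) {
        m->entry_list_tail = nullptr;    // list is now empty
      } else {
        node->next->prev = nullptr;      // new head has no prev
      }
      return;
    }
    // The CAS can fail only because a contending thread pushed
    // itself onto the head; our node is now interior, so fall
    // through to the slow path.
  }
  // Slow path: walk from the head, (re)assigning prev pointers,
  // until we reach our node. We hold the monitor and only the
  // owner detaches nodes, so everything behind the head is stable.
  Waiter* prev = nullptr;
  for (Waiter* p = m->entry_list.load(); p != node; p = p->next) {
    p->prev = prev;
    prev = p;
  }
  // prev is non-null here: a node at the head is either detached by
  // the fast path or pushed into the interior by a new arrival.
  prev->next = node->next;
  if (node->next == nullptr) {
    m->entry_list_tail = prev;           // node was the tail
  } else {
    node->next->prev = prev;
  }
}

This also models why the comment fixed above can say the CAS "can fail from interference IFF a contending thread pushed itself onto entry_list": a CAS push at the head is the only concurrent mutation an owner-side unlink can ever observe.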