diff --git a/lldb/include/lldb/Host/ProcessRunLock.h b/lldb/include/lldb/Host/ProcessRunLock.h index 27d10942ddb4c..9446414880739 100644 --- a/lldb/include/lldb/Host/ProcessRunLock.h +++ b/lldb/include/lldb/Host/ProcessRunLock.h @@ -11,7 +11,9 @@ #include #include +#include +#include "lldb/Utility/Policy.h" #include "lldb/lldb-defines.h" /// Enumerations for broadcasting. @@ -21,13 +23,39 @@ namespace lldb_private { /// A class used to prevent the process from starting while other /// threads are accessing its data, and prevent access to its data while it is /// running. +/// +/// The lock is read/write internally, but readers and writers reach it +/// through different APIs: +/// - Readers (any thread that wants the process to stay stopped) call +/// ReadTryLock / ReadUnlock, or use the RAII ProcessRunLocker. +/// - Writers (the private state thread, when transitioning the process +/// between running and stopped) call SetRunning / SetStopped. +/// There is intentionally no public TryWriteLock / WriteUnlock entry point; +/// `ProcessRunLocker` is therefore a read-lock-only helper. class ProcessRunLock { public: ProcessRunLock(); ~ProcessRunLock(); + /// Acquire the read lock without blocking. + /// + /// Returns true if the process is stopped (after this call we hold the + /// read lock; the caller must pair this with ReadUnlock). Returns false + /// if the process is running, in which case the lock is not held. + /// + /// If the per-thread Policy already has run_lock.kind == Private (a + /// nested call from within an outer holder of the read lock on this + /// same thread), this is a no-op acquire: the underlying rwlock is + /// bypassed to avoid deadlocking against a pending writer that the + /// outer reader is itself blocking. See ProcessRunLocker::TryLock for + /// where the Private policy gets pushed. bool ReadTryLock(); + + /// Release the read lock acquired by ReadTryLock. + /// + /// In a re-entrant call (run_lock.kind == Private) this is a no-op, + /// mirroring the no-op acquire in ReadTryLock. bool ReadUnlock(); /// Set the process to running. Returns true if the process was stopped. @@ -38,16 +66,24 @@ class ProcessRunLock { /// Returns false if the process was stopped. bool SetStopped(); + /// RAII helper around the read-lock side of ProcessRunLock. + /// + /// Most callers should use this instead of ReadTryLock / ReadUnlock + /// directly. There is no write-lock variant of the locker because + /// writes go through ProcessRunLock::SetRunning / SetStopped. class ProcessRunLocker { public: ProcessRunLocker() = default; - ProcessRunLocker(ProcessRunLocker &&other) : m_lock(other.m_lock) { + ProcessRunLocker(ProcessRunLocker &&other) + : m_lock(other.m_lock), + m_policy_guard(std::move(other.m_policy_guard)) { other.m_lock = nullptr; } ProcessRunLocker &operator=(ProcessRunLocker &&other) { if (this != &other) { Unlock(); m_lock = other.m_lock; + m_policy_guard = std::move(other.m_policy_guard); other.m_lock = nullptr; } return *this; @@ -57,32 +93,18 @@ class ProcessRunLock { bool IsLocked() const { return m_lock; } - // Try to lock the read lock, but only do so if there are no writers. - bool TryLock(ProcessRunLock *lock) { - if (m_lock) { - if (m_lock == lock) - return true; // We already have this lock locked - else - Unlock(); - } - if (lock) { - if (lock->ReadTryLock()) { - m_lock = lock; - return true; - } - } - return false; - } + /// Try to acquire the read lock. On success, also push a Private + /// run-lock policy so that a re-entrant ReadTryLock on this thread + /// (e.g. SB API re-entry from a frame-provider Python callback) sees + /// run_lock.kind == Private and bypasses the underlying rwlock, + /// preventing deadlock against a pending writer. + bool TryLock(ProcessRunLock *lock); protected: - void Unlock() { - if (m_lock) { - m_lock->ReadUnlock(); - m_lock = nullptr; - } - } + void Unlock(); ProcessRunLock *m_lock = nullptr; + std::optional m_policy_guard; private: ProcessRunLocker(const ProcessRunLocker &) = delete; diff --git a/lldb/include/lldb/Utility/Policy.h b/lldb/include/lldb/Utility/Policy.h index e785f13ab9287..4980688743551 100644 --- a/lldb/include/lldb/Utility/Policy.h +++ b/lldb/include/lldb/Utility/Policy.h @@ -12,6 +12,7 @@ #include "llvm/ADT/SmallVector.h" #include +#include namespace lldb_private { @@ -51,24 +52,35 @@ struct Policy { bool can_run_frame_recognizers = true; }; + /// Per-thread runtime state observed by this policy. + struct State { + /// State of the ProcessRunLock as seen by this thread. + struct RunLock { + /// Which run lock context this thread is operating in. + /// Default Public (external clients). Set to Private when entering + /// the PST, breakpoint callbacks, expression evaluation, etc. + enum class Kind { Public, Private }; + Kind kind = Kind::Public; + + /// True if the current acquisition is re-entrant: the parent policy + /// on the stack already had kind=Private. Auto-detected by the push. + bool reentrant = false; + }; + RunLock run_lock; + }; + View view = View::Public; Capabilities capabilities; + State state; - static Policy PublicState() { return {}; } - - static Policy PrivateState() { - Policy p; - p.view = View::Private; - p.capabilities.can_load_frame_providers = false; - p.capabilities.can_run_frame_recognizers = false; - return p; - } - - static Policy PublicStateRunningExpression() { - Policy p; - p.capabilities.can_run_breakpoint_actions = false; - return p; - } + /// Static factories. Each one starts from PolicyStack::Get().Current() + /// and applies its named state transition on top, so callers don't have + /// to thread the current policy through manually. Defined out-of-line + /// because they reference PolicyStack, which is declared below. + static Policy PublicState(); + static Policy PrivateState(); + static Policy PublicStateRunningExpression(); + static Policy RunLockHeld(); void Dump(Stream &s) const; }; @@ -79,6 +91,10 @@ struct Policy { /// initialized with a default-constructed base entry that is never popped. /// RAII guards (Guard) push and pop policies. /// +/// Policies are pushed via named factory methods (PushPrivateState, etc.) +/// that return an RAII Guard. Direct Push is private to prevent callers +/// from assembling arbitrary capability combinations. +/// /// For thread pool workers that don't inherit thread_local storage, the /// policy must be passed into the lambda and pushed onto the worker /// thread's stack when the task starts. @@ -91,26 +107,60 @@ class PolicyStack { Policy Current() const; - void Push(Policy policy) { m_stack.push_back(std::move(policy)); } - - void Pop() { - assert(!m_stack.empty() && "can't pop the base policy"); - m_stack.pop_back(); - } - void Dump(Stream &s) const; - /// RAII guard that pushes a policy on construction and pops on destruction. + /// RAII guard that pops a policy on destruction. + /// + /// A Guard is bound to the thread that created it: the policy stack lives + /// in thread_local storage, so popping from a different thread would + /// corrupt that thread's stack. Guards may be moved, but only on the + /// owning thread (this is asserted in debug builds; release builds log + /// the violation and then perform the underlying Pop, which will likely + /// corrupt state). class Guard { + friend class PolicyStack; + public: - explicit Guard(Policy policy) { Get().Push(std::move(policy)); } - ~Guard() { Get().Pop(); } + ~Guard(); + Guard(Guard &&other); + Guard &operator=(Guard &&other); Guard(const Guard &) = delete; Guard &operator=(const Guard &) = delete; + + private: + Guard() : m_thread_id(std::this_thread::get_id()), m_active(true) {} + std::thread::id m_thread_id; + bool m_active = false; }; + /// All Push* methods delegate to the named static factories on Policy, + /// which already inherit from Current(). So the pushed policy preserves + /// existing stack state instead of resetting unrelated fields. + + [[nodiscard]] Guard PushPrivateState() { + Push(Policy::PrivateState()); + return Guard(); + } + + [[nodiscard]] Guard PushPublicStateRunningExpression() { + Push(Policy::PublicStateRunningExpression()); + return Guard(); + } + + [[nodiscard]] Guard PushRunLockHeld() { + Push(Policy::RunLockHeld()); + return Guard(); + } + private: + void Push(Policy policy) { m_stack.push_back(std::move(policy)); } + + void Pop() { + assert(!m_stack.empty() && "can't pop the base policy"); + m_stack.pop_back(); + } + llvm::SmallVector m_stack = {Policy{}}; }; diff --git a/lldb/source/Host/common/ProcessRunLock.cpp b/lldb/source/Host/common/ProcessRunLock.cpp index 8e7ef45e1e350..a1a34c4cefc5e 100644 --- a/lldb/source/Host/common/ProcessRunLock.cpp +++ b/lldb/source/Host/common/ProcessRunLock.cpp @@ -6,11 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef _WIN32 #include "lldb/Host/ProcessRunLock.h" +#include +#include + namespace lldb_private { +#ifndef _WIN32 + ProcessRunLock::ProcessRunLock() { int err = ::pthread_rwlock_init(&m_rwlock, nullptr); (void)err; @@ -22,6 +26,35 @@ ProcessRunLock::~ProcessRunLock() { } bool ProcessRunLock::ReadTryLock() { + Policy policy = PolicyStack::Get().Current(); + if (policy.state.run_lock.kind == Policy::State::RunLock::Kind::Private) { + // Re-entrant acquire: an outer reader on this thread already holds a + // read lock on this rwlock, so m_running cannot change underneath us + // (no writer can run while a reader is active). We try to grab a + // second reader slot purely as a sanity check; if it fails, two + // errno values are both acceptable here: + // + // EBUSY - a writer is queued; on a write-preferring rwlock our + // outer reader is keeping the writer blocked, so reading + // m_running directly is still safe. + // EAGAIN - the implementation's max reader count is exhausted; + // also safe for the same reason. + // + // In all three cases (success, EBUSY, EAGAIN) we sample m_running + // and return. + int err = ::pthread_rwlock_tryrdlock(&m_rwlock); + if (err == 0) { + bool stopped = !m_running; + ::pthread_rwlock_unlock(&m_rwlock); + return stopped; + } + assert((err == EBUSY || err == EAGAIN) && + "unexpected pthread_rwlock_tryrdlock failure during re-entrant " + "ProcessRunLock acquire"); + (void)err; + return !m_running; + } + ::pthread_rwlock_rdlock(&m_rwlock); if (!m_running) { // coverity[missing_unlock] @@ -32,6 +65,10 @@ bool ProcessRunLock::ReadTryLock() { } bool ProcessRunLock::ReadUnlock() { + Policy policy = PolicyStack::Get().Current(); + if (policy.state.run_lock.kind == Policy::State::RunLock::Kind::Private) + return true; + return ::pthread_rwlock_unlock(&m_rwlock) == 0; } @@ -51,6 +88,30 @@ bool ProcessRunLock::SetStopped() { return was_running; } -} // namespace lldb_private +#endif // !_WIN32 -#endif +bool ProcessRunLock::ProcessRunLocker::TryLock(ProcessRunLock *lock) { + if (m_lock) { + if (m_lock == lock) + return true; + Unlock(); + } + if (lock) { + if (lock->ReadTryLock()) { + m_lock = lock; + m_policy_guard = PolicyStack::Get().PushRunLockHeld(); + return true; + } + } + return false; +} + +void ProcessRunLock::ProcessRunLocker::Unlock() { + if (m_lock) { + m_policy_guard.reset(); + m_lock->ReadUnlock(); + m_lock = nullptr; + } +} + +} // namespace lldb_private diff --git a/lldb/source/Host/windows/ProcessRunLock.cpp b/lldb/source/Host/windows/ProcessRunLock.cpp index ac97c3292a324..9c49f416c2a43 100644 --- a/lldb/source/Host/windows/ProcessRunLock.cpp +++ b/lldb/source/Host/windows/ProcessRunLock.cpp @@ -8,6 +8,7 @@ #include "lldb/Host/ProcessRunLock.h" #include "lldb/Host/windows/windows.h" +#include "lldb/Utility/Policy.h" static PSRWLOCK GetLock(lldb::rwlock_t lock) { return static_cast(lock); @@ -18,6 +19,10 @@ static bool ReadLock(lldb::rwlock_t rwlock) { return true; } +static bool ReadTryLock(lldb::rwlock_t rwlock) { + return ::TryAcquireSRWLockShared(GetLock(rwlock)); +} + static bool ReadUnlock(lldb::rwlock_t rwlock) { ::ReleaseSRWLockShared(GetLock(rwlock)); return true; @@ -33,7 +38,7 @@ static bool WriteUnlock(lldb::rwlock_t rwlock) { return true; } -using namespace lldb_private; +namespace lldb_private { ProcessRunLock::ProcessRunLock() : m_running(false) { m_rwlock = new SRWLOCK; @@ -43,6 +48,18 @@ ProcessRunLock::ProcessRunLock() : m_running(false) { ProcessRunLock::~ProcessRunLock() { delete static_cast(m_rwlock); } bool ProcessRunLock::ReadTryLock() { + Policy policy = PolicyStack::Get().Current(); + if (policy.state.run_lock.kind == Policy::State::RunLock::Kind::Private) { + if (::ReadTryLock(m_rwlock)) { + bool stopped = !m_running; + ::ReadUnlock(m_rwlock); + return stopped; + } + // trylock failed due to a pending writer, but no writer can + // modify m_running while the outer read lock is held. + return !m_running; + } + ::ReadLock(m_rwlock); if (m_running == false) return true; @@ -50,7 +67,13 @@ bool ProcessRunLock::ReadTryLock() { return false; } -bool ProcessRunLock::ReadUnlock() { return ::ReadUnlock(m_rwlock); } +bool ProcessRunLock::ReadUnlock() { + Policy policy = PolicyStack::Get().Current(); + if (policy.state.run_lock.kind == Policy::State::RunLock::Kind::Private) + return true; + + return ::ReadUnlock(m_rwlock); +} bool ProcessRunLock::SetRunning() { WriteLock(m_rwlock); @@ -67,3 +90,5 @@ bool ProcessRunLock::SetStopped() { WriteUnlock(m_rwlock); return was_running; } + +} // namespace lldb_private diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 39399480b0806..670c2c2ffbdda 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -4349,7 +4349,7 @@ thread_result_t Process::RunPrivateStateThread(bool is_override) { // They must see parent frames, not provider-augmented frames. std::optional policy_guard; if (is_override) - policy_guard.emplace(Policy::PrivateState()); + policy_guard = PolicyStack::Get().PushPrivateState(); bool control_only = true; @@ -5549,7 +5549,7 @@ Process::RunThreadPlan(ExecutionContext &exe_ctx, // GetStackFrameList returns parent frames during event processing. std::optional policy_guard; if (backup_private_state_thread) - policy_guard.emplace(Policy::PrivateState()); + policy_guard = PolicyStack::Get().PushPrivateState(); while (true) { // We usually want to resume the process if we get to the top of the @@ -5621,10 +5621,10 @@ Process::RunThreadPlan(ExecutionContext &exe_ctx, Halt(clear_thread_plans, use_run_lock); } - diagnostic_manager.Printf( - lldb::eSeverityError, - "didn't get running event after initial resume, got %s instead.", - StateAsCString(stop_state)); + diagnostic_manager.Printf(lldb::eSeverityError, + "didn't get running event after initial " + "resume, got %s instead.", + StateAsCString(stop_state)); return_value = eExpressionSetupError; break; } diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index f438d368c9ba8..c20b0ed07ee3c 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -193,7 +193,7 @@ class StopInfoBreakpoint : public StopInfo { bool ShouldStopSynchronous(Event *event_ptr) override { // Breakpoint callbacks run on the PST during stop processing. Push // private state context so callback code sees the private reality. - PolicyStack::Guard policy_guard(Policy::PrivateState()); + PolicyStack::Guard policy_guard = PolicyStack::Get().PushPrivateState(); ThreadSP thread_sp(m_thread_wp.lock()); if (thread_sp) { @@ -903,7 +903,7 @@ class StopInfoWatchpoint : public StopInfo { bool ShouldStopSynchronous(Event *event_ptr) override { // Watchpoint callbacks run on the PST during stop processing. Push // private state context so callback code sees the private reality. - PolicyStack::Guard policy_guard(Policy::PrivateState()); + PolicyStack::Guard policy_guard = PolicyStack::Get().PushPrivateState(); // If we are running our step-over the watchpoint plan, stop if it's done // and continue if it's not: diff --git a/lldb/source/Utility/Policy.cpp b/lldb/source/Utility/Policy.cpp index 2abc0d6281161..779147c676432 100644 --- a/lldb/source/Utility/Policy.cpp +++ b/lldb/source/Utility/Policy.cpp @@ -12,6 +12,24 @@ #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" +#include "llvm/Support/FormatVariadic.h" + +#include + +// Allow std::thread::id to be passed directly to formatv-based logging +// macros (LLDB_LOG, etc.). std::thread::id has no raw_ostream operator<<, +// so we route through std::stringstream which it does support. +namespace llvm { +template <> struct format_provider { + static void format(const std::thread::id &id, raw_ostream &os, + StringRef /*style*/) { + std::stringstream ss; + ss << id; + os << ss.str(); + } +}; +} // namespace llvm + using namespace lldb_private; Policy PolicyStack::Current() const { @@ -24,6 +42,91 @@ Policy PolicyStack::Current() const { return p; } +// PublicState is the baseline, not a transition. The stack returns to +// public state by popping the private-state guards, not by pushing a +// "public" policy on top. This factory exists only as a reference value +// (tests, dump comparisons); it never reads the current stack. +Policy Policy::PublicState() { return {}; } + +Policy Policy::PrivateState() { + Policy p = PolicyStack::Get().Current(); + p.view = View::Private; + p.capabilities.can_load_frame_providers = false; + p.capabilities.can_run_frame_recognizers = false; + return p; +} + +Policy Policy::PublicStateRunningExpression() { + Policy p = PolicyStack::Get().Current(); + p.capabilities.can_run_breakpoint_actions = false; + return p; +} + +Policy Policy::RunLockHeld() { + Policy p = PolicyStack::Get().Current(); + if (p.state.run_lock.kind == State::RunLock::Kind::Private) + p.state.run_lock.reentrant = true; + p.state.run_lock.kind = State::RunLock::Kind::Private; + return p; +} + +PolicyStack::Guard::~Guard() { + if (!m_active) + return; + std::thread::id current = std::this_thread::get_id(); + if (m_thread_id != current) + LLDB_LOG(GetLog(LLDBLog::Process), + "PolicyStack::Guard destroyed on thread {0} but was created on " + "thread {1}; this would corrupt thread {0}'s policy stack", + current, m_thread_id); + assert(m_thread_id == current && + "PolicyStack::Guard destroyed on a different thread than the one " + "that created it (see preceding log line for thread ids)"); + Get().Pop(); +} + +PolicyStack::Guard::Guard(Guard &&other) + : m_thread_id(other.m_thread_id), m_active(other.m_active) { + std::thread::id current = std::this_thread::get_id(); + if (m_thread_id != current) + LLDB_LOG(GetLog(LLDBLog::Process), + "PolicyStack::Guard move-constructed on thread {0} but was " + "created on thread {1}", + current, m_thread_id); + assert(m_thread_id == current && "PolicyStack::Guard moved across threads " + "(see preceding log line for thread ids)"); + other.m_active = false; +} + +PolicyStack::Guard &PolicyStack::Guard::operator=(Guard &&other) { + if (this != &other) { + std::thread::id current = std::this_thread::get_id(); + if (other.m_thread_id != current) + LLDB_LOG(GetLog(LLDBLog::Process), + "PolicyStack::Guard move-assigned on thread {0} from a guard " + "created on thread {1}", + current, other.m_thread_id); + assert(other.m_thread_id == current && + "PolicyStack::Guard move-assigned across threads " + "(see preceding log line for thread ids)"); + if (m_active) { + if (m_thread_id != current) + LLDB_LOG(GetLog(LLDBLog::Process), + "PolicyStack::Guard destroyed during move-assign on thread " + "{0} but was created on thread {1}", + current, m_thread_id); + assert(m_thread_id == current && + "PolicyStack::Guard destroyed on a different thread " + "(see preceding log line for thread ids)"); + Get().Pop(); + } + m_thread_id = other.m_thread_id; + m_active = other.m_active; + other.m_active = false; + } + return *this; +} + void Policy::Dump(Stream &s) const { s << "policy: view=" << (view == View::Public ? "public" : "private"); s << ", capabilities={"; @@ -34,6 +137,12 @@ void Policy::Dump(Stream &s) const { s << " frame_providers=" << capabilities.can_load_frame_providers; s << " frame_recognizers=" << capabilities.can_run_frame_recognizers; s << '}'; + s << ", state={run_lock={"; + s << "kind=" + << (state.run_lock.kind == State::RunLock::Kind::Public ? "public" + : "private"); + s << " reentrant=" << state.run_lock.reentrant; + s << "}}"; } void PolicyStack::Dump(Stream &s) const { diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile new file mode 100644 index 0000000000000..0b710c6e298ae --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c +CFLAGS_EXTRAS := -std=c99 +include Makefile.rules diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py new file mode 100644 index 0000000000000..2933c073ce8c9 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/TestRunLockReentrantDeadlock.py @@ -0,0 +1,125 @@ +""" +Test that ProcessRunLock re-entrant read locks don't deadlock when a frame +provider's get_frame_at_index calls SB API methods. + +This reproduces the deadlock seen in lldb-rpc-server where: + + - An RPC client thread holds a ProcessRunLock read lock (from the outer + SB API call) and enters a provider's get_frame_at_index, which calls + SBFrame.IsValid -> GetStoppedExecutionContext -> ReadTryLock (re-entrant). + + - The override PST is exiting RunPrivateStateThread and calls SetStopped + -> pthread_rwlock_wrlock (blocked by client thread's read lock). + + - The client thread's re-entrant ReadTryLock blocks because the pending + writer prevents new readers on a write-preferring rwlock. + + - The original PST is blocked joining the override thread. +""" + +import os +import threading +import lldb +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class TestRunLockReentrantDeadlock(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24528") + def test_runlock_reentrant_no_deadlock(self): + """ + Test that a frame provider calling SBFrame.IsValid from + get_frame_at_index does not deadlock with the override PST's + SetStopped when another thread triggers EvaluateExpression. + """ + self.build() + + target, process, thread, _ = lldbutil.run_to_name_breakpoint(self, "main") + + resolver_path = os.path.join(self.getSourceDir(), "bkpt_resolver.py") + provider_path = os.path.join(self.getSourceDir(), "frame_provider.py") + self.runCmd("command script import " + resolver_path) + self.runCmd("command script import " + provider_path) + + error = lldb.SBError() + provider_id = target.RegisterScriptedFrameProvider( + "frame_provider.SBAPIAccessInGetFrameProvider", + lldb.SBStructuredData(), + error, + ) + self.assertTrue( + error.Success(), + f"Should register frame provider: {error}", + ) + self.assertNotEqual(provider_id, 0, "Provider ID should be non-zero") + + extra_args = lldb.SBStructuredData() + stream = lldb.SBStream() + stream.Print('{"symbol": "target_func"}') + extra_args.SetFromJSON(stream) + + bkpt = target.BreakpointCreateFromScript( + "bkpt_resolver.ExprEvalResolver", + extra_args, + lldb.SBFileSpecList(), + lldb.SBFileSpecList(), + ) + self.assertTrue(bkpt.IsValid(), "Scripted breakpoint should be valid") + self.assertGreater( + bkpt.GetNumLocations(), 0, "Breakpoint should have locations" + ) + + # Spawn a thread that repeatedly accesses frames through the provider. + # This simulates an RPC client thread that holds the ProcessRunLock + # read lock and enters get_frame_at_index -> SBFrame.IsValid. + stop_frame_access = threading.Event() + frame_access_error = [None] + + def access_frames(): + try: + while not stop_frame_access.is_set(): + t = process.GetSelectedThread() + if t.IsValid(): + for i in range(t.GetNumFrames()): + f = t.GetFrameAtIndex(i) + f.IsValid() + except Exception as e: + frame_access_error[0] = str(e) + + frame_thread = threading.Thread(target=access_frames, daemon=True) + frame_thread.start() + + # Continue into the breakpoint. was_hit calls EvaluateExpression on + # the PST, which triggers RunThreadPlan -> override PST. + # The frame access thread is concurrently calling GetFrameAtIndex -> + # provider get_frame_at_index -> SBFrame.IsValid. + # Without the fix, this deadlocks. + process.Continue() + self.assertState(process.GetState(), lldb.eStateStopped) + + stop_frame_access.set() + frame_thread.join(timeout=5) + self.assertFalse(frame_thread.is_alive(), "Frame access thread should exit") + self.assertIsNone( + frame_access_error[0], + f"Frame access thread hit an error: {frame_access_error[0]}", + ) + + thread = process.GetSelectedThread() + self.assertTrue(thread.IsValid(), "Thread should be valid") + self.assertEqual( + thread.GetStopReason(), + lldb.eStopReasonBreakpoint, + "Should stop at breakpoint", + ) + + g_value = target.FindFirstGlobalVariable("g_value") + self.assertTrue(g_value.IsValid(), "Should find g_value") + self.assertGreater( + g_value.GetValueAsUnsigned(), + 0, + "g_value should have been incremented by the was_hit callback", + ) diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py new file mode 100644 index 0000000000000..7fb7f8675f2a6 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/bkpt_resolver.py @@ -0,0 +1,47 @@ +""" +Scripted breakpoint resolver whose was_hit callback calls EvaluateExpression. + +This reproduces the deadlock seen in the sample where: +1. BreakpointResolverScripted::WasHit runs on the private state thread +2. was_hit calls SBFrame.EvaluateExpression -> RunThreadPlan -> Halt -> + WaitForProcessToStop (holds a mutex, waits for state event) +3. The override private state thread handles the stop and loads a scripted + frame provider +4. The provider's __init__ calls SBThread.__bool__ -> GetStoppedExecutionContext + -> tries to acquire the same mutex -> DEADLOCK +""" + +import lldb + + +class ExprEvalResolver: + """Scripted breakpoint resolver that evaluates an expression in was_hit.""" + + def __init__(self, bkpt, extra_args, dict): + self.bkpt = bkpt + sym_name = extra_args.GetValueForKey("symbol").GetStringValue(100) + self.sym_name = sym_name + self.facade_loc = None + + def __callback__(self, sym_ctx): + sym = sym_ctx.module.FindSymbol(self.sym_name, lldb.eSymbolTypeCode) + if sym.IsValid(): + self.bkpt.AddLocation(sym.GetStartAddress()) + self.facade_loc = self.bkpt.AddFacadeLocation() + + def get_short_help(self): + return f"ExprEvalResolver for {self.sym_name}" + + def was_hit(self, frame, bp_loc): + # This runs on the private state thread. Calling EvaluateExpression + # here triggers RunThreadPlan -> Halt -> WaitForProcessToStop, which + # holds a mutex and waits for a state change event. + options = lldb.SBExpressionOptions() + options.SetStopOthers(True) + options.SetTryAllThreads(False) + + result = frame.EvaluateExpression("increment()", options) + if not result.error.success: + return lldb.LLDB_INVALID_BREAK_ID + + return self.facade_loc diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py new file mode 100644 index 0000000000000..413484c264513 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/frame_provider.py @@ -0,0 +1,38 @@ +""" +Frame provider whose get_frame_at_index calls SBFrame::IsValid on input frames. + +When a client thread accesses frames through this provider while the PST is +mid-expression-eval (RunThreadPlan), the following deadlock occurs: + + - Client thread: holds ProcessRunLock read lock (from outer SB API call), + enters get_frame_at_index, calls SBFrame.IsValid -> + GetStoppedExecutionContext -> ReadTryLock (re-entrant, blocked by + pending writer) + + - Override PST: finishing RunPrivateStateThread, calls SetStopped -> + pthread_rwlock_wrlock (blocked by client thread's read lock) + +The re-entrant read lock blocks because macOS uses a write-preferring rwlock. +""" + +import lldb +from lldb.plugins.scripted_frame_provider import ScriptedFrameProvider + + +class SBAPIAccessInGetFrameProvider(ScriptedFrameProvider): + """Provider that calls SBFrame.IsValid from get_frame_at_index.""" + + @staticmethod + def get_description(): + return "Provider that accesses SB API in get_frame_at_index" + + def get_frame_at_index(self, idx): + if idx < len(self.input_frames): + frame = self.input_frames.GetFrameAtIndex(idx) + # This call triggers GetStoppedExecutionContext -> + # ProcessRunLock::ReadTryLock. If the current thread already + # holds the read lock (from the outer SB API entry point), + # and a writer is pending, this re-entrant read lock blocks. + frame.IsValid() + return idx + return None diff --git a/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c new file mode 100644 index 0000000000000..9ebeaf3380ed9 --- /dev/null +++ b/lldb/test/API/functionalities/scripted_frame_provider/runlock_reentrant_deadlock/main.c @@ -0,0 +1,18 @@ +#include + +int g_value = 0; + +int increment() { return ++g_value; } + +int target_func() { + printf("target_func: %d\n", g_value); + return g_value; +} + +int main() { + for (int i = 0; i < 10; i++) { + target_func(); + } + increment(); + return 0; +} diff --git a/lldb/unittests/Utility/PolicyTest.cpp b/lldb/unittests/Utility/PolicyTest.cpp index b7771e52b46e4..cfc7bf66393b7 100644 --- a/lldb/unittests/Utility/PolicyTest.cpp +++ b/lldb/unittests/Utility/PolicyTest.cpp @@ -66,20 +66,25 @@ TEST(PolicyTest, StackDefaultIsPublicState) { } TEST(PolicyTest, StackPushPop) { - PolicyStack::Get().Push(Policy::PrivateState()); - EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); - EXPECT_FALSE( - PolicyStack::Get().Current().capabilities.can_load_frame_providers); + { + PolicyStack::Guard guard = PolicyStack::Get().PushPrivateState(); + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + EXPECT_FALSE( + PolicyStack::Get().Current().capabilities.can_load_frame_providers); - PolicyStack::Get().Push(Policy::PublicStateRunningExpression()); - EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Public); - EXPECT_FALSE( - PolicyStack::Get().Current().capabilities.can_run_breakpoint_actions); + { + PolicyStack::Guard inner = + PolicyStack::Get().PushPublicStateRunningExpression(); + // PushPublicStateRunningExpression inherits from Current() and only + // toggles bp_actions; view stays Private. + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + EXPECT_FALSE( + PolicyStack::Get().Current().capabilities.can_run_breakpoint_actions); + } - PolicyStack::Get().Pop(); - EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + } - PolicyStack::Get().Pop(); EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Public); } @@ -87,14 +92,16 @@ TEST(PolicyTest, GuardRAII) { EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Public); { - PolicyStack::Guard guard(Policy::PrivateState()); + PolicyStack::Guard guard = PolicyStack::Get().PushPrivateState(); EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); EXPECT_FALSE( PolicyStack::Get().Current().capabilities.can_load_frame_providers); { - PolicyStack::Guard inner(Policy::PublicStateRunningExpression()); - EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Public); + PolicyStack::Guard inner = + PolicyStack::Get().PushPublicStateRunningExpression(); + // Inherits Private view from outer guard. + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); EXPECT_FALSE( PolicyStack::Get().Current().capabilities.can_run_breakpoint_actions); } @@ -106,7 +113,7 @@ TEST(PolicyTest, GuardRAII) { } TEST(PolicyTest, StackIsPerThread) { - PolicyStack::Get().Push(Policy::PrivateState()); + PolicyStack::Guard guard = PolicyStack::Get().PushPrivateState(); Policy::View other_thread_view; std::thread t([&other_thread_view]() { @@ -116,8 +123,6 @@ TEST(PolicyTest, StackIsPerThread) { EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); EXPECT_EQ(other_thread_view, Policy::View::Public); - - PolicyStack::Get().Pop(); } TEST(PolicyTest, DumpPublicState) { @@ -126,7 +131,8 @@ TEST(PolicyTest, DumpPublicState) { EXPECT_EQ(s.GetString(), "policy: view=public, capabilities={" "eval_expr=true run_all=true try_all=true " - "bp_actions=true frame_providers=true frame_recognizers=true}"); + "bp_actions=true frame_providers=true frame_recognizers=true}" + ", state={run_lock={kind=public reentrant=false}}"); } TEST(PolicyTest, DumpPrivateState) { @@ -135,17 +141,83 @@ TEST(PolicyTest, DumpPrivateState) { EXPECT_EQ(s.GetString(), "policy: view=private, capabilities={" "eval_expr=true run_all=true try_all=true " - "bp_actions=true frame_providers=false frame_recognizers=false}"); + "bp_actions=true frame_providers=false frame_recognizers=false}" + ", state={run_lock={kind=public reentrant=false}}"); } TEST(PolicyTest, DumpStack) { - PolicyStack::Get().Push(Policy::PrivateState()); + PolicyStack::Guard guard = PolicyStack::Get().PushPrivateState(); StreamString s; PolicyStack::Get().Dump(s); EXPECT_NE(s.GetString().find("depth=2"), std::string::npos); EXPECT_NE(s.GetString().find("[0] policy: view=public"), std::string::npos); EXPECT_NE(s.GetString().find("[1] policy: view=private"), std::string::npos); +} + +TEST(PolicyTest, GuardSameThreadMove) { + // Move on the same thread is fine; the moved-into Guard still pops on + // destruction and the moved-from Guard becomes a no-op. + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Public); + { + PolicyStack::Guard outer = PolicyStack::Get().PushPrivateState(); + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + + PolicyStack::Guard moved = std::move(outer); + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + } + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Public); +} + +TEST(PolicyTest, PushInheritsFromCurrent) { + // Push* methods inherit from Current() rather than starting from a + // default Policy: stacking PushPublicStateRunningExpression on top of + // PushPrivateState must preserve the Private view. + PolicyStack::Guard outer = PolicyStack::Get().PushPrivateState(); + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + + PolicyStack::Guard inner = + PolicyStack::Get().PushPublicStateRunningExpression(); + // Capability from inner push. + EXPECT_FALSE( + PolicyStack::Get().Current().capabilities.can_run_breakpoint_actions); + // View inherited from outer push (would be Public if Push reset state). + EXPECT_EQ(PolicyStack::Get().Current().view, Policy::View::Private); + // Capabilities from outer push also inherited. + EXPECT_FALSE( + PolicyStack::Get().Current().capabilities.can_load_frame_providers); +} + +TEST(PolicyTest, RunLockKind) { + EXPECT_EQ(PolicyStack::Get().Current().state.run_lock.kind, + Policy::State::RunLock::Kind::Public); + EXPECT_FALSE(PolicyStack::Get().Current().state.run_lock.reentrant); + + { + PolicyStack::Guard guard = PolicyStack::Get().PushRunLockHeld(); + EXPECT_EQ(PolicyStack::Get().Current().state.run_lock.kind, + Policy::State::RunLock::Kind::Private); + EXPECT_FALSE(PolicyStack::Get().Current().state.run_lock.reentrant); + } + + EXPECT_EQ(PolicyStack::Get().Current().state.run_lock.kind, + Policy::State::RunLock::Kind::Public); +} + +TEST(PolicyTest, RunLockReentrant) { + PolicyStack::Guard outer = PolicyStack::Get().PushRunLockHeld(); + EXPECT_EQ(PolicyStack::Get().Current().state.run_lock.kind, + Policy::State::RunLock::Kind::Private); + EXPECT_FALSE(PolicyStack::Get().Current().state.run_lock.reentrant); + + { + PolicyStack::Guard inner = PolicyStack::Get().PushRunLockHeld(); + EXPECT_EQ(PolicyStack::Get().Current().state.run_lock.kind, + Policy::State::RunLock::Kind::Private); + EXPECT_TRUE(PolicyStack::Get().Current().state.run_lock.reentrant); + } - PolicyStack::Get().Pop(); + EXPECT_EQ(PolicyStack::Get().Current().state.run_lock.kind, + Policy::State::RunLock::Kind::Private); + EXPECT_FALSE(PolicyStack::Get().Current().state.run_lock.reentrant); }