From bccb89914323eb04ca31b9587203e0816354d037 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Tue, 21 Oct 2025 12:33:59 -0700 Subject: [PATCH 1/2] [lldb] Implement Process::ReadMemoryRanges (#163651) This commit introduces a base-class implementation for a method that reads memory from multiple ranges at once. This implementation simply calls the underlying `ReadMemoryFromInferior` method on each requested range, intentionally bypassing the memory caching mechanism (though this may be easily changed in the future). `Process` implementations that can be perform this operation more efficiently - e.g. with the MultiMemPacket described in [1] - are expected to override this method. As an example, this commit changes AppleObjCClassDescriptorV2 to use the new API. Note about the API ------------------ In the RFC, we discussed having the API return some kind of class `ReadMemoryRangesResult`. However, while writing such a class, it became clear that it was merely wrapping a vector, without providing anything useful. For example, this class: ``` struct ReadMemoryRangesResult { ReadMemoryRangesResult( llvm::SmallVector> ranges) : ranges(std::move(ranges)) {} llvm::ArrayRef> getRanges() const { return ranges; } private: llvm::SmallVector> ranges; }; ``` As can be seen in the added test and in the added use-case (AppleObjCClassDescriptorV2), users of this API will just iterate over the vector of memory buffers. So they want a return type that can be iterated over, and the vector seems more natural than creating a new class and defining iterators for it. Likewise, in the RFC, we discussed wrapping the result into an `Expected`. Upon experimenting with the code, this feels like it limits what the API is able to do as the base class implementation never needs to fail the entire result, it's the individual reads that may fail and this is expressed through a zero-length result. Any derived classes overriding `ReadMemoryRanges` should also never produce a top level failure: if they did, they can just fall back to the base class implementation, which would produce a better result. The choice of having the caller allocate a buffer and pass it to `Process::ReadMemoryRanges` is done mostly to follow conventions already done in the Process class. [1]: https://discourse.llvm.org/t/rfc-a-new-vectorized-memory-read-packet/ (cherry picked from commit f2cb99751882bdd96c683cdeb9e8c5301cbb51cc) --- lldb/include/lldb/Target/Process.h | 22 +++ .../AppleObjCClassDescriptorV2.cpp | 23 +-- lldb/source/Target/Process.cpp | 43 ++++++ lldb/unittests/Target/MemoryTest.cpp | 142 ++++++++++++++++++ 4 files changed, 219 insertions(+), 11 deletions(-) diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 9ebcdb4cd06db..74ed8b17289cd 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1585,6 +1585,28 @@ class Process : public std::enable_shared_from_this, virtual size_t ReadMemory(lldb::addr_t vm_addr, void *buf, size_t size, Status &error); + /// Read from multiple memory ranges and write the results into buffer. + /// This calls ReadMemoryFromInferior multiple times, once per range, + /// bypassing the read cache. Process implementations that can perform this + /// operation more efficiently should override this. + /// + /// \param[in] ranges + /// A collection of ranges (base address + size) to read from. + /// + /// \param[out] buffer + /// A buffer where the read memory will be written to. It must be at least + /// as long as the sum of the sizes of each range. + /// + /// \return + /// A vector of MutableArrayRef, where each MutableArrayRef is a slice of + /// the input buffer into which the memory contents were copied. The size + /// of the slice indicates how many bytes were read successfully. Partial + /// reads are always performed from the start of the requested range, + /// never from the middle or end. + virtual llvm::SmallVector> + ReadMemoryRanges(llvm::ArrayRef> ranges, + llvm::MutableArrayRef buffer); + /// Read of memory from a process. /// /// This function has the same semantics of ReadMemory except that it diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp index 6d8f41aef1ffc..7ce4cbf4c61a4 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp @@ -279,22 +279,23 @@ ClassDescriptorV2::ReadMethods(llvm::ArrayRef addresses, const size_t num_methods = addresses.size(); llvm::SmallVector buffer(num_methods * size, 0); - llvm::DenseSet failed_indices; - for (auto [idx, addr] : llvm::enumerate(addresses)) { - Status error; - process->ReadMemory(addr, buffer.data() + idx * size, size, error); - if (error.Fail()) - failed_indices.insert(idx); - } + llvm::SmallVector> mem_ranges = + llvm::to_vector(llvm::map_range(llvm::seq(num_methods), [&](size_t idx) { + return Range(addresses[idx], size); + })); + + llvm::SmallVector> read_results = + process->ReadMemoryRanges(mem_ranges, buffer); llvm::SmallVector methods; methods.reserve(num_methods); - for (auto [idx, addr] : llvm::enumerate(addresses)) { - if (failed_indices.contains(idx)) + for (auto [addr, memory] : llvm::zip(addresses, read_results)) { + // Ignore partial reads. + if (memory.size() != size) continue; - DataExtractor extractor(buffer.data() + idx * size, size, - process->GetByteOrder(), + + DataExtractor extractor(memory.data(), size, process->GetByteOrder(), process->GetAddressByteSize()); methods.push_back(method_t()); methods.back().Read(extractor, process, addr, relative_selector_base_addr, diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 0fb9f8c6b2768..a75c38b0e6d86 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -2111,6 +2111,49 @@ size_t Process::ReadMemory(addr_t addr, void *buf, size_t size, Status &error) { } } +llvm::SmallVector> +Process::ReadMemoryRanges(llvm::ArrayRef> ranges, + llvm::MutableArrayRef buffer) { + auto total_ranges_len = llvm::sum_of( + llvm::map_range(ranges, [](auto range) { return range.size; })); + // If the buffer is not large enough, this is a programmer error. + // In production builds, gracefully fail by returning a length of 0 for all + // ranges. + assert(buffer.size() >= total_ranges_len && "provided buffer is too short"); + if (buffer.size() < total_ranges_len) { + llvm::MutableArrayRef empty; + return {ranges.size(), empty}; + } + + llvm::SmallVector> results; + + // While `buffer` has space, take the next requested range and read + // memory into a `buffer` piece, then slice it to remove the used memory. + for (auto [addr, range_len] : ranges) { + Status status; + size_t num_bytes_read = + ReadMemoryFromInferior(addr, buffer.data(), range_len, status); + // FIXME: ReadMemoryFromInferior promises to return 0 in case of errors, but + // it doesn't; it never checks for errors. + if (status.Fail()) + num_bytes_read = 0; + + assert(num_bytes_read <= range_len && "read more than requested bytes"); + if (num_bytes_read > range_len) { + // In production builds, gracefully fail by returning length zero for this + // range. + results.emplace_back(); + continue; + } + + results.push_back(buffer.take_front(num_bytes_read)); + // Slice buffer to remove the used memory. + buffer = buffer.drop_front(num_bytes_read); + } + + return results; +} + void Process::DoFindInMemory(lldb::addr_t start_addr, lldb::addr_t end_addr, const uint8_t *buf, size_t size, AddressRanges &matches, size_t alignment, diff --git a/lldb/unittests/Target/MemoryTest.cpp b/lldb/unittests/Target/MemoryTest.cpp index 4a96730e00464..f7b4e97b1f64a 100644 --- a/lldb/unittests/Target/MemoryTest.cpp +++ b/lldb/unittests/Target/MemoryTest.cpp @@ -17,6 +17,7 @@ #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/DataBufferHeap.h" #include "gtest/gtest.h" +#include using namespace lldb_private; using namespace lldb; @@ -225,3 +226,144 @@ TEST_F(MemoryTest, TesetMemoryCacheRead) { // instead of using an // old cache } + +/// A process class that, when asked to read memory from some address X, returns +/// the least significant byte of X. +class DummyReaderProcess : public Process { +public: + // If true, `DoReadMemory` will not return all requested bytes. + // It's not possible to control exactly how many bytes will be read, because + // Process::ReadMemoryFromInferior tries to fulfill the entire request by + // reading smaller chunks until it gets nothing back. + bool read_less_than_requested = false; + bool read_more_than_requested = false; + + size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size, + Status &error) override { + if (read_less_than_requested && size > 0) + size--; + if (read_more_than_requested) + size *= 2; + uint8_t *buffer = static_cast(buf); + for (size_t addr = vm_addr; addr < vm_addr + size; addr++) + buffer[addr - vm_addr] = static_cast(addr); // LSB of addr. + return size; + } + // Boilerplate, nothing interesting below. + DummyReaderProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp) + : Process(target_sp, listener_sp) {} + bool CanDebug(lldb::TargetSP, bool) override { return true; } + Status DoDestroy() override { return {}; } + void RefreshStateAfterStop() override {} + bool DoUpdateThreadList(ThreadList &, ThreadList &) override { return false; } + llvm::StringRef GetPluginName() override { return "Dummy"; } +}; + +TEST_F(MemoryTest, TestReadMemoryRanges) { + ArchSpec arch("x86_64-apple-macosx-"); + + Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); + + DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + + TargetSP target_sp = CreateTarget(debugger_sp, arch); + ASSERT_TRUE(target_sp); + + ListenerSP listener_sp(Listener::MakeListener("dummy")); + ProcessSP process_sp = + std::make_shared(target_sp, listener_sp); + ASSERT_TRUE(process_sp); + + { + llvm::SmallVector buffer(1024, 0); + // Read 8 ranges of 128 bytes with arbitrary base addresses. + llvm::SmallVector> ranges = { + {0x12345, 128}, {0x11112222, 128}, {0x77777777, 128}, + {0xffaabbccdd, 128}, {0x0, 128}, {0x4242424242, 128}, + {0x17171717, 128}, {0x99999, 128}}; + + llvm::SmallVector> read_results = + process_sp->ReadMemoryRanges(ranges, buffer); + + for (auto [range, memory] : llvm::zip(ranges, read_results)) { + ASSERT_EQ(memory.size(), 128u); + addr_t range_base = range.GetRangeBase(); + for (auto [idx, byte] : llvm::enumerate(memory)) + ASSERT_EQ(byte, static_cast(range_base + idx)); + } + } + + auto &dummy_process = static_cast(*process_sp); + dummy_process.read_less_than_requested = true; + { + llvm::SmallVector buffer(1024, 0); + llvm::SmallVector> ranges = { + {0x12345, 128}, {0x11112222, 128}, {0x77777777, 128}}; + llvm::SmallVector> read_results = + dummy_process.ReadMemoryRanges(ranges, buffer); + for (auto [range, memory] : llvm::zip(ranges, read_results)) { + ASSERT_LT(memory.size(), 128u); + addr_t range_base = range.GetRangeBase(); + for (auto [idx, byte] : llvm::enumerate(memory)) + ASSERT_EQ(byte, static_cast(range_base + idx)); + } + } +} + +using MemoryDeathTest = MemoryTest; + +TEST_F(MemoryDeathTest, TestReadMemoryRangesReturnsTooMuch) { + ArchSpec arch("x86_64-apple-macosx-"); + Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); + DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + TargetSP target_sp = CreateTarget(debugger_sp, arch); + ASSERT_TRUE(target_sp); + ListenerSP listener_sp(Listener::MakeListener("dummy")); + ProcessSP process_sp = + std::make_shared(target_sp, listener_sp); + ASSERT_TRUE(process_sp); + + auto &dummy_process = static_cast(*process_sp); + dummy_process.read_more_than_requested = true; + llvm::SmallVector buffer(1024, 0); + llvm::SmallVector> ranges = {{0x12345, 128}}; + + llvm::SmallVector> read_results; + ASSERT_DEBUG_DEATH( + { read_results = process_sp->ReadMemoryRanges(ranges, buffer); }, + "read more than requested bytes"); +#ifdef NDEBUG + // With asserts off, the read should return empty ranges. + ASSERT_EQ(read_results.size(), 1u); + ASSERT_TRUE(read_results[0].empty()); +#endif +} + +TEST_F(MemoryDeathTest, TestReadMemoryRangesWithShortBuffer) { + ArchSpec arch("x86_64-apple-macosx-"); + Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch)); + DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + TargetSP target_sp = CreateTarget(debugger_sp, arch); + ASSERT_TRUE(target_sp); + ListenerSP listener_sp(Listener::MakeListener("dummy")); + ProcessSP process_sp = + std::make_shared(target_sp, listener_sp); + ASSERT_TRUE(process_sp); + + llvm::SmallVector short_buffer(10, 0); + llvm::SmallVector> ranges = {{0x12345, 128}, + {0x11, 128}}; + llvm::SmallVector> read_results; + ASSERT_DEBUG_DEATH( + { read_results = process_sp->ReadMemoryRanges(ranges, short_buffer); }, + "provided buffer is too short"); +#ifdef NDEBUG + // With asserts off, the read should return empty ranges. + ASSERT_EQ(read_results.size(), ranges.size()); + for (llvm::MutableArrayRef result : read_results) + ASSERT_TRUE(result.empty()); +#endif +} From 8e1255ff45b702c7cec034c11623bb2af81ea09c Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Wed, 22 Oct 2025 07:40:29 -0700 Subject: [PATCH 2/2] [lldb] Implement ProcessGDBRemote support for ReadMemoryRanges (#164311) This commit makes use of the newly created MultiMemRead packet to provide an efficient implementation of MultiMemRead inside ProcessGDBRemote. Testing is tricky, but it is accomplished two ways: 1. Some Objective-C tests would fail if this were implemented incorrectly, as there is already an in-tree use of the base class implementation of MultiMemRead, which is now getting replaced by the derived class. 2. One Objective-C test is modified so that we ensure the packet is being sent by looking at the packet logs. While not the most elegant solution, it is a strategy adopted in other tests as well. This gets around the fact that we cannot instantiate / unittest a mock ProcessGDBRemote. Depends on https://github.com/llvm/llvm-project/pull/163651 (cherry picked from commit 276bccda66f333171f5ba08d18d9301ee663cf7a) --- .../Process/gdb-remote/ProcessGDBRemote.cpp | 103 ++++++++++++++++++ .../Process/gdb-remote/ProcessGDBRemote.h | 16 +++ .../objc/foundation/TestObjCMethodsNSError.py | 27 +++++ 3 files changed, 146 insertions(+) diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 73c23ab24404b..8d87541f18610 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -86,6 +86,7 @@ #include "lldb/Host/Host.h" #include "lldb/Utility/StringExtractorGDBRemote.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" @@ -2735,6 +2736,108 @@ size_t ProcessGDBRemote::DoReadMemory(addr_t addr, void *buf, size_t size, return 0; } +llvm::SmallVector> +ProcessGDBRemote::ReadMemoryRanges( + llvm::ArrayRef> ranges, + llvm::MutableArrayRef buffer) { + if (!m_gdb_comm.GetMultiMemReadSupported()) + return Process::ReadMemoryRanges(ranges, buffer); + + llvm::Expected response = + SendMultiMemReadPacket(ranges); + if (!response) { + LLDB_LOG_ERROR(GetLog(GDBRLog::Process), response.takeError(), + "MultiMemRead error response: {0}"); + return Process::ReadMemoryRanges(ranges, buffer); + } + + llvm::StringRef response_str = response->GetStringRef(); + const unsigned expected_num_ranges = ranges.size(); + llvm::Expected>> + parsed_response = + ParseMultiMemReadPacket(response_str, buffer, expected_num_ranges); + if (!parsed_response) { + LLDB_LOG_ERROR(GetLog(GDBRLog::Process), parsed_response.takeError(), + "MultiMemRead error parsing response: {0}"); + return Process::ReadMemoryRanges(ranges, buffer); + } + return std::move(*parsed_response); +} + +llvm::Expected +ProcessGDBRemote::SendMultiMemReadPacket( + llvm::ArrayRef> ranges) { + std::string packet_str; + llvm::raw_string_ostream stream(packet_str); + stream << "MultiMemRead:ranges:"; + + auto range_to_stream = [&](auto range) { + // the "-" marker omits the '0x' prefix. + stream << llvm::formatv("{0:x-},{1:x-}", range.base, range.size); + }; + llvm::interleave(ranges, stream, range_to_stream, ","); + stream << ";"; + + StringExtractorGDBRemote response; + GDBRemoteCommunication::PacketResult packet_result = + m_gdb_comm.SendPacketAndWaitForResponse(packet_str.data(), response, + GetInterruptTimeout()); + if (packet_result != GDBRemoteCommunication::PacketResult::Success) + return llvm::createStringError( + llvm::formatv("MultiMemRead failed to send packet: '{0}'", packet_str)); + + if (response.IsErrorResponse()) + return llvm::createStringError( + llvm::formatv("MultiMemRead failed: '{0}'", response.GetStringRef())); + + if (!response.IsNormalResponse()) + return llvm::createStringError(llvm::formatv( + "MultiMemRead unexpected response: '{0}'", response.GetStringRef())); + + return response; +} + +llvm::Expected>> +ProcessGDBRemote::ParseMultiMemReadPacket(llvm::StringRef response_str, + llvm::MutableArrayRef buffer, + unsigned expected_num_ranges) { + // The sizes and the data are separated by a `;`. + auto [sizes_str, memory_data] = response_str.split(';'); + if (sizes_str.size() == response_str.size()) + return llvm::createStringError(llvm::formatv( + "MultiMemRead response missing field separator ';' in: '{0}'", + response_str)); + + llvm::SmallVector> read_results; + + // Sizes are separated by a `,`. + for (llvm::StringRef size_str : llvm::split(sizes_str, ',')) { + uint64_t read_size; + if (size_str.getAsInteger(16, read_size)) + return llvm::createStringError(llvm::formatv( + "MultiMemRead response has invalid size string: {0}", size_str)); + + if (memory_data.size() < read_size) + return llvm::createStringError( + llvm::formatv("MultiMemRead response did not have enough data, " + "requested sizes: {0}", + sizes_str)); + + llvm::StringRef region_to_read = memory_data.take_front(read_size); + memory_data = memory_data.drop_front(read_size); + + assert(buffer.size() >= read_size); + llvm::MutableArrayRef region_to_write = + buffer.take_front(read_size); + buffer = buffer.drop_front(read_size); + + memcpy(region_to_write.data(), region_to_read.data(), read_size); + read_results.push_back(region_to_write); + } + + return read_results; +} + bool ProcessGDBRemote::SupportsMemoryTagging() { return m_gdb_comm.GetMemoryTaggingSupported(); } diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 7c3dfb179a4b3..eb33b52b57441 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -137,6 +137,22 @@ class ProcessGDBRemote : public Process, size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, Status &error) override; + /// Override of ReadMemoryRanges that uses MultiMemRead to optimize this + /// operation. + llvm::SmallVector> + ReadMemoryRanges(llvm::ArrayRef> ranges, + llvm::MutableArrayRef buf) override; + +private: + llvm::Expected + SendMultiMemReadPacket(llvm::ArrayRef> ranges); + + llvm::Expected>> + ParseMultiMemReadPacket(llvm::StringRef response_str, + llvm::MutableArrayRef buffer, + unsigned expected_num_ranges); + +public: Status WriteObjectFile(std::vector entries) override; diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py index a14035db5e057..a9fbe54e074ff 100644 --- a/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethodsNSError.py @@ -45,3 +45,30 @@ def test_NSError_p(self): ], ) self.runCmd("process continue") + + @skipIfOutOfTreeDebugserver + def test_runtime_types_efficient_memreads(self): + # Test that we use an efficient reading of memory when reading + # Objective-C method descriptions. + logfile = os.path.join(self.getBuildDir(), "log.txt") + self.runCmd(f"log enable -f {logfile} gdb-remote packets process") + self.addTearDownHook(lambda: self.runCmd("log disable gdb-remote packets")) + + self.build() + self.target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "// Break here for NSString tests", lldb.SBFileSpec("main.m", False) + ) + + self.runCmd(f"proc plugin packet send StartTesting", check=False) + self.expect('expression str = [NSString stringWithCString: "new"]') + self.runCmd(f"proc plugin packet send EndTesting", check=False) + + self.assertTrue(os.path.exists(logfile)) + log_text = open(logfile).read() + log_text = log_text.split("StartTesting", 1)[-1].split("EndTesting", 1)[0] + + # This test is only checking that the packet it used at all (and that + # no errors are produced). It doesn't check that the packet is being + # used to solve a problem in an optimal way. + self.assertIn("MultiMemRead:", log_text) + self.assertNotIn("MultiMemRead error", log_text)