[lldb] Remove non address bits from memory read arguments

Addresses on AArch64 can have top byte tags, memory tags and pointer
authentication signatures in the upper bits.

While testing memory tagging I found that memory read couldn't
read a range if the two addresses had different tags. The same
could apply to signed pointers given the right circumstance.

(lldb) memory read mte_buf_alt_tag mte_buf+16
error: end address (0x900fffff7ff8010) must be greater than the start
address (0xa00fffff7ff8000).

Or it would try to read a lot more memory than expected.

(lldb) memory read mte_buf mte_buf_alt_tag+16
error: Normally, 'memory read' will not read over 1024 bytes of data.
error: Please use --force to override this restriction just once.
error: or set target.max-memory-read-size if you will often need a
larger limit.

Fix this by removing non address bits before we calculate the read
range. A test is added for AArch64 Linux that confirms this by using
the top byte ignore feature.

This means that if you do read with a tagged pointer the output
does not include those tags. This is potentially confusing but I think
overall it's better that we don't pretend that we're reading memory
from a range that the process is unable to map.

(lldb) p ptr1
(char *) $4 = 0x3400fffffffff140 "\x80\xf1\xff\xff\xff\xff"
(lldb) p ptr2
(char *) $5 = 0x5600fffffffff140 "\x80\xf1\xff\xff\xff\xff"
(lldb) memory read ptr1 ptr2+16
0xfffffffff140: 80 f1 ff ff ff ff 00 00 38 70 bc f7 ff ff 00 00  ........8p......

Reviewed By: omjavaid, danielkiss

Differential Revision: https://reviews.llvm.org/D103626
llvm · Jan 11, 2022 · 88fdce5 · 88fdce5
1 parent dbb8d08
commit 88fdce5
Show file tree

Hide file tree

Showing 5 changed files with 85 additions and 0 deletions.
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -23,6 +23,7 @@
 #include "lldb/Interpreter/Options.h"
 #include "lldb/Symbol/SymbolFile.h"
 #include "lldb/Symbol/TypeList.h"
+#include "lldb/Target/ABI.h"
 #include "lldb/Target/Language.h"
 #include "lldb/Target/MemoryHistory.h"
 #include "lldb/Target/MemoryRegionInfo.h"
@@ -590,9 +591,16 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
       return false;
     }
 
+    ABISP abi = m_exe_ctx.GetProcessPtr()->GetABI();
+    if (abi)
+      addr = abi->FixDataAddress(addr);
+
     if (argc == 2) {
       lldb::addr_t end_addr = OptionArgParser::ToAddress(
           &m_exe_ctx, command[1].ref(), LLDB_INVALID_ADDRESS, nullptr);
+      if (end_addr != LLDB_INVALID_ADDRESS && abi)
+        end_addr = abi->FixDataAddress(end_addr);
+
       if (end_addr == LLDB_INVALID_ADDRESS) {
         result.AppendError("invalid end address expression.");
         result.AppendError(error.AsCString());

diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/Makefile b/lldb/test/API/linux/aarch64/tagged_memory_read/Makefile
@@ -0,0 +1,4 @@
+C_SOURCES := main.c
+CFLAGS_EXTRAS := -march=armv8.3-a
+
+include Makefile.rules
diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py b/lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py
@@ -0,0 +1,55 @@
+"""
+Test that "memory read" removes non address bits from
+memory read arguments.
+"""
+
+
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class AArch64LinuxTaggedMemoryReadTestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    NO_DEBUG_INFO_TESTCASE = True
+
+    # AArch64 Linux always enables top byte ignore
+    @skipUnlessArch("aarch64")
+    @skipUnlessPlatform(["linux"])
+    def test_tagged_memory_read(self):
+        self.build()
+        self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET)
+
+        lldbutil.run_break_set_by_file_and_line(self, "main.c",
+            line_number('main.c', '// Set break point at this line.'),
+            num_expected_locations=1)
+
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        if self.process().GetState() == lldb.eStateExited:
+            self.fail("Test program failed to run.")
+
+        self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT,
+            substrs=['stopped',
+                     'stop reason = breakpoint'])
+
+        # If we do not remove non address bits, this can fail in two ways.
+        # 1. We attempt to read much more than 16 bytes, probably more than
+        #    the default 1024 byte read size. Which will error.
+        # 2. We error because end address is < start address since end's
+        #    tag is < start's tag.
+        #
+        # Each time we check that the printed line addresses do not include
+        # either of the tags we set. Those bits are a property of the
+        # pointer not of the memory it points to.
+        tagged_addr_pattern = "0x(34|46)[0-9A-Fa-f]{14}:.*"
+        self.expect("memory read ptr1 ptr2+16", patterns=[tagged_addr_pattern], matching=False)
+        # Check that the stored previous end address is stripped
+        self.expect("memory read", patterns=[tagged_addr_pattern], matching=False)
+        # Would fail if we don't remove non address bits because 0x56... > 0x34...
+        self.expect("memory read ptr2 ptr1+16", patterns=[tagged_addr_pattern], matching=False)
+        self.expect("memory read", patterns=[tagged_addr_pattern], matching=False)
diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/main.c b/lldb/test/API/linux/aarch64/tagged_memory_read/main.c
@@ -0,0 +1,15 @@
+#include <stddef.h>
+
+static char *set_non_address_bits(char *ptr, size_t tag) { // Returns ptr with tag ORed into its upper bits.
+  // Set top byte tag (AArch64 Linux always enables top byte ignore).
+  return (char *)((size_t)ptr | (tag << 56)); // tag is shifted up by 56 bits, so the low 56 bits of ptr are unchanged
+}
+
+int main(int argc, char const *argv[]) {
+  char buf[32]; // Stack buffer both tagged pointers refer to; its contents are never initialised.
+
+  char *ptr1 = set_non_address_bits(buf, 0x34); // buf's address with 0x34 ORed into the top byte
+  char *ptr2 = set_non_address_bits(buf, 0x56); // same buffer, but with 0x56 ORed into the top byte
+
+  return 0; // Set break point at this line.
+}
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
@@ -163,6 +163,9 @@ Changes to LLDB
 * A change in Clang's type printing has changed the way LLDB names array types
   (from ``int [N]`` to ``int[N]``) - LLDB pretty printer type name matching
   code may need to be updated to handle this.
+* The ``memory read`` command now ignores non-address bits in start and end
+  addresses. In addition, non-address bits will not be shown in the addresses
+  in the output.
 
 Changes to Sanitizers
 ---------------------