Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[scudo] [MTE] resize stack depot for allocation ring buffer #74515

Merged
merged 17 commits into from
Feb 6, 2024

Conversation

fmayer
Copy link
Contributor

@fmayer fmayer commented Dec 5, 2023

No description provided.

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 5, 2023

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Florian Mayer (fmayer)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/74515.diff

5 Files Affected:

  • (modified) compiler-rt/lib/scudo/standalone/combined.h (+61-21)
  • (modified) compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp (+10-9)
  • (modified) compiler-rt/lib/scudo/standalone/platform.h (-10)
  • (modified) compiler-rt/lib/scudo/standalone/stack_depot.h (+50-30)
  • (modified) compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp (+1-1)
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index 72e5caa026e4d..c6027ae25f318 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -14,6 +14,7 @@
 #include "flags.h"
 #include "flags_parser.h"
 #include "local_cache.h"
+#include "mem_map.h"
 #include "memtag.h"
 #include "options.h"
 #include "quarantine.h"
@@ -289,7 +290,9 @@ class Allocator {
     uptr Size =
         android_unsafe_frame_pointer_chase(Stack, MaxTraceSize + DiscardFrames);
     Size = Min<uptr>(Size, MaxTraceSize + DiscardFrames);
-    return Depot.insert(Stack + Min<uptr>(DiscardFrames, Size), Stack + Size);
+    return reinterpret_cast<StackDepot *>(RawStackDepot)
+        ->insert(RawStackDepot, Stack + Min<uptr>(DiscardFrames, Size),
+                 Stack + Size);
 #else
     return 0;
 #endif
@@ -916,8 +919,14 @@ class Allocator {
       Primary.Options.clear(OptionBit::AddLargeAllocationSlack);
   }
 
-  const char *getStackDepotAddress() const {
-    return reinterpret_cast<const char *>(&Depot);
+  const char *getStackDepotAddress() {
+    initThreadMaybe();
+    return RawStackDepot;
+  }
+
+  uptr getStackDepotSize() {
+    initThreadMaybe();
+    return StackDepotSize;
   }
 
   const char *getRegionInfoArrayAddress() const {
@@ -954,13 +963,14 @@ class Allocator {
 
   static const uptr MaxTraceSize = 64;
 
-  static void collectTraceMaybe(const StackDepot *Depot,
+  static void collectTraceMaybe(const char *RawStackDepot,
                                 uintptr_t (&Trace)[MaxTraceSize], u32 Hash) {
+    auto *Depot = reinterpret_cast<const StackDepot *>(RawStackDepot);
     uptr RingPos, Size;
-    if (!Depot->find(Hash, &RingPos, &Size))
+    if (!Depot->find(RawStackDepot, Hash, &RingPos, &Size))
       return;
     for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I)
-      Trace[I] = static_cast<uintptr_t>((*Depot)[RingPos + I]);
+      Trace[I] = static_cast<uintptr_t>(Depot->at(RawStackDepot, RingPos + I));
   }
 
   static void getErrorInfo(struct scudo_error_info *ErrorInfo,
@@ -973,25 +983,24 @@ class Allocator {
         MemoryAddr + MemorySize < MemoryAddr)
       return;
 
-    auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr);
     size_t NextErrorReport = 0;
 
     // Check for OOB in the current block and the two surrounding blocks. Beyond
     // that, UAF is more likely.
     if (extractTag(FaultAddr) != 0)
-      getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot,
+      getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, DepotPtr,
                          RegionInfoPtr, Memory, MemoryTags, MemoryAddr,
                          MemorySize, 0, 2);
 
     // Check the ring buffer. For primary allocations this will only find UAF;
     // for secondary allocations we can find either UAF or OOB.
-    getRingBufferErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot,
+    getRingBufferErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, DepotPtr,
                            RingBufferPtr);
 
     // Check for OOB in the 28 blocks surrounding the 3 we checked earlier.
     // Beyond that we are likely to hit false positives.
     if (extractTag(FaultAddr) != 0)
-      getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot,
+      getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, DepotPtr,
                          RegionInfoPtr, Memory, MemoryTags, MemoryAddr,
                          MemorySize, 2, 16);
   }
@@ -1039,7 +1048,8 @@ class Allocator {
   uptr GuardedAllocSlotSize = 0;
 #endif // GWP_ASAN_HOOKS
 
-  StackDepot Depot;
+  char *RawStackDepot = nullptr;
+  uptr StackDepotSize = 0;
 
   struct AllocationRingBuffer {
     struct Entry {
@@ -1255,7 +1265,8 @@ class Allocator {
   }
 
   void storePrimaryAllocationStackMaybe(const Options &Options, void *Ptr) {
-    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
+    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)) ||
+        !RawRingBuffer)
       return;
     auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
     Ptr32[MemTagAllocationTraceIndex] = collectStackTrace();
@@ -1288,7 +1299,8 @@ class Allocator {
 
   void storeSecondaryAllocationStackMaybe(const Options &Options, void *Ptr,
                                           uptr Size) {
-    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
+    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)) ||
+        !RawRingBuffer)
       return;
 
     u32 Trace = collectStackTrace();
@@ -1303,7 +1315,8 @@ class Allocator {
 
   void storeDeallocationStackMaybe(const Options &Options, void *Ptr,
                                    u8 PrevTag, uptr Size) {
-    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)))
+    if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks)) ||
+        !RawRingBuffer)
       return;
 
     auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
@@ -1324,7 +1337,7 @@ class Allocator {
 
   static void getInlineErrorInfo(struct scudo_error_info *ErrorInfo,
                                  size_t &NextErrorReport, uintptr_t FaultAddr,
-                                 const StackDepot *Depot,
+                                 const char *RawStackDepot,
                                  const char *RegionInfoPtr, const char *Memory,
                                  const char *MemoryTags, uintptr_t MemoryAddr,
                                  size_t MemorySize, size_t MinDistance,
@@ -1389,8 +1402,10 @@ class Allocator {
           UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW;
       R->allocation_address = ChunkAddr;
       R->allocation_size = Header.SizeOrUnusedBytes;
-      collectTraceMaybe(Depot, R->allocation_trace,
-                        Data[MemTagAllocationTraceIndex]);
+      if (RawStackDepot) {
+        collectTraceMaybe(RawStackDepot, R->allocation_trace,
+                          Data[MemTagAllocationTraceIndex]);
+      }
       R->allocation_tid = Data[MemTagAllocationTidIndex];
       return NextErrorReport == NumErrorReports;
     };
@@ -1407,11 +1422,11 @@ class Allocator {
   static void getRingBufferErrorInfo(struct scudo_error_info *ErrorInfo,
                                      size_t &NextErrorReport,
                                      uintptr_t FaultAddr,
-                                     const StackDepot *Depot,
+                                     const char *RawStackDepot,
                                      const char *RingBufferPtr) {
     auto *RingBuffer =
         reinterpret_cast<const AllocationRingBuffer *>(RingBufferPtr);
-    if (!RingBuffer || RingBuffer->Size == 0)
+    if (!RingBuffer || RingBuffer->Size == 0 || !RawStackDepot)
       return;
     uptr Pos = atomic_load_relaxed(&RingBuffer->Pos);
 
@@ -1470,9 +1485,10 @@ class Allocator {
 
       R->allocation_address = UntaggedEntryPtr;
       R->allocation_size = EntrySize;
-      collectTraceMaybe(Depot, R->allocation_trace, AllocationTrace);
+      collectTraceMaybe(RawStackDepot, R->allocation_trace, AllocationTrace);
       R->allocation_tid = AllocationTid;
-      collectTraceMaybe(Depot, R->deallocation_trace, DeallocationTrace);
+      collectTraceMaybe(RawStackDepot, R->deallocation_trace,
+                        DeallocationTrace);
       R->deallocation_tid = DeallocationTid;
     }
   }
@@ -1501,6 +1517,27 @@ class Allocator {
       return;
     u32 AllocationRingBufferSize =
         static_cast<u32>(getFlags()->allocation_ring_buffer_size);
+    // We store alloc and free stacks for each entry.
+    constexpr auto kStacksPerRingBufferEntry = 2;
+    u32 TabSize = static_cast<u32>(roundUpPowerOfTwo(kStacksPerRingBufferEntry *
+                                                     AllocationRingBufferSize));
+    constexpr auto kFramesPerStack = 8;
+    static_assert(isPowerOfTwo(kFramesPerStack));
+    u32 RingSize = static_cast<u32>(TabSize * kFramesPerStack);
+    DCHECK(isPowerOfTwo(RingSize));
+    static_assert(sizeof(StackDepot) % alignof(atomic_u64) == 0);
+
+    StackDepotSize =
+        roundUp(sizeof(StackDepot) + sizeof(atomic_u64) * RingSize +
+                    sizeof(atomic_u32) * TabSize,
+                getPageSizeCached());
+    MemMapT DepotMap;
+    DepotMap.map(
+        /*Addr=*/0U, StackDepotSize, "scudo:stack_depot");
+    RawStackDepot = reinterpret_cast<char *>(DepotMap.getBase());
+    auto *Depot = reinterpret_cast<StackDepot *>(DepotMap.getBase());
+    Depot->init(DepotMap, RingSize, TabSize);
+
     MemMapT MemMap;
     MemMap.map(
         /*Addr=*/0U,
@@ -1524,6 +1561,9 @@ class Allocator {
       MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
     }
     RawRingBuffer = nullptr;
+    if (RawStackDepot) {
+      reinterpret_cast<StackDepot *>(RawStackDepot)->free();
+    }
   }
 
   static constexpr size_t ringBufferSizeInBytes(u32 AllocationRingBufferSize) {
diff --git a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
index 74456450a4761..dd3da623ca5e9 100644
--- a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
+++ b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
@@ -29,13 +29,13 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) {
   size_t MemorySize = (MemoryAndTags.length() / 17) * 16;
   const char *MemoryTags = Memory + MemorySize;
 
-  std::string StackDepotBytes =
+  scudo::StackDepot Depot;
+  std::string DepotRingBytes =
       FDP.ConsumeRandomLengthString(FDP.remaining_bytes());
-  std::vector<char> StackDepot(sizeof(scudo::StackDepot), 0);
-  for (size_t i = 0; i < StackDepotBytes.length() && i < StackDepot.size();
-       ++i) {
-    StackDepot[i] = StackDepotBytes[i];
-  }
+  std::string DepotTabBytes =
+      FDP.ConsumeRandomLengthString(FDP.remaining_bytes());
+  Depot.SetRingForFuzzing(DepotRingBytes.c_str(), DepotRingBytes.size());
+  Depot.SetTabForFuzzing(DepotTabBytes.c_str(), DepotTabBytes.size());
 
   std::string RegionInfoBytes =
       FDP.ConsumeRandomLengthString(FDP.remaining_bytes());
@@ -52,8 +52,9 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) {
     return 0;
 
   scudo_error_info ErrorInfo;
-  AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(),
-                           RegionInfo.data(), RingBufferBytes.data(), Memory,
-                           MemoryTags, MemoryAddr, MemorySize);
+  AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr,
+                           reinterpret_cast<char *>(&Depot), RegionInfo.data(),
+                           RingBufferBytes.data(), Memory, MemoryTags,
+                           MemoryAddr, MemorySize);
   return 0;
 }
diff --git a/compiler-rt/lib/scudo/standalone/platform.h b/compiler-rt/lib/scudo/standalone/platform.h
index b71a86be7669f..5af1275e32d2b 100644
--- a/compiler-rt/lib/scudo/standalone/platform.h
+++ b/compiler-rt/lib/scudo/standalone/platform.h
@@ -63,16 +63,6 @@
 #define SCUDO_CAN_USE_MTE (SCUDO_LINUX || SCUDO_TRUSTY)
 #endif
 
-// Use smaller table sizes for fuzzing in order to reduce input size.
-// Trusty just has less available memory.
-#ifndef SCUDO_SMALL_STACK_DEPOT
-#if defined(SCUDO_FUZZ) || SCUDO_TRUSTY
-#define SCUDO_SMALL_STACK_DEPOT 1
-#else
-#define SCUDO_SMALL_STACK_DEPOT 0
-#endif
-#endif
-
 #ifndef SCUDO_ENABLE_HOOKS
 #define SCUDO_ENABLE_HOOKS 0
 #endif
diff --git a/compiler-rt/lib/scudo/standalone/stack_depot.h b/compiler-rt/lib/scudo/standalone/stack_depot.h
index 12c35eb2a4f33..b812658c95463 100644
--- a/compiler-rt/lib/scudo/standalone/stack_depot.h
+++ b/compiler-rt/lib/scudo/standalone/stack_depot.h
@@ -10,6 +10,8 @@
 #define SCUDO_STACK_DEPOT_H_
 
 #include "atomic_helpers.h"
+#include "common.h"
+#include "mem_map.h"
 #include "mutex.h"
 
 namespace scudo {
@@ -38,7 +40,7 @@ class MurMur2HashBuilder {
   }
 };
 
-class StackDepot {
+class alignas(16) StackDepot {
   HybridMutex RingEndMu;
   u32 RingEnd = 0;
 
@@ -62,47 +64,62 @@ class StackDepot {
   // This is achieved by re-checking the hash of the stack trace before
   // returning the trace.
 
-#if SCUDO_SMALL_STACK_DEPOT
-  static const uptr TabBits = 4;
-#else
-  static const uptr TabBits = 16;
-#endif
-  static const uptr TabSize = 1 << TabBits;
-  static const uptr TabMask = TabSize - 1;
-  atomic_u32 Tab[TabSize] = {};
-
-#if SCUDO_SMALL_STACK_DEPOT
-  static const uptr RingBits = 4;
-#else
-  static const uptr RingBits = 19;
-#endif
-  static const uptr RingSize = 1 << RingBits;
-  static const uptr RingMask = RingSize - 1;
-  atomic_u64 Ring[RingSize] = {};
+  MemMapT DepotMap;
+  uptr RingSize = 0;
+  uptr RingMask = 0;
+  uptr TabMask = 0;
+
+  atomic_u64 *Ring(char *RawStackDepot) {
+    return reinterpret_cast<atomic_u64 *>(RawStackDepot + sizeof(StackDepot));
+  }
+
+  atomic_u32 *Tab(char *RawStackDepot) {
+    return reinterpret_cast<atomic_u32 *>(RawStackDepot + sizeof(StackDepot) +
+                                          sizeof(atomic_u64) * RingSize);
+  }
+
+  const atomic_u64 *Ring(const char *RawStackDepot) const {
+    return reinterpret_cast<const atomic_u64 *>(RawStackDepot +
+                                                sizeof(StackDepot));
+  }
+
+  const atomic_u32 *Tab(const char *RawStackDepot) const {
+    return reinterpret_cast<const atomic_u32 *>(
+        RawStackDepot + sizeof(StackDepot) + sizeof(atomic_u64) * RingSize);
+  }
 
 public:
+  void init(MemMapT Map, uptr RingSz, uptr TabSz) {
+    DCHECK(isPowerOfTwo(RingSz));
+    DCHECK(isPowerOfTwo(TabSz));
+    DepotMap = Map;
+    RingSize = RingSz;
+    RingMask = RingSz - 1;
+    TabMask = TabSz - 1;
+  }
+
   // Insert hash of the stack trace [Begin, End) into the stack depot, and
   // return the hash.
-  u32 insert(uptr *Begin, uptr *End) {
+  u32 insert(char *RawStackDepot, uptr *Begin, uptr *End) {
     MurMur2HashBuilder B;
     for (uptr *I = Begin; I != End; ++I)
       B.add(u32(*I) >> 2);
     u32 Hash = B.get();
 
     u32 Pos = Hash & TabMask;
-    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
-    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
+    u32 RingPos = atomic_load_relaxed(&Tab(RawStackDepot)[Pos]);
+    u64 Entry = atomic_load_relaxed(&Ring(RawStackDepot)[RingPos]);
     u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1;
     if (Entry == Id)
       return Hash;
 
     ScopedLock Lock(RingEndMu);
     RingPos = RingEnd;
-    atomic_store_relaxed(&Tab[Pos], RingPos);
-    atomic_store_relaxed(&Ring[RingPos], Id);
+    atomic_store_relaxed(&Tab(RawStackDepot)[Pos], RingPos);
+    atomic_store_relaxed(&Ring(RawStackDepot)[RingPos], Id);
     for (uptr *I = Begin; I != End; ++I) {
       RingPos = (RingPos + 1) & RingMask;
-      atomic_store_relaxed(&Ring[RingPos], *I);
+      atomic_store_relaxed(&Ring(RawStackDepot)[RingPos], *I);
     }
     RingEnd = (RingPos + 1) & RingMask;
     return Hash;
@@ -111,12 +128,13 @@ class StackDepot {
   // Look up a stack trace by hash. Returns true if successful. The trace may be
   // accessed via operator[] passing indexes between *RingPosPtr and
   // *RingPosPtr + *SizePtr.
-  bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const {
+  bool find(const char *RawStackDepot, u32 Hash, uptr *RingPosPtr,
+            uptr *SizePtr) const {
     u32 Pos = Hash & TabMask;
-    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
+    u32 RingPos = atomic_load_relaxed(&Tab(RawStackDepot)[Pos]);
     if (RingPos >= RingSize)
       return false;
-    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
+    u64 Entry = atomic_load_relaxed(&Ring(RawStackDepot)[RingPos]);
     u64 HashWithTagBit = (u64(Hash) << 1) | 1;
     if ((Entry & 0x1ffffffff) != HashWithTagBit)
       return false;
@@ -128,13 +146,15 @@ class StackDepot {
     MurMur2HashBuilder B;
     for (uptr I = 0; I != Size; ++I) {
       RingPos = (RingPos + 1) & RingMask;
-      B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2);
+      B.add(u32(atomic_load_relaxed(&Ring(RawStackDepot)[RingPos])) >> 2);
     }
     return B.get() == Hash;
   }
 
-  u64 operator[](uptr RingPos) const {
-    return atomic_load_relaxed(&Ring[RingPos & RingMask]);
+  void free() { DepotMap.unmap(DepotMap.getBase(), DepotMap.getCapacity()); }
+
+  u64 at(const char *RawStackDepot, uptr RingPos) const {
+    return atomic_load_relaxed(&Ring(RawStackDepot)[RingPos & RingMask]);
   }
 };
 
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
index 4fed44779b902..ee78b76efd310 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
@@ -54,7 +54,7 @@ INTERFACE const char *__scudo_get_stack_depot_addr() {
 }
 
 INTERFACE size_t __scudo_get_stack_depot_size() {
-  return sizeof(scudo::StackDepot);
+  return Allocator.getStackDepotSize();
 }
 
 INTERFACE const char *__scudo_get_region_info_addr() {

@fmayer fmayer changed the title resize stack depot for allocation ring buffer [scudo] [MTE] resize stack depot for allocation ring buffer Dec 5, 2023
@fmayer fmayer marked this pull request as draft December 5, 2023 21:27
@fmayer fmayer force-pushed the initdepot branch 6 times, most recently from aa552db to e3f76e6 Compare December 9, 2023 01:30
@fmayer fmayer marked this pull request as ready for review December 9, 2023 01:33
Copy link
Contributor

@cferris1000 cferris1000 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to add some new tests for this? I don't think we have any, so I presume you are using the Android debuggerd tests on a MTE device to verify this.

compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
// (TabMask + 1) atomic_u32.

atomic_u64 *Ring(char *RawStackDepot) {
return reinterpret_cast<atomic_u64 *>(RawStackDepot + sizeof(StackDepot));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to use the RawDepotMap.getBase() function instead of this? It's already a uptr so no need to do the second cast.

The call is potentially more expensive but I don't think that matters if this is used in debugging contexts.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which second cast? We want to cast to an atomic_u64 either way?

}

atomic_u32 *Tab(char *RawStackDepot) {
return reinterpret_cast<atomic_u32 *>(RawStackDepot + sizeof(StackDepot) +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance that these values could ever overflow? I know you are planning to allow the setting of the ring buffersize, but since everything is calculated from that value, I don't think an overflow should be possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. If we take the max possible values for TabSize and RingSize in mapAndInitializeRingBuffer (UINT32_MAX), we are within bounds of uptr (on 64 bits) for this:

    StackDepotSize = sizeof(StackDepot) + sizeof(atomic_u64) * RingSize +
                     sizeof(atomic_u32) * TabSize;

Added some extra checks against benign overflows in the calculation of TabSize and RingSize (it will just lead to the stack depot to be smaller than expected), and a static assert about the statement above.

Copy link
Contributor

@ChiaHungDuan ChiaHungDuan left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just took a quick review, will do a second round tomorrow

compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
compiler-rt/lib/scudo/standalone/combined.h Show resolved Hide resolved
compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
compiler-rt/lib/scudo/standalone/stack_depot.h Outdated Show resolved Hide resolved
Comment on lines 121 to 123
if (!isPowerOfTwo(TabSize)) {
return false;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have asserted that the TabSize should be power of 2, is this still possible to be happened?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is called on the buffer we get from debuggerd, not the object we construct using init. So we need to check again.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then do we want to dump any message so that we know why the depot is corrupted? It seems that we only bail out silently.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it seems like a novel concept to have non-fatal errors in scudo. All the logging in report.cpp seems to involve crashing. Happy to add a non-crashing version if desired.

compiler-rt/lib/scudo/standalone/combined.h Show resolved Hide resolved
Comment on lines 1576 to 1590
if (RawStackDepot) {
RawStackDepotMap.unmap(RawStackDepotMap.getBase(),
RawStackDepotMap.getCapacity());
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the question mentioned by @cferris1000, I'm not sure if we want this RawStackDepot. I would consider having a getStackDepot() and do all the necessary shifting and casting. Then we only need to check if RawStackDepotMap.isAllocated() here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, this is to satisfy the C++ standard rules about object bounds. This is the same we do for the AllocationRingBuffer, and this was discussed and agreed on in that review.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, what does this mean? I don't see how this affects object bounds. To be fair, this code is incredibly hard to follow because of the way you are slicing memory and doing the casting and checking.

That's why I would suggest not allocating one big piece, but allocating separate pieces for each individual item. If you are worried about wasting memory, how much memory do you think you would waste? I think maybe it would get you an extra page or two at most. It would make the code easier to follow.

Copy link
Contributor Author

@fmayer fmayer Jan 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, what does this mean? I don't see how this affects object bounds. To be fair, this code is incredibly hard to follow because of the way you are slicing memory and doing the casting and checking.

That's why I would suggest not allocating one big piece, but allocating separate pieces for each individual item. If you are worried about wasting memory, how much memory do you think you would waste? I think maybe it would get you an extra page or two at most. It would make the code easier to follow.

This isn't going to work. This is read by an external process here: https://cs.android.com/android/platform/superproject/main/+/main:system/core/debuggerd/libdebuggerd/scudo.cpp;l=52;drc=cdf55585a788d5b7ad4e4804046549c54d5c1791. This is why this has to be a single contiguous allocation.

RawStackDepot = reinterpret_cast<char *>(DepotMap.getBase());
auto *Depot = reinterpret_cast<StackDepot *>(DepotMap.getBase());
Depot->init(RingSize, TabSize);
DCHECK(Depot->isValid(StackDepotSize));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This DCHECK seems a little weird to me. We are creating the depot regarding the StackDepotSize but we verify the value after the creation.

Even if you want to check either RingSize or TabSize, I may suggest checking them in init()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But init doesn't need to know the StackDepotSize.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me that it's a logic conflict. Why do we init something with invalid arguments? Shouldn't we stop the initialization before we construct it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't, that's why the DCHECK is there? A DCHECK should assert what's always true.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I'm not saying how it's verified. I'm talking about the code structure.

If the DCHECK fails, what will you review? It should be the RingSize and TabSize because they are used for the initialization. Therefore, checking the values before/in the middle of the initialization gives you the useful insight about what's going on.

Initialization usually involves several steps, it'd be better to tell when there's something wrong instead of carrying the error to the end. If it causes any crash, that DCHECK won't help anything. Besides, at the end of initialization, I would expect that everything is fine. For example, I don't need to verify if std::vector<int>(4, 0) gives me 4 elements and all of them are zero.

IMO, the DCHECK is not used at the right place.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I replied to this, not sure if GitHub problem or I actually didn't:

I don't understand the problem with adding this DCHECK. This makes sure that the logic for isValid and for init are consistent, which is a good thing to check in debug builds. If it fails it's always something wrong with the code of either of them, because it should not fail by construction of the code.

I don't need to verify if std::vector(4, 0) gives me 4 elements and all of them are zero.

I don't think that's a correct analogy. If I was the implementer of std::vector then I might want to add some debug checks about the internal state of std::vector.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned, if the DCHECK fails, how do we know why it's failed? We still need to check all the logic in isValid and review the two arguments of init. Why don't we just stop the initialization when we see something wrong? If you already know the arguments are going to generate an invalid object, why do you still do that?

The reason for the vector example is, after construction, we don't need to verify the size and content like

std::vector<int> foo(4, 0);
DCHECK_EQ(foo.size(), 4);
for (int v : foo)
  DCHECK_EQ(v, 0);

Of course you may want to add some debug checks in the implementation but not asking the user to verify it. In this case, the user has to do the verification and I think it is kind of unreasonable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed offline, there was some misunderstanding, a user should not be able to trigger this. But given we also assert this in tests, I just removed it.

// Insert hash of the stack trace [Begin, End) into the stack depot, and
// return the hash.
u32 insert(uptr *Begin, uptr *End) {
u32 insert(char *RawStackDepot, uptr *Begin, uptr *End) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From where this is used, it seems to be the this pointer. Why do we need to pass it? BTW, I'm still figuring out how StackDepot works, if we could have some comment, that'll be easier for the review in the future

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, this is to satisfy the C++ standard rules about object bounds. This is the same we do for the AllocationRingBuffer, and this was discussed and agreed on in that review.

Comment on lines 121 to 123
if (!isPowerOfTwo(TabSize)) {
return false;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then do we want to dump any message so that we know why the depot is corrupted? It seems that we only bail out silently.

compiler-rt/lib/scudo/standalone/stack_depot.h Outdated Show resolved Hide resolved
@fmayer
Copy link
Contributor Author

fmayer commented Dec 13, 2023

Is it possible to add some new tests for this? I don't think we have any, so I presume you are using the Android debuggerd tests on a MTE device to verify this.

Added some tests. Also ran debuggerd tests (seems like I had forgotten to run them on the latest version before, I did run but broke something before uploading - very sorry about that).

Copy link

github-actions bot commented Dec 13, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

@fmayer
Copy link
Contributor Author

fmayer commented Jan 9, 2024

PTAL

uintptr_t MemoryAddr, size_t MemorySize) {
// N.B. we need to support corrupted data in any of the buffers here. We get
// this information from an external process (the crashing process) that
// shouldn't be able to crash crash_dump.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't fully understand what this comment means.

What is crash_dump, was that a typo?

Copy link
Contributor Author

@fmayer fmayer Jan 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

crash_dump is what uses this function in Android.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated this comment and clarified that crash_dump is talking about the Android use case.

compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
Comment on lines 1576 to 1590
if (RawStackDepot) {
RawStackDepotMap.unmap(RawStackDepotMap.getBase(),
RawStackDepotMap.getCapacity());
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, what does this mean? I don't see how this affects object bounds. To be fair, this code is incredibly hard to follow because of the way you are slicing memory and doing the casting and checking.

That's why I would suggest not allocating one big piece, but allocating separate pieces for each individual item. If you are worried about wasting memory, how much memory do you think you would waste? I think maybe it would get you an extra page or two at most. It would make the code easier to follow.

@fmayer
Copy link
Contributor Author

fmayer commented Feb 1, 2024

What should I do to drive this forward? I am not sure which comments you want addressed in which way to move forward. I reshuffled the test a bit to make it easier to read by splitting some assertions and the actual implementation. I don't think splitting into more and more mmaps is great as we need to plumb all that through libc and debuggerd in AOSP.

Copy link
Contributor

@ChiaHungDuan ChiaHungDuan left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the delay. Most remaining comments are minor style fixes. The last thing I tend to have it fixed is the DCHECK for isValid. The `isValid should take three arguments, RingSize, TabSize and BufSize and if it's invalid, stop the initialization.

compiler-rt/lib/scudo/standalone/combined.h Outdated Show resolved Hide resolved
compiler-rt/lib/scudo/standalone/combined.h Show resolved Hide resolved
}

uptr getStackDepotSize() {
initThreadMaybe();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mapAndInitializeRingBuffer is called in init which is called through initThreadMaybe (through initOnceMaybe).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. Sorry I missed that. Then is calling mapAndInitializeRingBuffer enough?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure that's a good idea. E.g. if we call mapAndInitializeRingBuffer and someone else calls initThreadMaybe on another thread, we would have to add synchronization into the mapAndInitializeRingBuffer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. In genereal, initThreadMaybe is used when we are going to do some memory allocation/deallocation. Given the mapAndInitializeRingBuffer is not synchronized at the function level. I'm fine with doing it like this. I will figure out if we can make it better

Comment on lines 1529 to 1532
constexpr auto kStacksPerRingBufferEntry = 2;
constexpr auto kMaxU32Pow2 = ~(UINT32_MAX >> 1);
static_assert(isPowerOfTwo(kMaxU32Pow2));
constexpr auto kFramesPerStack = 8;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please specify the type instead of auto.

RawStackDepot = reinterpret_cast<char *>(DepotMap.getBase());
auto *Depot = reinterpret_cast<StackDepot *>(DepotMap.getBase());
Depot->init(RingSize, TabSize);
DCHECK(Depot->isValid(StackDepotSize));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned, if the DCHECK fails, how do we know why it's failed? We still need to check all the logic in isValid and review the two arguments of init. Why don't we just stop the initialization when we see something wrong? If you already know the arguments are going to generate an invalid object, why do you still do that?

The reason for the vector example is, after construction, we don't need to verify the size and content like

std::vector<int> foo(4, 0);
DCHECK_EQ(foo.size(), 4);
for (int v : foo)
  DCHECK_EQ(v, 0);

Of course you may want to add some debug checks in the implementation but not asking the user to verify it. In this case, the user has to do the verification and I think it is kind of unreasonable.

@fmayer fmayer merged commit eff77d8 into llvm:main Feb 6, 2024
4 checks passed
fmayer added a commit that referenced this pull request Feb 6, 2024
jrguzman-ms pushed a commit to msft-mirror-aosp/platform.external.scudo that referenced this pull request Feb 6, 2024
…(#80777)

Reverts llvm/llvm-project#74515

Broke build: https://lab.llvm.org/buildbot/#/builders/75/builds/42512

GitOrigin-RevId: c3291253c3b5d1794492ccebe39b7c2c5f74c378
Change-Id: Ie2145eb12108da8e7ed090d71cc71db4fa25fee3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants