Skip to content

Commit

Permalink
[scudo][standalone] Introduce the Primary(s) and LocalCache
Browse files Browse the repository at this point in the history
Summary:
This CL introduces the 32 & 64-bit primary allocators, and associated
Local Cache. While the general idea is mostly similar to what exists
in sanitizer_common, it departs from the original code somewhat
significantly:
- the 64-bit primary no longer uses a free array at the end of a region
  but uses batches of free blocks in region 0, allowing for a
  convergence with the 32-bit primary behavior;
- as a result, there is only one (templated) local cache type for both
  primary allocators, and memory reclaiming can be implemented similarly
  for the 32-bit & 64-bit platforms;
- 64-bit primary regions are handled a bit differently: we do not
  reserve 4TB of memory that we split, but reserve `NumClasses *
  2^RegionSizeLog`, each region being offset by a random number of
  pages from its computed base. A side effect of this is that the 64-bit
  primary works on 32-bit platforms (I don't think we want to encourage
  it but it's an interesting side effect);

Reviewers: vitalybuka, eugenis, morehouse, hctim

Reviewed By: morehouse

Subscribers: srhines, mgorny, delcypher, jfb, #sanitizers, llvm-commits

Tags: #llvm, #sanitizers

Differential Revision: https://reviews.llvm.org/D61745

llvm-svn: 361159
  • Loading branch information
Kostya Kortchinsky authored and MrSidims committed May 24, 2019
1 parent b109073 commit 4ce83d0
Show file tree
Hide file tree
Showing 7 changed files with 1,128 additions and 9 deletions.
3 changes: 3 additions & 0 deletions compiler-rt/lib/scudo/standalone/CMakeLists.txt
Expand Up @@ -68,8 +68,11 @@ set(SCUDO_HEADERS
internal_defs.h
linux.h
list.h
local_cache.h
mutex.h
platform.h
primary32.h
primary64.h
quarantine.h
release.h
report.h
Expand Down
178 changes: 178 additions & 0 deletions compiler-rt/lib/scudo/standalone/local_cache.h
@@ -0,0 +1,178 @@
//===-- local_cache.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_LOCAL_CACHE_H_
#define SCUDO_LOCAL_CACHE_H_

#include "internal_defs.h"
#include "stats.h"

namespace scudo {

// Per-thread cache fronting a size class allocator (the primary). Free blocks
// of each size class are held in per-class arrays of pointers and exchanged
// with the primary in units of TransferBatch, amortizing primary accesses.
template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
  typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;

  // A batch of pointers to free blocks of a single size class; the transfer
  // unit between a local cache and the primary allocator.
  struct TransferBatch {
    static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint;
    // Fills the batch with the first N pointers of Array.
    void setFromArray(void **Array, u32 N) {
      DCHECK_LE(N, MaxNumCached);
      for (u32 I = 0; I < N; I++)
        Batch[I] = Array[I];
      Count = N;
    }
    void clear() { Count = 0; }
    void add(void *P) {
      DCHECK_LT(Count, MaxNumCached);
      Batch[Count++] = P;
    }
    void copyToArray(void **Array) const {
      for (u32 I = 0; I < Count; I++)
        Array[I] = Batch[I];
    }
    u32 getCount() const { return Count; }
    void *get(u32 I) const {
      // Valid indices are [0, Count): an LE check would let I == Count slip
      // through and read one past the last cached pointer (and past the end
      // of Batch altogether when Count == MaxNumCached).
      DCHECK_LT(I, Count);
      return Batch[I];
    }
    // Maximum number of blocks to cache for a class of the given Size, capped
    // by the storage capacity of a batch.
    static u32 MaxCached(uptr Size) {
      return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size));
    }
    TransferBatch *Next;

  private:
    u32 Count;
    void *Batch[MaxNumCached];
  };

  // Links the local stats into the global ones; assumes the object is in an
  // all-zero (linker-initialized) state.
  void initLinkerInitialized(GlobalStats *S, SizeClassAllocator *A) {
    Stats.initLinkerInitialized();
    if (S)
      S->link(&Stats);
    Allocator = A;
  }

  void init(GlobalStats *S, SizeClassAllocator *A) {
    memset(this, 0, sizeof(*this));
    initLinkerInitialized(S, A);
  }

  // Returns all cached blocks to the primary and unlinks the local stats.
  void destroy(GlobalStats *S) {
    drain();
    if (S)
      S->unlink(&Stats);
  }

  // Returns a block of the given class, refilling the per-class array from
  // the primary if it is empty. Returns nullptr if the refill fails.
  void *allocate(uptr ClassId) {
    CHECK_LT(ClassId, NumClasses);
    PerClass *C = &PerClassArray[ClassId];
    if (C->Count == 0) {
      if (UNLIKELY(!refill(C, ClassId)))
        return nullptr;
      DCHECK_GT(C->Count, 0);
    }
    // We read ClassSize first before accessing Chunks because it's adjacent to
    // Count, while Chunks might be further off (depending on Count). That keeps
    // the memory accesses in close quarters.
    const uptr ClassSize = C->ClassSize;
    void *P = C->Chunks[--C->Count];
    // The jury is still out as to whether any kind of PREFETCH here increases
    // performance. It definitely decreases performance on Android though.
    // if (!SCUDO_ANDROID) PREFETCH(P);
    Stats.add(StatAllocated, ClassSize);
    return P;
  }

  // Caches block P of the given class, first draining part of the per-class
  // array back to the primary if it is full.
  void deallocate(uptr ClassId, void *P) {
    CHECK_LT(ClassId, NumClasses);
    PerClass *C = &PerClassArray[ClassId];
    // We still have to initialize the cache in the event that the first heap
    // operation in a thread is a deallocation.
    initCacheMaybe(C);
    if (C->Count == C->MaxCount)
      drain(C, ClassId);
    // See comment in allocate() about memory accesses.
    const uptr ClassSize = C->ClassSize;
    C->Chunks[C->Count++] = P;
    Stats.sub(StatAllocated, ClassSize);
  }

  // Returns all the cached blocks of every class to the primary.
  void drain() {
    for (uptr I = 0; I < NumClasses; I++) {
      PerClass *C = &PerClassArray[I];
      while (C->Count > 0)
        drain(C, I);
    }
  }

  // Returns storage for a TransferBatch. For the batch class itself, block B
  // (the first block being transferred) doubles as the batch storage; for any
  // other class a block of the batch class is allocated.
  TransferBatch *createBatch(uptr ClassId, void *B) {
    if (ClassId != SizeClassMap::BatchClassId)
      B = allocate(SizeClassMap::BatchClassId);
    return reinterpret_cast<TransferBatch *>(B);
  }

  LocalStats &getStats() { return Stats; }

private:
  static const uptr NumClasses = SizeClassMap::NumClasses;
  // Per-class free array. ClassSize is kept adjacent to Count/MaxCount so the
  // hot paths touch memory in close quarters before indexing into Chunks.
  struct PerClass {
    u32 Count;
    u32 MaxCount;
    uptr ClassSize;
    void *Chunks[2 * TransferBatch::MaxNumCached];
  };
  PerClass PerClassArray[NumClasses];
  LocalStats Stats;
  SizeClassAllocator *Allocator;

  // A zero MaxCount means initCache() has not run yet for this cache.
  ALWAYS_INLINE void initCacheMaybe(PerClass *C) {
    if (LIKELY(C->MaxCount))
      return;
    initCache();
    DCHECK_NE(C->MaxCount, 0U);
  }

  NOINLINE void initCache() {
    for (uptr I = 0; I < NumClasses; I++) {
      PerClass *P = &PerClassArray[I];
      const uptr Size = SizeClassAllocator::getSizeByClassId(I);
      // Chunks can hold twice what a single batch transfers, so a refill
      // never overflows a half-drained array.
      P->MaxCount = 2 * TransferBatch::MaxCached(Size);
      P->ClassSize = Size;
    }
  }

  // Releases the storage of a batch; a no-op for the batch class, whose
  // storage is one of the blocks being transferred (see createBatch).
  void destroyBatch(uptr ClassId, void *B) {
    if (ClassId != SizeClassMap::BatchClassId)
      deallocate(SizeClassMap::BatchClassId, B);
  }

  // Refills the per-class array with a batch of blocks popped from the
  // primary. Returns false if the primary could not provide a batch.
  NOINLINE bool refill(PerClass *C, uptr ClassId) {
    initCacheMaybe(C);
    TransferBatch *B = Allocator->popBatch(this, ClassId);
    if (UNLIKELY(!B))
      return false;
    DCHECK_GT(B->getCount(), 0);
    B->copyToArray(C->Chunks);
    C->Count = B->getCount();
    destroyBatch(ClassId, B);
    return true;
  }

  // Pushes up to half of the cached blocks of ClassId (the most recently
  // cached ones) back to the primary in a single batch.
  NOINLINE void drain(PerClass *C, uptr ClassId) {
    const u32 Count = Min(C->MaxCount / 2, C->Count);
    const uptr FirstIndexToDrain = C->Count - Count;
    TransferBatch *B = createBatch(ClassId, C->Chunks[FirstIndexToDrain]);
    CHECK(B);
    B->setFromArray(&C->Chunks[FirstIndexToDrain], Count);
    C->Count -= Count;
    Allocator->pushBatch(ClassId, B);
  }
};

} // namespace scudo

#endif // SCUDO_LOCAL_CACHE_H_

0 comments on commit 4ce83d0

Please sign in to comment.