[scudo][standalone] Compact pointers for Caches/Batches
This CL introduces configuration options that allow pointers to be
compacted in the thread-specific caches and transfer batches. This makes
it possible for them to use 32 bits of space instead of 64 bits with the
64-bit Primary, cutting the size of the caches and batches nearly in half
(and with it the memory used by size class 0). The cost is an additional
read from the region information on the fast path.
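
The idea, in isolation: a compact pointer is the pointer's offset from its
region base, shifted down by a power-of-2 scale. A minimal sketch of the
arithmetic outside of scudo (Base, Scale, and the function names here are
illustrative, not the scudo API):

#include <cstdint>

using uptr = uintptr_t;
using u32 = uint32_t;

// Encode: store the scaled offset of Ptr within its region. This fits in
// 32 bits as long as the region spans at most 2^(32 + Scale) bytes.
inline u32 compact(uptr Base, uptr Ptr, uptr Scale) {
  return static_cast<u32>((Ptr - Base) >> Scale);
}

// Decode: Ptr = Base + (CompactPtr << Scale), the formula quoted in the
// allocator_config.h comment below.
inline uptr decompact(uptr Base, u32 CompactPtr, uptr Scale) {
  return Base + (static_cast<uptr>(CompactPtr) << Scale);
}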

This is not a new idea: it is already used in the sanitizer_common 64-bit
primary. The difference here is that it is configurable via the allocator
config, with the possibility of not compacting at all.

This CL enables compacting pointers in the Android and Fuchsia default
configurations.

Differential Revision: https://reviews.llvm.org/D96435
Kostya Kortchinsky committed Feb 25, 2021
1 parent ec4408a commit 2c56776
Showing 9 changed files with 177 additions and 74 deletions.
42 changes: 41 additions & 1 deletion compiler-rt/lib/scudo/standalone/allocator_config.h
@@ -21,6 +21,35 @@

namespace scudo {

+ // The combined allocator uses a structure as a template argument that
+ // specifies the configuration options for the various subcomponents of the
+ // allocator.
+ //
+ // struct ExampleConfig {
+ //   // SizeClassMap to use with the Primary.
+ //   using SizeClassMap = DefaultSizeClassMap;
+ //   // Indicates possible support for Memory Tagging.
+ //   static const bool MaySupportMemoryTagging = false;
+ //   // Defines the Primary allocator to use.
+ //   typedef SizeClassAllocator64<ExampleConfig> Primary;
+ //   // Log2 of the size of a size class region, as used by the Primary.
+ //   static const uptr PrimaryRegionSizeLog = 30U;
+ //   // Defines the type and scale of a compact pointer. A compact pointer can
+ //   // be understood as the offset of a pointer within the region it belongs
+ //   // to, in increments of a power-of-2 scale.
+ //   // eg: Ptr = Base + (CompactPtr << Scale).
+ //   typedef u32 PrimaryCompactPtrT;
+ //   static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+ //   // Defines the minimal & maximal release interval that can be set.
+ //   static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
+ //   static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
+ //   // Defines the type of cache used by the Secondary. Some additional
+ //   // configuration entries can be necessary depending on the Cache.
+ //   typedef MapAllocatorNoCache SecondaryCache;
+ //   // Thread-Specific Data Registry used, shared or exclusive.
+ //   template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>;
+ // };

// Default configurations for various platforms.

struct DefaultConfig {
@@ -29,10 +58,13 @@ struct DefaultConfig {

#if SCUDO_CAN_USE_PRIMARY64
typedef SizeClassAllocator64<DefaultConfig> Primary;
- static const uptr PrimaryRegionSizeLog = 30U;
+ static const uptr PrimaryRegionSizeLog = 32U;
+ typedef uptr PrimaryCompactPtrT;
+ static const uptr PrimaryCompactPtrScale = 0;
#else
typedef SizeClassAllocator32<DefaultConfig> Primary;
static const uptr PrimaryRegionSizeLog = 19U;
+ typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
@@ -55,9 +87,12 @@ struct AndroidConfig {
#if SCUDO_CAN_USE_PRIMARY64
typedef SizeClassAllocator64<AndroidConfig> Primary;
static const uptr PrimaryRegionSizeLog = 28U;
+ typedef u32 PrimaryCompactPtrT;
+ static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
#else
typedef SizeClassAllocator32<AndroidConfig> Primary;
static const uptr PrimaryRegionSizeLog = 18U;
+ typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000;
@@ -81,9 +116,12 @@ struct AndroidSvelteConfig {
#if SCUDO_CAN_USE_PRIMARY64
typedef SizeClassAllocator64<AndroidSvelteConfig> Primary;
static const uptr PrimaryRegionSizeLog = 27U;
+ typedef u32 PrimaryCompactPtrT;
+ static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
#else
typedef SizeClassAllocator32<AndroidSvelteConfig> Primary;
static const uptr PrimaryRegionSizeLog = 16U;
+ typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000;
@@ -107,6 +145,8 @@ struct FuchsiaConfig {

typedef SizeClassAllocator64<FuchsiaConfig> Primary;
static const uptr PrimaryRegionSizeLog = 30U;
+ typedef u32 PrimaryCompactPtrT;
+ static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;

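A quick way to see why u32 suffices for these configs: with
PrimaryRegionSizeLog = 28 and a scale of SCUDO_MIN_ALIGNMENT_LOG
(typically 4 on 64-bit targets), offsets within a region need at most
28 - 4 = 24 bits. A standalone sanity check, using assumed constants
rather than the real config headers:

// Hypothetical check, not part of the commit: the scaled region offset
// must fit in the 32-bit compact pointer type.
constexpr unsigned RegionSizeLog = 28;  // AndroidConfig value above
constexpr unsigned CompactPtrScale = 4; // assumed SCUDO_MIN_ALIGNMENT_LOG
static_assert(RegionSizeLog - CompactPtrScale <= 32,
              "compact pointer cannot address the whole region");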
30 changes: 15 additions & 15 deletions compiler-rt/lib/scudo/standalone/local_cache.h
@@ -17,24 +17,25 @@ namespace scudo {

template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
+ typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;

struct TransferBatch {
static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint;
- void setFromArray(void **Array, u32 N) {
+ void setFromArray(CompactPtrT *Array, u32 N) {
DCHECK_LE(N, MaxNumCached);
Count = N;
- memcpy(Batch, Array, sizeof(void *) * Count);
+ memcpy(Batch, Array, sizeof(Batch[0]) * Count);
}
void clear() { Count = 0; }
- void add(void *P) {
+ void add(CompactPtrT P) {
DCHECK_LT(Count, MaxNumCached);
Batch[Count++] = P;
}
- void copyToArray(void **Array) const {
-   memcpy(Array, Batch, sizeof(void *) * Count);
+ void copyToArray(CompactPtrT *Array) const {
+   memcpy(Array, Batch, sizeof(Batch[0]) * Count);
}
u32 getCount() const { return Count; }
- void *get(u32 I) const {
+ CompactPtrT get(u32 I) const {
DCHECK_LE(I, Count);
return Batch[I];
}
@@ -45,7 +46,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {

private:
u32 Count;
- void *Batch[MaxNumCached];
+ CompactPtrT Batch[MaxNumCached];
};

void initLinkerInitialized(GlobalStats *S, SizeClassAllocator *A) {
@@ -78,13 +79,10 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
// Count, while Chunks might be further off (depending on Count). That keeps
// the memory accesses in close quarters.
const uptr ClassSize = C->ClassSize;
- void *P = C->Chunks[--C->Count];
- // The jury is still out as to whether any kind of PREFETCH here increases
- // performance. It definitely decreases performance on Android though.
- // if (!SCUDO_ANDROID) PREFETCH(P);
+ CompactPtrT CompactP = C->Chunks[--C->Count];
Stats.add(StatAllocated, ClassSize);
Stats.sub(StatFree, ClassSize);
- return P;
+ return Allocator->decompactPtr(ClassId, CompactP);
}

void deallocate(uptr ClassId, void *P) {
@@ -97,7 +95,8 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
drain(C, ClassId);
// See comment in allocate() about memory accesses.
const uptr ClassSize = C->ClassSize;
- C->Chunks[C->Count++] = P;
+ C->Chunks[C->Count++] =
+     Allocator->compactPtr(ClassId, reinterpret_cast<uptr>(P));
Stats.sub(StatAllocated, ClassSize);
Stats.add(StatFree, ClassSize);
}
@@ -124,7 +123,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
u32 Count;
u32 MaxCount;
uptr ClassSize;
- void *Chunks[2 * TransferBatch::MaxNumCached];
+ CompactPtrT Chunks[2 * TransferBatch::MaxNumCached];
};
PerClass PerClassArray[NumClasses];
LocalStats Stats;
@@ -166,7 +165,8 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {

NOINLINE void drain(PerClass *C, uptr ClassId) {
const u32 Count = Min(C->MaxCount / 2, C->Count);
- TransferBatch *B = createBatch(ClassId, C->Chunks[0]);
+ TransferBatch *B =
+     createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0]));
if (UNLIKELY(!B))
reportOutOfMemory(
SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId));
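The space saving in TransferBatch and PerClass is direct: the arrays now
hold CompactPtrT instead of void *. A toy illustration of the halving,
using an assumed MaxNumCached value rather than the real
SizeClassMap::MaxNumCachedHint:

#include <cstdint>
#include <cstdio>

int main() {
  constexpr unsigned MaxNumCached = 14; // illustrative only
  // On a 64-bit target: 112 bytes per batch with raw pointers...
  printf("raw:     %zu bytes\n", sizeof(void *) * MaxNumCached);
  // ...versus 56 bytes with 32-bit compact pointers.
  printf("compact: %zu bytes\n", sizeof(uint32_t) * MaxNumCached);
}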
27 changes: 20 additions & 7 deletions compiler-rt/lib/scudo/standalone/primary32.h
@@ -41,6 +41,7 @@ namespace scudo {

template <typename Config> class SizeClassAllocator32 {
public:
+ typedef typename Config::PrimaryCompactPtrT CompactPtrT;
typedef typename Config::SizeClassMap SizeClassMap;
// The bytemap can only track UINT8_MAX - 1 classes.
static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), "");
@@ -67,7 +68,7 @@ template <typename Config> class SizeClassAllocator32 {

u32 Seed;
const u64 Time = getMonotonicTime();
- if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
+ if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
Seed = static_cast<u32>(
Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
for (uptr I = 0; I < NumClasses; I++) {
@@ -102,6 +103,14 @@ template <typename Config> class SizeClassAllocator32 {
PossibleRegions.unmapTestOnly();
}

+ CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const {
+   return static_cast<CompactPtrT>(Ptr);
+ }
+
+ void *decompactPtr(UNUSED uptr ClassId, CompactPtrT CompactPtr) const {
+   return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr));
+ }

TransferBatch *popBatch(CacheT *C, uptr ClassId) {
DCHECK_LT(ClassId, NumClasses);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
@@ -359,17 +368,18 @@ template <typename Config> class SizeClassAllocator32 {
// Fill the transfer batches and put them in the size-class freelist. We
// need to randomize the blocks for security purposes, so we first fill a
// local array that we then shuffle before populating the batches.
- void *ShuffleArray[ShuffleArraySize];
+ CompactPtrT ShuffleArray[ShuffleArraySize];
DCHECK_LE(NumberOfBlocks, ShuffleArraySize);

uptr P = Region + Offset;
for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
- ShuffleArray[I] = reinterpret_cast<void *>(P);
+ ShuffleArray[I] = reinterpret_cast<CompactPtrT>(P);
// No need to shuffle the batches size class.
if (ClassId != SizeClassMap::BatchClassId)
shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState);
for (u32 I = 0; I < NumberOfBlocks;) {
- TransferBatch *B = C->createBatch(ClassId, ShuffleArray[I]);
+ TransferBatch *B =
+     C->createBatch(ClassId, reinterpret_cast<void *>(ShuffleArray[I]));
if (UNLIKELY(!B))
return nullptr;
const u32 N = Min(MaxCount, NumberOfBlocks - I);
@@ -435,7 +445,7 @@ template <typename Config> class SizeClassAllocator32 {
if (BlockSize < PageSize / 16U) {
if (!Force && BytesPushed < Sci->AllocatedUser / 16U)
return 0;
- // We want 8x% to 9x% free bytes (the larger the bock, the lower the %).
+ // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
(100U - 1U - BlockSize / 16U))
return 0;
@@ -463,8 +473,11 @@ template <typename Config> class SizeClassAllocator32 {
auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
};
- releaseFreeMemoryToOS(Sci->FreeList, Base, RegionSize, NumberOfRegions,
-     BlockSize, &Recorder, SkipRegion);
+ auto DecompactPtr = [](CompactPtrT CompactPtr) {
+   return reinterpret_cast<uptr>(CompactPtr);
+ };
+ releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize,
+     &Recorder, DecompactPtr, SkipRegion);
if (Recorder.getReleasedRangesCount() > 0) {
Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
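For SizeClassAllocator32 the transformation is the identity, as the
compactPtr/decompactPtr definitions above show: blocks already fit in a
32-bit uptr, so its configs keep PrimaryCompactPtrT = uptr with no scale.
What the release path gains is the DecompactPtr callback, which lets the
shared release code stay agnostic of the pointer encoding; the 64-bit
primary (whose diff is not shown in this excerpt) supplies a scaled
decoding instead. A minimal sketch of that callback pattern, with generic
names rather than the scudo release.h interface:

#include <cstdint>
#include <vector>

using uptr = uintptr_t;

// The walker only ever sees decoded addresses; each primary supplies its
// own DecompactPtr: the identity for the 32-bit primary, or
// Base + (CompactPtr << Scale) for the 64-bit one.
template <typename CompactPtrT, typename Decompact, typename Visit>
void forEachFreeBlock(const std::vector<CompactPtrT> &FreeList,
                      Decompact DecompactPtr, Visit V) {
  for (CompactPtrT P : FreeList)
    V(DecompactPtr(P));
}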
