diff --git a/compiler-rt/lib/scudo/standalone/allocator_common.h b/compiler-rt/lib/scudo/standalone/allocator_common.h
index 46dc7c0f3b914..95f4776ac596d 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_common.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_common.h
@@ -40,7 +40,6 @@ template <class SizeClassAllocator> struct TransferBatch {
     B->Count = static_cast<u16>(B->Count - N);
   }
   void clear() { Count = 0; }
-  bool empty() { return Count == 0; }
   void add(CompactPtrT P) {
     DCHECK_LT(Count, MaxNumCached);
     Batch[Count++] = P;
@@ -49,12 +48,6 @@ template <class SizeClassAllocator> struct TransferBatch {
     memcpy(Array, Batch, sizeof(Batch[0]) * Count);
     clear();
   }
-
-  void moveNToArray(CompactPtrT *Array, u16 N) {
-    DCHECK_LE(N, Count);
-    memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * Count);
-    Count -= N;
-  }
   u16 getCount() const { return Count; }
   bool isEmpty() const { return Count == 0U; }
   CompactPtrT get(u16 I) const {
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index c86e75b8fd66a..4d03b282d000d 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -191,21 +191,38 @@ template <typename Config> class SizeClassAllocator32 {
     return BlockSize > PageSize;
   }
 
+  // Note that the `MaxBlockCount` will be used when we support arbitrary blocks
+  // count. Now it's the same as the number of blocks stored in the
+  // `TransferBatch`.
   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
-                const u16 MaxBlockCount) {
+                UNUSED const u16 MaxBlockCount) {
+    TransferBatchT *B = popBatch(C, ClassId);
+    if (!B)
+      return 0;
+
+    const u16 Count = B->getCount();
+    DCHECK_GT(Count, 0U);
+    B->moveToArray(ToArray);
+
+    if (ClassId != SizeClassMap::BatchClassId)
+      C->deallocate(SizeClassMap::BatchClassId, B);
+
+    return Count;
+  }
+
+  TransferBatchT *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
     ScopedLock L(Sci->Mutex);
-
-    u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
-    if (UNLIKELY(PopCount == 0)) {
+    TransferBatchT *B = popBatchImpl(C, ClassId, Sci);
+    if (UNLIKELY(!B)) {
       if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
-        return 0U;
-      PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
-      DCHECK_NE(PopCount, 0U);
+        return nullptr;
+      B = popBatchImpl(C, ClassId, Sci);
+      // if `populateFreeList` succeeded, we are supposed to get free blocks.
+      DCHECK_NE(B, nullptr);
     }
-
-    return PopCount;
+    return B;
   }
 
   // Push the array of free blocks to the designated batch group.
@@ -493,7 +510,7 @@ template <typename Config> class SizeClassAllocator32 {
   // by TransferBatch is also free for use. We don't need to recycle the
   // TransferBatch. Note that the correctness is maintained by the invariant,
   //
-  //   Each popBlocks() request returns the entire TransferBatch. Returning
+  //   The unit of each popBatch() request is entire TransferBatch. Return
   //   part of the blocks in a TransferBatch is invalid.
   //
   // This ensures that TransferBatch won't leak the address itself while it's
@@ -617,7 +634,7 @@ template <typename Config> class SizeClassAllocator32 {
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+      BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
 
       return BG;
     };
@@ -709,11 +726,14 @@ template <typename Config> class SizeClassAllocator32 {
     InsertBlocks(Cur, Array + Size - Count, Count);
   }
 
-  u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
-                    CompactPtrT *ToArray, const u16 MaxBlockCount)
+  // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+  // group id will be considered first.
+  //
+  // The region mutex needs to be held while calling this method.
+  TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
       REQUIRES(Sci->Mutex) {
     if (Sci->FreeListInfo.BlockList.empty())
-      return 0U;
+      return nullptr;
 
     SinglyLinkedList<TransferBatchT> &Batches =
         Sci->FreeListInfo.BlockList.front()->Batches;
@@ -726,57 +746,33 @@ template <typename Config> class SizeClassAllocator32 {
       // Block used by `BatchGroup` is from BatchClassId. Turn the block into
      // `TransferBatch` with single block.
       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
-      ToArray[0] =
-          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
+      TB->clear();
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
       Sci->FreeListInfo.PoppedBlocks += 1;
-      return 1U;
+      return TB;
     }
 
-    // So far, instead of always filling the blocks to `MaxBlockCount`, we only
-    // examine single `TransferBatch` to minimize the time spent on the primary
-    // allocator. Besides, the sizes of `TransferBatch` and
-    // `CacheT::getMaxCached()` may also impact the time spent on accessing the
-    // primary allocator.
-    // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
-    // blocks and/or adjust the size of `TransferBatch` according to
-    // `CacheT::getMaxCached()`.
     TransferBatchT *B = Batches.front();
+    Batches.pop_front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
 
-    // BachClassId should always take all blocks in the TransferBatch. Read the
-    // comment in `pushBatchClassBlocks()` for more details.
-    const u16 PopCount = ClassId == SizeClassMap::BatchClassId
-                             ? B->getCount()
-                             : Min(MaxBlockCount, B->getCount());
-    B->moveNToArray(ToArray, PopCount);
-
-    // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
-    // done without holding `Mutex`.
-    if (B->empty()) {
-      Batches.pop_front();
-      // `TransferBatch` of BatchClassId is self-contained, no need to
-      // deallocate. Read the comment in `pushBatchClassBlocks()` for more
-      // details.
+    if (Batches.empty()) {
+      BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
+      Sci->FreeListInfo.BlockList.pop_front();
+
+      // We don't keep BatchGroup with zero blocks to avoid empty-checking while
+      // allocating. Note that block used by constructing BatchGroup is recorded
+      // as free blocks in the last element of BatchGroup::Batches. Which means,
+      // once we pop the last TransferBatch, the block is implicitly
+      // deallocated.
       if (ClassId != SizeClassMap::BatchClassId)
-        C->deallocate(SizeClassMap::BatchClassId, B);
-
-      if (Batches.empty()) {
-        BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
-        Sci->FreeListInfo.BlockList.pop_front();
-
-        // We don't keep BatchGroup with zero blocks to avoid empty-checking
-        // while allocating. Note that block used for constructing BatchGroup is
-        // recorded as free blocks in the last element of BatchGroup::Batches.
-        // Which means, once we pop the last TransferBatch, the block is
-        // implicitly deallocated.
-        if (ClassId != SizeClassMap::BatchClassId)
-          C->deallocate(SizeClassMap::BatchClassId, BG);
-      }
+        C->deallocate(SizeClassMap::BatchClassId, BG);
     }
 
-    Sci->FreeListInfo.PoppedBlocks += PopCount;
-    return PopCount;
+    Sci->FreeListInfo.PoppedBlocks += B->getCount();
+    return B;
   }
 
   NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 971a65ee15f0f..9a642d23620e1 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -12,7 +12,6 @@
 #include "allocator_common.h"
 #include "bytemap.h"
 #include "common.h"
-#include "condition_variable.h"
 #include "list.h"
 #include "local_cache.h"
 #include "mem_map.h"
@@ -23,6 +22,8 @@
 #include "string_utils.h"
 #include "thread_annotations.h"
 
+#include "condition_variable.h"
+
 namespace scudo {
 
 // SizeClassAllocator64 is an allocator tuned for 64-bit address space.
@@ -220,24 +221,41 @@ template <typename Config> class SizeClassAllocator64 {
     DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
   }
 
+  // Note that the `MaxBlockCount` will be used when we support arbitrary blocks
+  // count. Now it's the same as the number of blocks stored in the
+  // `TransferBatch`.
   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
-                const u16 MaxBlockCount) {
+                UNUSED const u16 MaxBlockCount) {
+    TransferBatchT *B = popBatch(C, ClassId);
+    if (!B)
+      return 0;
+
+    const u16 Count = B->getCount();
+    DCHECK_GT(Count, 0U);
+    B->moveToArray(ToArray);
+
+    if (ClassId != SizeClassMap::BatchClassId)
+      C->deallocate(SizeClassMap::BatchClassId, B);
+
+    return Count;
+  }
+
+  TransferBatchT *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     RegionInfo *Region = getRegionInfo(ClassId);
-    u16 PopCount = 0;
 
     {
       ScopedLock L(Region->FLLock);
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-      if (PopCount != 0U)
-        return PopCount;
+      TransferBatchT *B = popBatchImpl(C, ClassId, Region);
+      if (LIKELY(B))
+        return B;
     }
 
     bool ReportRegionExhausted = false;
+    TransferBatchT *B = nullptr;
 
     if (conditionVariableEnabled()) {
-      PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount,
-                                 ReportRegionExhausted);
+      B = popBatchWithCV(C, ClassId, Region, ReportRegionExhausted);
     } else {
       while (true) {
         // When two threads compete for `Region->MMLock`, we only want one of
@@ -246,15 +264,13 @@ template <typename Config> class SizeClassAllocator64 {
         ScopedLock ML(Region->MMLock);
         {
           ScopedLock FL(Region->FLLock);
-          PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-          if (PopCount != 0U)
-            return PopCount;
+          if ((B = popBatchImpl(C, ClassId, Region)))
+            break;
         }
 
         const bool RegionIsExhausted = Region->Exhausted;
         if (!RegionIsExhausted)
-          PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
-                                                  MaxBlockCount);
+          B = populateFreeListAndPopBatch(C, ClassId, Region);
         ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
         break;
       }
@@ -270,7 +286,7 @@ template <typename Config> class SizeClassAllocator64 {
       reportOutOfBatchClass();
     }
 
-    return PopCount;
+    return B;
   }
 
   // Push the array of free blocks to the designated batch group.
@@ -624,7 +640,7 @@ template <typename Config> class SizeClassAllocator64 {
   // by TransferBatch is also free for use. We don't need to recycle the
   // TransferBatch. Note that the correctness is maintained by the invariant,
   //
-  //   Each popBlocks() request returns the entire TransferBatch. Returning
+  //   The unit of each popBatch() request is entire TransferBatch. Return
   //   part of the blocks in a TransferBatch is invalid.
   //
   // This ensures that TransferBatch won't leak the address itself while it's
@@ -747,7 +763,7 @@ template <typename Config> class SizeClassAllocator64 {
      BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+      BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
 
       return BG;
     };
@@ -839,10 +855,9 @@ template <typename Config> class SizeClassAllocator64 {
     InsertBlocks(Cur, Array + Size - Count, Count);
   }
 
-  u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
-                      CompactPtrT *ToArray, const u16 MaxBlockCount,
-                      bool &ReportRegionExhausted) {
-    u16 PopCount = 0;
+  TransferBatchT *popBatchWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
+                                 bool &ReportRegionExhausted) {
+    TransferBatchT *B = nullptr;
 
     while (true) {
       // We only expect one thread doing the freelist refillment and other
@@ -863,8 +878,7 @@ template <typename Config> class SizeClassAllocator64 {
 
       const bool RegionIsExhausted = Region->Exhausted;
       if (!RegionIsExhausted)
-        PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
-                                                MaxBlockCount);
+        B = populateFreeListAndPopBatch(C, ClassId, Region);
       ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
 
       {
@@ -891,8 +905,7 @@ template <typename Config> class SizeClassAllocator64 {
      // blocks were used up right after the refillment. Therefore, we have to
       // check if someone is still populating the freelist.
       ScopedLock FL(Region->FLLock);
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-      if (PopCount != 0U)
+      if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
         break;
 
       if (!Region->isPopulatingFreeList)
@@ -905,19 +918,21 @@ template <typename Config> class SizeClassAllocator64 {
      // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
       Region->FLLockCV.wait(Region->FLLock);
 
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-      if (PopCount != 0U)
+      if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
        break;
     }
 
-    return PopCount;
+    return B;
   }
 
-  u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
-                    CompactPtrT *ToArray, const u16 MaxBlockCount)
+  // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+  // group id will be considered first.
+  //
+  // The region mutex needs to be held while calling this method.
+  TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region)
      REQUIRES(Region->FLLock) {
     if (Region->FreeListInfo.BlockList.empty())
-      return 0U;
+      return nullptr;
 
     SinglyLinkedList<TransferBatchT> &Batches =
         Region->FreeListInfo.BlockList.front()->Batches;
@@ -930,64 +945,39 @@ template <typename Config> class SizeClassAllocator64 {
      // Block used by `BatchGroup` is from BatchClassId. Turn the block into
       // `TransferBatch` with single block.
       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
-      ToArray[0] =
-          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
+      TB->clear();
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
       Region->FreeListInfo.PoppedBlocks += 1;
-      return 1U;
+      return TB;
     }
 
-    // So far, instead of always fill blocks to `MaxBlockCount`, we only exmaine
-    // single `TransferBatch` to minimize the time spent in the primary
-    // allocator. Besides, the sizes of `TransferBatch` and
-    // `CacheT::getMaxCached()` may also impact the time spent on accessing the
-    // primary allocator.
-    // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
-    // blocks and/or adjust the size of `TransferBatch` according to
-    // `CacheT::getMaxCached()`.
     TransferBatchT *B = Batches.front();
+    Batches.pop_front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
 
-    // BachClassId should always take all blocks in the TransferBatch. Read the
-    // comment in `pushBatchClassBlocks()` for more details.
-    const u16 PopCount = ClassId == SizeClassMap::BatchClassId
-                             ? B->getCount()
-                             : Min(MaxBlockCount, B->getCount());
-    B->moveNToArray(ToArray, PopCount);
-
-    // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
-    // done without holding `FLLock`.
-    if (B->empty()) {
-      Batches.pop_front();
-      // `TransferBatch` of BatchClassId is self-contained, no need to
-      // deallocate. Read the comment in `pushBatchClassBlocks()` for more
-      // details.
-      if (ClassId != SizeClassMap::BatchClassId)
-        C->deallocate(SizeClassMap::BatchClassId, B);
-
-      if (Batches.empty()) {
-        BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
-        Region->FreeListInfo.BlockList.pop_front();
+    if (Batches.empty()) {
+      BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
+      Region->FreeListInfo.BlockList.pop_front();
 
-        // We don't keep BatchGroup with zero blocks to avoid empty-checking
-        // while allocating. Note that block used for constructing BatchGroup is
-        // recorded as free blocks in the last element of BatchGroup::Batches.
-        // Which means, once we pop the last TransferBatch, the block is
-        // implicitly deallocated.
-        if (ClassId != SizeClassMap::BatchClassId)
-          C->deallocate(SizeClassMap::BatchClassId, BG);
-      }
+      // We don't keep BatchGroup with zero blocks to avoid empty-checking while
+      // allocating. Note that block used by constructing BatchGroup is recorded
+      // as free blocks in the last element of BatchGroup::Batches. Which means,
+      // once we pop the last TransferBatch, the block is implicitly
+      // deallocated.
+      if (ClassId != SizeClassMap::BatchClassId)
+        C->deallocate(SizeClassMap::BatchClassId, BG);
     }
 
-    Region->FreeListInfo.PoppedBlocks += PopCount;
+    Region->FreeListInfo.PoppedBlocks += B->getCount();
 
-    return PopCount;
+    return B;
   }
 
-  NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId,
-                                            RegionInfo *Region,
-                                            CompactPtrT *ToArray,
-                                            const u16 MaxBlockCount)
+  // Refill the freelist and return one batch.
+  NOINLINE TransferBatchT *populateFreeListAndPopBatch(CacheT *C, uptr ClassId,
+                                                       RegionInfo *Region)
      REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     const uptr Size = getSizeByClassId(ClassId);
     const u16 MaxCount = CacheT::getMaxCached(Size);
@@ -1004,7 +994,7 @@ template <typename Config> class SizeClassAllocator64 {
     const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
     if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
       Region->Exhausted = true;
-      return 0U;
+      return nullptr;
     }
 
     if (UNLIKELY(!Region->MemMapInfo.MemMap.remap(
@@ -1012,7 +1002,7 @@ template <typename Config> class SizeClassAllocator64 {
             MAP_ALLOWNOMEM | MAP_RESIZABLE |
                 (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG
                                                           : 0)))) {
-      return 0U;
+      return nullptr;
     }
     Region->MemMapInfo.MappedUser += MapSize;
     C->getStats().add(StatMapped, MapSize);
@@ -1059,9 +1049,8 @@ template <typename Config> class SizeClassAllocator64 {
       pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks);
     }
 
-    const u16 PopCount =
-        popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-    DCHECK_NE(PopCount, 0U);
+    TransferBatchT *B = popBatchImpl(C, ClassId, Region);
+    DCHECK_NE(B, nullptr);
 
     // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record
     // the requests from `PushBlocks` and `PopBatch` which are external
@@ -1073,7 +1062,7 @@ template <typename Config> class SizeClassAllocator64 {
     C->getStats().add(StatFree, AllocatedUser);
     Region->MemMapInfo.AllocatedUser += AllocatedUser;
 
-    return PopCount;
+    return B;
   }
 
   void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region)
@@ -1197,7 +1186,7 @@ template <typename Config> class SizeClassAllocator64 {
     }
 
     // Note that we have extracted the `GroupsToRelease` from region freelist.
-    // It's safe to let pushBlocks()/popBlocks() access the remaining region
+    // It's safe to let pushBlocks()/popBatches() access the remaining region
     // freelist. In the steps 3 and 4, we will temporarily release the FLLock
     // and lock it again before step 5.
 
diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index f64a5143b30d4..18171511758a1 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -237,6 +237,7 @@ struct SmallRegionsConfig {
 // For the 32-bit one, it requires actually exhausting memory, so we skip it.
 TEST(ScudoPrimaryTest, Primary64OOM) {
   using Primary = scudo::SizeClassAllocator64<SmallRegionsConfig>;
+  using TransferBatch = Primary::TransferBatchT;
   Primary Allocator;
   Allocator.init(/*ReleaseToOsInterval=*/-1);
   typename Primary::CacheT Cache;
@@ -244,26 +245,29 @@ TEST(ScudoPrimaryTest, Primary64OOM) {
   Stats.init();
   Cache.init(&Stats, &Allocator);
   bool AllocationFailed = false;
-  std::vector<void *> Blocks;
+  std::vector<TransferBatch *> Batches;
   const scudo::uptr ClassId = Primary::SizeClassMap::LargestClassId;
   const scudo::uptr Size = Primary::getSizeByClassId(ClassId);
-  const scudo::u16 MaxCachedBlockCount = Primary::CacheT::getMaxCached(Size);
+  typename Primary::CacheT::CompactPtrT Blocks[TransferBatch::MaxNumCached];
 
   for (scudo::uptr I = 0; I < 10000U; I++) {
-    for (scudo::uptr J = 0; J < MaxCachedBlockCount; ++J) {
-      void *Ptr = Cache.allocate(ClassId);
-      if (Ptr == nullptr) {
-        AllocationFailed = true;
-        break;
-      }
-      memset(Ptr, 'B', Size);
-      Blocks.push_back(Ptr);
+    TransferBatch *B = Allocator.popBatch(&Cache, ClassId);
+    if (!B) {
+      AllocationFailed = true;
+      break;
     }
+    for (scudo::u16 J = 0; J < B->getCount(); J++)
+      memset(Allocator.decompactPtr(ClassId, B->get(J)), 'B', Size);
+    Batches.push_back(B);
+  }
+  while (!Batches.empty()) {
+    TransferBatch *B = Batches.back();
+    Batches.pop_back();
+    const scudo::u16 Count = B->getCount();
+    B->moveToArray(Blocks);
+    Allocator.pushBlocks(&Cache, ClassId, Blocks, Count);
+    Cache.deallocate(Primary::SizeClassMap::BatchClassId, B);
   }
-
-  for (auto *Ptr : Blocks)
-    Cache.deallocate(ClassId, Ptr);
-
   Cache.destroy(nullptr);
   Allocator.releaseToOS(scudo::ReleaseToOS::Force);
   scudo::ScopedString Str;
@@ -338,7 +342,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) {
       V.push_back(std::make_pair(ClassId, P));
     }
 
-    // Try to interleave pushBlocks(), popBlocks() and releaseToOS().
+    // Try to interleave pushBlocks(), popBatch() and releaseToOS().
     Allocator->releaseToOS(scudo::ReleaseToOS::Force);
 
     while (!V.empty()) {
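
For context, a minimal self-contained C++ sketch of the batch-at-a-time contract this patch restores: a popBatch()-style call hands back an entire TransferBatch, which the caller drains in one shot, rather than receiving an arbitrary count of blocks. The types, sizes, and function below are illustrative stand-ins, not scudo's real definitions; they only model the pattern used by the updated Primary64OOM test above.

// Simplified model of the "whole TransferBatch per popBatch()" invariant.
// All names and constants here are placeholders, not scudo internals.
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

namespace {

struct TransferBatch {
  static constexpr std::uint16_t MaxNumCached = 8;

  void add(std::uintptr_t P) { Batch[Count++] = P; }
  std::uint16_t getCount() const { return Count; }

  // Drain the whole batch into Array; partial transfers are not part of the
  // contract being modeled.
  void moveToArray(std::uintptr_t *Array) {
    for (std::uint16_t I = 0; I < Count; ++I)
      Array[I] = Batch[I];
    Count = 0;
  }

private:
  std::uint16_t Count = 0;
  std::array<std::uintptr_t, MaxNumCached> Batch{};
};

// Return the frontmost batch whole, or nullptr when the free list is empty.
TransferBatch *popBatch(std::vector<TransferBatch *> &FreeList) {
  if (FreeList.empty())
    return nullptr;
  TransferBatch *B = FreeList.back();
  FreeList.pop_back();
  return B;
}

} // namespace

int main() {
  // Stage a few fake block addresses in one batch.
  TransferBatch Storage;
  for (std::uintptr_t P = 0x1000; P < 0x1000 + 4 * 0x20; P += 0x20)
    Storage.add(P);

  std::vector<TransferBatch *> FreeList{&Storage};
  std::uintptr_t Blocks[TransferBatch::MaxNumCached];

  if (TransferBatch *B = popBatch(FreeList)) {
    const std::uint16_t Count = B->getCount();
    B->moveToArray(Blocks); // the caller always receives the entire batch
    std::printf("popped %u blocks, first block at 0x%zx\n",
                static_cast<unsigned>(Count),
                static_cast<std::size_t>(Blocks[0]));
  }
  return 0;
}

This mirrors the reworked test loop, where each popped batch is drained with moveToArray() and later pushed back and deallocated as a unit.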