[scudo][standalone] Allow the release of smaller sizes
Initially we avoided releasing smaller size classes because it was an
expensive operation, particularly on 32-bit platforms. With a lot of
batches, and given that there are many blocks per page, it was a
lengthy operation that yielded little benefit.

There have been improvements to the 32-bit release path since then,
and we still have criteria in place to avoid wasting time (eg,
requiring 9x% of the blocks in a size class to be free, etc).
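
For illustration, a minimal sketch of such a gating check, assuming a
flat 95% threshold and invented names (the actual heuristic in
releaseToOSMaybe is more involved and varies per class):

    #include <cstdint>

    // Hypothetical sketch only: skip an opportunistic release unless most of
    // the size class is sitting in the free list. The 95% figure and the
    // parameter names are assumptions, not the actual scudo code.
    bool worthReleasing(uintptr_t BytesInFreeList, uintptr_t AllocatedUser,
                        bool Force) {
      if (Force)
        return true; // a forced release (releaseToOS) bypasses the heuristic
      return BytesInFreeList * 100U >= AllocatedUser * 95U;
    }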

Allowing the release of blocks < 128 bytes helps in situations where
a lot of small chunks would otherwise never be reclaimed, short of a
forced reclaiming.
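
As a rough before/after sketch of the eligibility rule this patch
relaxes (simplified; the parameters stand in for scudo's ClassId,
SizeClassMap::BatchClassId, getSizeByClassId() and the cached page
size):

    #include <cstdint>

    // Old rule: skip the batch class and any class smaller than
    // PageSize / 32, i.e. 128 bytes with 4 KiB pages.
    bool canReleaseBefore(uintptr_t ClassId, uintptr_t BatchClassId,
                          uintptr_t BlockSize, uintptr_t PageSize) {
      return ClassId != BatchClassId && BlockSize >= PageSize / 32;
    }

    // New rule: every class except the batch class may be released.
    bool canReleaseAfter(uintptr_t ClassId, uintptr_t BatchClassId) {
      return ClassId != BatchClassId;
    }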

Additionally, change some `CHECK`s to `DCHECK`s and rearrange the
code a bit.

I didn't experience any regressions in my benchmarks.

Differential Revision: https://reviews.llvm.org/D93141
Kostya Kortchinsky committed Dec 17, 2020
1 parent 4a327bd commit 1dbf2c9
Showing 3 changed files with 34 additions and 58 deletions.
compiler-rt/lib/scudo/standalone/primary32.h: 12 changes (4 additions, 8 deletions)
@@ -76,17 +76,12 @@ class SizeClassAllocator32 {
if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
Seed = static_cast<u32>(
Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
const uptr PageSize = getPageSizeCached();
for (uptr I = 0; I < NumClasses; I++) {
SizeClassInfo *Sci = getSizeClassInfo(I);
Sci->RandState = getRandomU32(&Seed);
// Sci->MaxRegionIndex is already initialized to 0.
Sci->MinRegionIndex = NumRegions;
// See comment in the 64-bit primary about releasing smaller size classes.
Sci->CanRelease = (I != SizeClassMap::BatchClassId) &&
(getSizeByClassId(I) >= (PageSize / 32));
if (Sci->CanRelease)
Sci->ReleaseInfo.LastReleaseAtNs = Time;
Sci->ReleaseInfo.LastReleaseAtNs = Time;
}
setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
}
@@ -137,7 +132,7 @@ class SizeClassAllocator32 {
ScopedLock L(Sci->Mutex);
Sci->FreeList.push_front(B);
Sci->Stats.PushedBlocks += B->getCount();
if (Sci->CanRelease)
if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Sci, ClassId);
}

@@ -217,6 +212,8 @@ class SizeClassAllocator32 {
uptr releaseToOS() {
uptr TotalReleasedBytes = 0;
for (uptr I = 0; I < NumClasses; I++) {
if (I == SizeClassMap::BatchClassId)
continue;
SizeClassInfo *Sci = getSizeClassInfo(I);
ScopedLock L(Sci->Mutex);
TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true);
@@ -262,7 +259,6 @@ class SizeClassAllocator32 {
uptr CurrentRegion;
uptr CurrentRegionAllocated;
SizeClassStats Stats;
bool CanRelease;
u32 RandState;
uptr AllocatedUser;
// Lowest & highest region index allocated for this size class, to avoid
compiler-rt/lib/scudo/standalone/primary64.h: 19 changes (5 additions, 14 deletions)
@@ -80,17 +80,7 @@ class SizeClassAllocator64 {
Region->RegionBeg =
getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
Region->RandState = getRandomU32(&Seed);
// Releasing smaller size classes doesn't necessarily yield to a
// meaningful RSS impact: there are more blocks per page, they are
// randomized around, and thus pages are less likely to be entirely empty.
// On top of this, attempting to release those require more iterations and
// memory accesses which ends up being fairly costly. The current lower
// limit is mostly arbitrary and based on empirical observations.
// TODO(kostyak): make the lower limit a runtime option
Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
(getSizeByClassId(I) >= (PageSize / 32));
if (Region->CanRelease)
Region->ReleaseInfo.LastReleaseAtNs = Time;
Region->ReleaseInfo.LastReleaseAtNs = Time;
}
setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));

@@ -129,7 +119,7 @@ class SizeClassAllocator64 {
ScopedLock L(Region->Mutex);
Region->FreeList.push_front(B);
Region->Stats.PushedBlocks += B->getCount();
if (Region->CanRelease)
if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Region, ClassId);
}

@@ -201,6 +191,8 @@ class SizeClassAllocator64 {
uptr releaseToOS() {
uptr TotalReleasedBytes = 0;
for (uptr I = 0; I < NumClasses; I++) {
if (I == SizeClassMap::BatchClassId)
continue;
RegionInfo *Region = getRegionInfo(I);
ScopedLock L(Region->Mutex);
TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
@@ -291,7 +283,6 @@ class SizeClassAllocator64 {
HybridMutex Mutex;
SinglyLinkedList<TransferBatch> FreeList;
RegionStats Stats;
bool CanRelease;
bool Exhausted;
u32 RandState;
uptr RegionBeg;
@@ -417,7 +408,7 @@ class SizeClassAllocator64 {
const uptr BlockSize = getSizeByClassId(ClassId);
const uptr PageSize = getPageSizeCached();

CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
const uptr BytesInFreeList =
Region->AllocatedUser -
(Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
compiler-rt/lib/scudo/standalone/release.h: 61 changes (25 additions, 36 deletions)
@@ -52,20 +52,20 @@ class PackedCounterArray {
PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion,
uptr MaxValue)
: Regions(NumberOfRegions), NumCounters(CountersPerRegion) {
CHECK_GT(Regions, 0);
CHECK_GT(NumCounters, 0);
CHECK_GT(MaxValue, 0);
DCHECK_GT(Regions, 0);
DCHECK_GT(NumCounters, 0);
DCHECK_GT(MaxValue, 0);
constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL;
// Rounding counter storage size up to the power of two allows for using
// bit shifts calculating particular counter's Index and offset.
const uptr CounterSizeBits =
roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1);
CHECK_LE(CounterSizeBits, MaxCounterBits);
DCHECK_LE(CounterSizeBits, MaxCounterBits);
CounterSizeBitsLog = getLog2(CounterSizeBits);
CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits);

const uptr PackingRatio = MaxCounterBits >> CounterSizeBitsLog;
CHECK_GT(PackingRatio, 0);
DCHECK_GT(PackingRatio, 0);
PackingRatioLog = getLog2(PackingRatio);
BitOffsetMask = PackingRatio - 1;
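
To make the packing arithmetic above concrete, here is a small
standalone example with illustrative values (the real class derives
the same quantities from MaxValue using getMostSignificantSetBitIndex,
getLog2 and bit masks rather than divisions):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Each counter must hold values up to MaxValue, and counters are
      // packed into 64-bit storage words (sizeof(*Buffer) * 8).
      const uint64_t MaxValue = 5;        // e.g. max blocks per page
      const uint64_t MaxCounterBits = 64;
      // Bits needed to represent MaxValue (3 for 5), rounded up to a power
      // of two so that index/offset can be computed with shifts: 4.
      uint64_t NeededBits = 0;
      while ((1ULL << NeededBits) <= MaxValue)
        NeededBits++;
      uint64_t CounterSizeBits = 1;
      while (CounterSizeBits < NeededBits)
        CounterSizeBits <<= 1;
      const uint64_t PackingRatio = MaxCounterBits / CounterSizeBits; // 16
      // Counter #37 therefore lives in word 2, at bit offset 20.
      const uint64_t Index = 37;
      printf("word %llu, bit offset %llu\n",
             (unsigned long long)(Index / PackingRatio),
             (unsigned long long)((Index % PackingRatio) * CounterSizeBits));
      return 0;
    }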

@@ -235,48 +235,38 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
// Each chunk affects one page only.
for (const auto &It : FreeList) {
// If dealing with a TransferBatch, the first pointer of the batch will
// point to the batch itself, we do not want to mark this for release as
// the batch is in use, so skip the first entry.
const bool IsTransferBatch =
(It.getCount() != 0) &&
(reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
for (u32 I = 0; I < It.getCount(); I++) {
const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
// This takes care of P < Base and P >= Base + RoundedSize.
if (P < RoundedSize) {
const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
const uptr PInRegion = P - RegionIndex * RegionSize;
Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
}
if (UNLIKELY(P >= RoundedSize))
continue;
const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
const uptr PInRegion = P - RegionIndex * RegionSize;
Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
}
}
} else {
// In all other cases chunks might affect more than one page.
DCHECK_GE(RegionSize, BlockSize);
const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
for (const auto &It : FreeList) {
// See TransferBatch comment above.
const bool IsTransferBatch =
(It.getCount() != 0) &&
(reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
for (u32 I = 0; I < It.getCount(); I++) {
const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
// This takes care of P < Base and P >= Base + RoundedSize.
if (P < RoundedSize) {
const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
uptr PInRegion = P - RegionIndex * RegionSize;
Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
(PInRegion + BlockSize - 1) >> PageSizeLog);
// The last block in a region might straddle a page, so if it's
// free, we mark the following "pretend" memory block(s) as free.
if (PInRegion == LastBlockInRegion) {
if (UNLIKELY(P >= RoundedSize))
continue;
const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
uptr PInRegion = P - RegionIndex * RegionSize;
Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
(PInRegion + BlockSize - 1) >> PageSizeLog);
// The last block in a region might straddle a page, so if it's
// free, we mark the following "pretend" memory block(s) as free.
if (PInRegion == LastBlockInRegion) {
PInRegion += BlockSize;
while (PInRegion < RoundedRegionSize) {
Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
(PInRegion + BlockSize - 1) >> PageSizeLog);
PInRegion += BlockSize;
while (PInRegion < RoundedRegionSize) {
Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
(PInRegion + BlockSize - 1) >> PageSizeLog);
PInRegion += BlockSize;
}
}
}
}
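
For context, the loops above feed a per-page counter: every free block
bumps the counter of each page it overlaps, and a page is later handed
to the range tracker as releasable only if its counter reaches the
number of blocks that can map to it. A minimal standalone sketch of
that idea (hypothetical helper, not scudo's API):

    #include <cstdint>
    #include <vector>

    // Count, for every page of a region, how many free blocks touch it. A
    // page whose counter equals the number of blocks overlapping it is
    // entirely free and is a candidate for release to the OS.
    void countFreeBlocks(const std::vector<uintptr_t> &FreeBlocks,
                         uintptr_t Base, uintptr_t BlockSize,
                         uintptr_t PageSize,
                         std::vector<uint32_t> &PageCounters) {
      for (uintptr_t Block : FreeBlocks) {
        const uintptr_t FirstPage = (Block - Base) / PageSize;
        const uintptr_t LastPage = (Block - Base + BlockSize - 1) / PageSize;
        for (uintptr_t Page = FirstPage;
             Page <= LastPage && Page < PageCounters.size(); Page++)
          PageCounters[Page]++;
      }
    }
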
@@ -327,7 +317,6 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
}
}
PrevPageBoundary = PageBoundary;

RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage);
}
}
