[BOLT][Instrumentation] Put Allocator itself in shared memory by default
In the absence of the instrumentation-file-append-pid option, the
global allocator uses shared pages for allocation. However, since it is a
global variable, it gets COW'd after fork if instrumentation-sleep-time
is used, or if the process forks on its own. As a result, it hands out the same
pages to every process, which corrupts the hash tables. Thus, if we
want shared pages, we need to put the allocator itself in a shared page,
which this commit does in __bolt_instr_setup.
I also added a couple of assertions to sanity-check the hash table.

Reviewed By: rafauler, Amir
Differential Revision: https://reviews.llvm.org/D153771
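
The pattern the fix relies on is easier to see in isolation. Below is a minimal standalone sketch (illustrative names only, not BOLT's runtime code, which cannot #include <new> and therefore declares placement new itself): an object is constructed with placement new inside a MAP_SHARED anonymous mapping, so after fork() the parent and child keep mutating the same instance instead of each writing to a private copy-on-write copy of a global.

// Minimal sketch of the shared-memory pattern; illustrative only, not BOLT code.
#include <cassert>
#include <cstdio>
#include <new>        // placement new (BOLT's runtime declares it manually instead)
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

struct Counter {
  long Val = 0;
};

int main() {
  // A MAP_SHARED | MAP_ANONYMOUS page stays shared across fork(), unlike a
  // plain global object, which each process would modify in its own COW copy.
  void *Page = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  assert(Page != MAP_FAILED);

  // Construct the object inside the shared page with placement new, mirroring
  // how __bolt_instr_setup constructs the allocator in its metadata storage.
  Counter *C = new (Page) Counter;

  if (fork() == 0) {
    C->Val += 1;   // child writes through the shared mapping
    _exit(0);
  }
  wait(nullptr);   // parent observes the child's update: prints "Val = 1"
  std::printf("Val = %ld\n", C->Val);
  return 0;
}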
treapster committed Jun 29, 2023
1 parent 02c3724 commit ad4e077
Showing 1 changed file with 45 additions and 14 deletions.
59 changes: 45 additions & 14 deletions bolt/runtime/instr.cpp
@@ -210,7 +210,12 @@ class BumpPtrAllocator {

/// Used for allocating indirect call instrumentation counters. Initialized by
/// __bolt_instr_setup, our initialization routine.
BumpPtrAllocator GlobalAlloc;
BumpPtrAllocator *GlobalAlloc;

// Storage for GlobalAlloc which can be shared if not using
// instrumentation-file-append-pid.
void *GlobalMetadataStorage;

} // anonymous namespace

// User-defined placement new operators. We only use those (as opposed to
@@ -230,6 +235,10 @@ void *operator new[](size_t Sz, BumpPtrAllocator &A, char C) {
memset(Ptr, C, Sz);
return Ptr;
}

// Declaration for global allocator to construct it in shared memory if needed.
// Needed because we can't #include <new>
void *operator new(size_t, void *) noexcept;
// Only called during exception unwinding (useless). We must manually dealloc.
// C++ language weirdness
void operator delete(void *Ptr, BumpPtrAllocator &A) { A.deallocate(Ptr); }
@@ -264,7 +273,17 @@ class SimpleHashTable {
/// Increment by 1 the value of \p Key. If it is not in this table, it will be
/// added to the table and its value set to 1.
void incrementVal(uint64_t Key, BumpPtrAllocator &Alloc) {
++get(Key, Alloc).Val;
if (!__bolt_instr_conservative) {
TryLock L(M);
if (!L.isLocked())
return;
auto &E = getOrAllocEntry(Key, Alloc);
++E.Val;
return;
}
Lock L(M);
auto &E = getOrAllocEntry(Key, Alloc);
++E.Val;
}

/// Basic member accessing interface. Here we pass the allocator explicitly to
@@ -308,10 +327,10 @@ class SimpleHashTable {
if (Entry.Key == VacantMarker)
continue;
if (Entry.Key & FollowUpTableMarker) {
forEachElement(Callback, IncSize,
reinterpret_cast<MapEntry *>(Entry.Key &
~FollowUpTableMarker),
args...);
MapEntry *Next =
reinterpret_cast<MapEntry *>(Entry.Key & ~FollowUpTableMarker);
assert(Next != Entries, "Circular reference!");
forEachElement(Callback, IncSize, Next, args...);
continue;
}
Callback(Entry, args...);
@@ -358,12 +377,18 @@ class SimpleHashTable {
CurEntrySelector = CurEntrySelector % IncSize;
NextLevelTbl[CurEntrySelector] = Entry;
Entry.Key = reinterpret_cast<uint64_t>(NextLevelTbl) | FollowUpTableMarker;
assert((NextLevelTbl[CurEntrySelector].Key & ~FollowUpTableMarker) !=
uint64_t(Entries),
"circular reference created!\n");
return getEntry(NextLevelTbl, Key, Remainder, Alloc, CurLevel + 1);
}

MapEntry &getOrAllocEntry(uint64_t Key, BumpPtrAllocator &Alloc) {
if (TableRoot)
return getEntry(TableRoot, Key, Key, Alloc, 0);
if (TableRoot) {
MapEntry &E = getEntry(TableRoot, Key, Key, Alloc, 0);
assert(!(E.Key & FollowUpTableMarker), "Invalid entry!");
return E;
}
return firstAllocation(Key, Alloc);
}
};
@@ -1561,13 +1586,19 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
MAP_ANONYMOUS | MapPrivateOrShared | MAP_FIXED, -1, 0);
assert(Ret != MAP_FAILED, "__bolt_instr_setup: Failed to mmap counters!");

// Conservatively reserve 100MiB shared pages
GlobalAlloc.setMaxSize(0x6400000);
GlobalAlloc.setShared(Shared);
GlobalWriteProfileMutex = new (GlobalAlloc, 0) Mutex();
GlobalMetadataStorage = __mmap(0, 4096, PROT_READ | PROT_WRITE,
MapPrivateOrShared | MAP_ANONYMOUS, -1, 0);
assert(GlobalMetadataStorage != MAP_FAILED,
"__bolt_instr_setup: failed to mmap page for metadata!");

GlobalAlloc = new (GlobalMetadataStorage) BumpPtrAllocator;
// Conservatively reserve 100MiB
GlobalAlloc->setMaxSize(0x6400000);
GlobalAlloc->setShared(Shared);
GlobalWriteProfileMutex = new (*GlobalAlloc, 0) Mutex();
if (__bolt_instr_num_ind_calls > 0)
GlobalIndCallCounters =
new (GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];
new (*GlobalAlloc, 0) IndirectCallHashTable[__bolt_instr_num_ind_calls];

if (__bolt_instr_sleep_time != 0) {
// Separate instrumented process to the own process group
@@ -1582,7 +1613,7 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {

extern "C" __attribute((force_align_arg_pointer)) void
instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
GlobalIndCallCounters[IndCallID].incrementVal(Target, GlobalAlloc);
GlobalIndCallCounters[IndCallID].incrementVal(Target, *GlobalAlloc);
}

/// We receive as in-stack arguments the identifier of the indirect call site
