Skip to content

Commit

Permalink
Restore "[MemProf] Refactor memory profile matching into MemProfiler …
Browse files Browse the repository at this point in the history
…(NFC)"

This restores commit 29252fd, reverted
in 3498cf5 because it was thought to
cause a bot failure, which ended up being unrelated to this patch set.

Differential Revision: https://reviews.llvm.org/D154872
  • Loading branch information
teresajohnson committed Jul 11, 2023
1 parent 9095d5c commit 9501405
Show file tree
Hide file tree
Showing 3 changed files with 281 additions and 275 deletions.
7 changes: 7 additions & 0 deletions llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ class ModuleMemProfilerPass : public PassInfoMixin<ModuleMemProfilerPass> {
static bool isRequired() { return true; }
};

// TODO: Remove this declaration and make readMemprof static once the matching
// is moved into its own pass.
class IndexedInstrProfReader;
class TargetLibraryInfo;
// Looks up the memory profile record for F (keyed by the GUID of its PGO
// function name) through MemProfReader and matches it against the debug
// locations of the call instructions in F. Matching calls are annotated with
// !memprof and/or !callsite metadata. TLI is used to recognize operator-new
// like allocation calls. If the profile lookup fails, F is left untouched and
// a warning may be reported on M's context.
void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI);

} // namespace llvm

#endif
264 changes: 264 additions & 0 deletions llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
Expand All @@ -30,16 +33,28 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

// Warning-control options that are only declared here (extern); their
// definitions live in another translation unit. readMemprof consults them to
// decide whether a failed profile lookup should be reported as a warning.
namespace llvm {
// When false, no warning is issued for a function that has no profile record.
extern cl::opt<bool> PGOWarnMissing;
// When true, warnings about a profile hash mismatch are suppressed.
extern cl::opt<bool> NoPGOWarnMismatch;
// When true, hash mismatch warnings are suppressed for functions that are in a
// comdat or have available_externally linkage.
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
Expand Down Expand Up @@ -128,6 +143,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");

namespace {

Expand Down Expand Up @@ -601,3 +617,251 @@ bool MemProfiler::instrumentFunction(Function &F) {

return FunctionModified;
}

// Builds call stack metadata from the stack ids in InlinedCallStack and
// attaches it to I under the !callsite metadata kind, replacing any node
// already attached under that kind.
static void addCallsiteMetadata(Instruction &I,
                                std::vector<uint64_t> &InlinedCallStack,
                                LLVMContext &Ctx) {
  auto *CallsiteMD = buildCallstackMetadata(InlinedCallStack, Ctx);
  I.setMetadata(LLVMContext::MD_callsite, CallsiteMD);
}

// Computes the 64-bit stack id of a single call stack frame, identified by the
// GUID of the containing function, the line offset within that function and
// the column. The id is the 8-byte truncated BLAKE3 hash of the three values,
// reinterpreted as a uint64_t.
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  // Bound the copy by the destination and fail the build if the hash width is
  // ever changed without also changing the id type; sizing the copy by the
  // source would otherwise write past the end of Id.
  static_assert(sizeof(llvm::BLAKE3Result<8>) == sizeof(Id),
                "truncated hash must be exactly as wide as the stack id");
  std::memcpy(&Id, Hash.data(), sizeof(Id));
  return Id;
}

// Convenience overload: computes the stack id of a profile frame from its
// function GUID, line offset and column.
static uint64_t computeStackId(const memprof::Frame &Frame) {
  const auto FrameFunction = Frame.Function;
  const auto FrameLineOffset = Frame.LineOffset;
  const auto FrameColumn = Frame.Column;
  return computeStackId(FrameFunction, FrameLineOffset, FrameColumn);
}

// Adds the call stack of one profiled allocation to AllocTrie. The allocation
// type is derived from the profiled lifetime access density, allocation count
// and total lifetime; the stack is passed as the stack ids of its frames, in
// the order they appear in the profile.
static void addCallStack(CallStackTrie &AllocTrie,
                         const AllocationInfo *AllocInfo) {
  const auto &MIB = AllocInfo->Info;
  auto AllocType =
      getAllocType(MIB.getTotalLifetimeAccessDensity(), MIB.getAllocCount(),
                   MIB.getTotalLifetime());

  SmallVector<uint64_t> FrameIds;
  for (const auto &ProfiledFrame : AllocInfo->CallStack) {
    const uint64_t FrameId = computeStackId(ProfiledFrame);
    FrameIds.push_back(FrameId);
  }

  AllocTrie.addCallStack(AllocType, FrameIds);
}

// Checks whether the stack ids in InlinedCallStack (computed from an
// instruction's debug info) match, element by element, the frames of
// ProfileCallStack beginning at StartIndex. ProfileCallStack comes from the
// profile data, either an allocation context or a callsite; for callsites the
// matching may need to begin at a non-zero StartIndex.
//
// Returns true only if every entry of InlinedCallStack was matched. The
// profile stack is allowed to be longer than the inlined stack, but not
// shorter.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  size_t ProfileIdx = StartIndex;
  size_t InlinedIdx = 0;
  while (ProfileIdx < ProfileCallStack.size() &&
         InlinedIdx < InlinedCallStack.size()) {
    const uint64_t ProfileId = computeStackId(ProfileCallStack[ProfileIdx]);
    // Any mismatching frame rules this profile stack out.
    if (ProfileId != InlinedCallStack[InlinedIdx])
      return false;
    ++ProfileIdx;
    ++InlinedIdx;
  }
  // If the profile stack ran out first, some inlined frames are unmatched.
  return InlinedIdx == InlinedCallStack.size();
}

// Matches the memory profile record of F against the call instructions in F
// and annotates the IR with the result.
//
// M             - module containing F; supplies the LLVMContext and the name
//                 used in diagnostics.
// F             - function whose calls are matched against the profile.
// MemProfReader - reader used to look up the profile record by the GUID of
//                 F's PGO function name. Dereferenced unconditionally.
// TLI           - used to recognize operator-new like allocation calls.
//
// If the record cannot be read, F is left unmodified; depending on the error
// kind and the PGO warning options a warning is diagnosed. Otherwise each call
// whose debug location chain matches a profiled allocation context gets
// !memprof metadata (or whatever buildAndAttachMIBMetadata attaches instead)
// plus !callsite metadata, and each call matching a profiled interior
// callsite gets !callsite metadata.
void llvm::readMemprof(Module &M, Function &F,
                       IndexedInstrProfReader *MemProfReader,
                       const TargetLibraryInfo &TLI) {
  auto &Ctx = M.getContext();

  auto FuncName = getPGOFuncName(F);
  auto FuncGUID = Function::getGUID(FuncName);
  Expected<memprof::MemProfRecord> MemProfResult =
      MemProfReader->getMemProfRecord(FuncGUID);
  if (Error E = MemProfResult.takeError()) {
    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  // Build maps of the location hash to all profile data with that leaf location
  // (allocation info and the callsites).
  using AllocInfoMap = std::map<uint64_t, std::set<const AllocationInfo *>>;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  using CallSiteMap =
      std::map<uint64_t,
               std::set<std::pair<const SmallVector<Frame> *, unsigned>>>;
  AllocInfoMap LocHashToAllocInfo;
  CallSiteMap LocHashToCallSites;
  // The maps store pointers into MemProfRec, so it must outlive both of them.
  const auto MemProfRec = std::move(MemProfResult.get());
  for (auto &AI : MemProfRec.AllocSites) {
    // An allocation context without frames has no leaf to key on; skip it
    // instead of indexing past the end of the call stack.
    if (AI.CallStack.empty())
      continue;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer prefix
    // of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
  }
  for (auto &CS : MemProfRec.CallSites) {
    // Nothing to record for an empty callsite stack (and the check below
    // would otherwise index with Idx - 1 == -1).
    if (CS.empty())
      continue;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  // Line offset of a location relative to the start line of its enclosing
  // subprogram, truncated to the low 16 bits.
  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      std::vector<uint64_t> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite). Start them at end() so they never hold a
      // singular value.
      AllocInfoMap::iterator AllocInfoIter = LocHashToAllocInfo.end();
      CallSiteMap::iterator CallSitesIter = LocHashToCallSites.end();
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId =
            computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
        // LeafFound will only be false on the first iteration, since we either
        // set it true or break out of the loop below.
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          // Check if the leaf is in one of the maps. If not, no need to look
          // further at this call.
          if (AllocInfoIter == LocHashToAllocInfo.end() &&
              CallSitesIter == LocHashToCallSites.end())
            break;
          LeafFound = true;
        }
        InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      // Only consider allocations via new, to reduce unnecessary metadata,
      // since those are the only allocations that will be targeted initially.
      // A call that is not new-like is not treated as an allocation at all,
      // so that it can still be matched as an interior callsite below when its
      // location happens to be shared with an allocation.
      if (AllocInfoIter != LocHashToAllocInfo.end() && isNewLikeFn(CI, &TLI)) {
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts to
        // the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack))
            addCallStack(AllocTrie, AllocInfo);
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute if
        // all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so that
            // it is simpler later on to identify which part of the MIB contexts
            // are from this particular instruction (including during inlining,
            // when the callsite metadata will be updated appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata, provided the leaf location is in the
      // callsites map. It is absent when the leaf only matched an allocation
      // location but the call is not new-like; nothing to annotate then.
      if (CallSitesIter == LocHashToCallSites.end())
        continue;
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}
Loading

0 comments on commit 9501405

Please sign in to comment.