Expand Up
@@ -18,9 +18,12 @@
#include " llvm/ADT/SmallVector.h"
#include " llvm/ADT/Statistic.h"
#include " llvm/ADT/StringRef.h"
#include " llvm/Analysis/MemoryBuiltins.h"
#include " llvm/Analysis/MemoryProfileInfo.h"
#include " llvm/Analysis/ValueTracking.h"
#include " llvm/IR/Constant.h"
#include " llvm/IR/DataLayout.h"
#include " llvm/IR/DiagnosticInfo.h"
#include " llvm/IR/Function.h"
#include " llvm/IR/GlobalValue.h"
#include " llvm/IR/IRBuilder.h"
Expand All
@@ -30,16 +33,29 @@
#include " llvm/IR/Type.h"
#include " llvm/IR/Value.h"
#include " llvm/ProfileData/InstrProf.h"
#include " llvm/ProfileData/InstrProfReader.h"
#include " llvm/Support/BLAKE3.h"
#include " llvm/Support/CommandLine.h"
#include " llvm/Support/Debug.h"
#include " llvm/Support/HashBuilder.h"
#include " llvm/Support/VirtualFileSystem.h"
#include " llvm/TargetParser/Triple.h"
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
#include " llvm/Transforms/Utils/ModuleUtils.h"
#include < map>
#include < set>
using namespace llvm ;
using namespace llvm ::memprof;
#define DEBUG_TYPE " memprof"
namespace llvm {
// Command-line options declared here as extern (their definitions are not in
// the visible part of this file). readMemprof consults them to decide whether
// a warning about a missing or mismatched profile should be suppressed.
extern cl::opt<bool > PGOWarnMissing;
extern cl::opt<bool > NoPGOWarnMismatch;
extern cl::opt<bool > NoPGOWarnMismatchComdatWeak;
} // namespace llvm
// Version number of the memory profiler.
// NOTE(review): the consumer of this constant is not in the visible part of
// this file -- confirm what must stay in sync with it before changing it.
constexpr int LLVM_MEM_PROFILER_VERSION = 1 ;
// Size of memory mapped to a single shadow location.
Expand Down
Expand Up
@@ -128,6 +144,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC (NumInstrumentedWrites, " Number of instrumented writes" );
STATISTIC (NumSkippedStackReads, " Number of non-instrumented stack reads" );
STATISTIC (NumSkippedStackWrites, " Number of non-instrumented stack writes" );
// Incremented by readMemprof each time the profile reader reports
// instrprof_error::unknown_function for a function.
STATISTIC (NumOfMemProfMissing, " Number of functions without memory profile." );
namespace {
Expand Down
Expand Up
@@ -601,3 +618,297 @@ bool MemProfiler::instrumentFunction(Function &F) {
return FunctionModified;
}
// Attaches !callsite metadata to instruction I. The metadata node is built
// from the stack ids in InlinedCallStack.
static void addCallsiteMetadata(Instruction &I,
                                std::vector<uint64_t> &InlinedCallStack,
                                LLVMContext &Ctx) {
  auto *CallsiteMD = buildCallstackMetadata(InlinedCallStack, Ctx);
  I.setMetadata(LLVMContext::MD_callsite, CallsiteMD);
}
// Computes the 64-bit stack id for a single frame location by hashing the
// function GUID, the line offset and the column with a BLAKE3 hash truncated
// to 8 bytes.
//
// \param Function   GUID of the function containing the location.
// \param LineOffset Line offset of the location.
// \param Column     Column of the location.
// \returns The 8 hash bytes copied verbatim into a uint64_t.
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  // Size the copy by the destination and verify at compile time that the hash
  // result has exactly that many bytes, so a change to the hash width can
  // never silently write past Id.
  static_assert(sizeof(Hash) == sizeof(Id),
                "truncated hash must be exactly as wide as the stack id");
  std::memcpy(&Id, Hash.data(), sizeof(Id));
  return Id;
}
// Convenience overload: computes the stack id of a profile Frame from its
// Function, LineOffset and Column fields.
static uint64_t computeStackId(const memprof::Frame &Frame) {
  const auto FuncGUID = Frame.Function;
  const auto Line = Frame.LineOffset;
  const auto Col = Frame.Column;
  return computeStackId(FuncGUID, Line, Col);
}
// Adds one profiled allocation context to AllocTrie: every frame of
// AllocInfo's call stack is converted to a stack id, and the resulting id list
// is inserted together with the allocation type that getAllocType derives from
// the context's access density, allocation count and lifetime.
static void addCallStack(CallStackTrie &AllocTrie,
                         const AllocationInfo *AllocInfo) {
  SmallVector<uint64_t> FrameIds;
  for (const auto &ProfFrame : AllocInfo->CallStack) {
    const uint64_t FrameId = computeStackId(ProfFrame);
    FrameIds.push_back(FrameId);
  }

  const auto &Info = AllocInfo->Info;
  auto Type = getAllocType(Info.getTotalLifetimeAccessDensity(),
                           Info.getAllocCount(), Info.getTotalLifetime());
  AllocTrie.addCallStack(Type, FrameIds);
}
// Checks whether the stack ids computed from an instruction's debug info
// (InlinedCallStack) match, element by element, the profile frames in
// ProfileCallStack beginning at StartIndex. The profile frames may come from
// allocation data or from a callsite; for callsites StartIndex may be
// non-zero.
//
// Returns true only when every entry of InlinedCallStack was compared and
// matched; running out of profile frames first yields false.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto ProfileIt = ProfileCallStack.begin() + StartIndex;
  const auto ProfileEnd = ProfileCallStack.end();
  auto InlinedIt = InlinedCallStack.begin();
  const auto InlinedEnd = InlinedCallStack.end();

  while (ProfileIt != ProfileEnd && InlinedIt != InlinedEnd) {
    // Any mismatching location disqualifies this profile call stack.
    if (computeStackId(*ProfileIt) != *InlinedIt)
      return false;
    ++ProfileIt;
    ++InlinedIt;
  }

  // All ids from the call instruction must have been consumed.
  return InlinedIt == InlinedEnd;
}
// Matches the memory profile record of function F against the call
// instructions in F and annotates the matching calls:
//  - new-like allocation calls whose inlined debug-location chain matches one
//    or more profiled allocation contexts receive !memprof metadata (or a
//    function attribute, as decided by CallStackTrie), plus !callsite metadata
//    when !memprof metadata was attached;
//  - other calls whose location chain matches a profiled callsite receive
//    !callsite metadata.
//
// \param M             Module containing F; supplies the LLVMContext and the
//                      name used in diagnostics.
// \param F             Function whose calls are annotated.
// \param MemProfReader Reader queried for F's profile record.
// \param TLI           Library info used to recognise new-like allocation
//                      calls.
//
// If the record cannot be read, a warning is emitted (unless suppressed by the
// PGO warning options) and F is left unchanged.
static void readMemprof(Module &M, Function &F,
                        IndexedInstrProfReader *MemProfReader,
                        const TargetLibraryInfo &TLI) {
  auto &Ctx = M.getContext();

  auto FuncName = getPGOFuncName(F);
  auto FuncGUID = Function::getGUID(FuncName);
  Expected<memprof::MemProfRecord> MemProfResult =
      MemProfReader->getMemProfRecord(FuncGUID);
  if (Error E = MemProfResult.takeError()) {
    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << " Error in reading profile for Func " << FuncName
                        << " : ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << " unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << " hash mismatch (skip=" << SkipWarning << " )");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  // Build maps of the location hash to all profile data with that leaf location
  // (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
      LocHashToCallSites;
  const auto MemProfRec = std::move(MemProfResult.get());
  for (auto &AI : MemProfRec.AllocSites) {
    // A context without any frame has no leaf location to key on. Skip it
    // instead of indexing into an empty call stack.
    if (AI.CallStack.empty())
      continue;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer prefix
    // of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
  }
  for (auto &CS : MemProfRec.CallSites) {
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    // Idx > 0 is checked first so that an empty callsite stack trips the
    // assertion instead of evaluating CS[Idx - 1] with a wrapped-around index.
    assert(Idx > 0 && Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      std::vector<uint64_t> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite). Start at end() so the iterators always hold a
      // well-defined value.
      auto AllocInfoIter = LocHashToAllocInfo.end();
      auto CallSitesIter = LocHashToCallSites.end();
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId =
            computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
        // LeafFound will only be false on the first iteration, since we either
        // set it true or break out of the loop below.
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          // Check if the leaf is in one of the maps. If not, no need to look
          // further at this call.
          if (AllocInfoIter == LocHashToAllocInfo.end() &&
              CallSitesIter == LocHashToCallSites.end())
            break;
          LeafFound = true;
        }
        InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      // Only consider allocations via new, to reduce unnecessary metadata,
      // since those are the only allocations that will be targeted initially.
      // A call that is not new-like falls through to the callsite handling
      // below, so that a non-allocation call sharing its line/col with an
      // allocation still gets its callsite metadata.
      if (AllocInfoIter != LocHashToAllocInfo.end() && isNewLikeFn(CI, &TLI)) {
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts to
        // the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack))
            addCallStack(AllocTrie, AllocInfo);
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute if
        // all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so that
            // it is simpler later on to identify which part of the MIB contexts
            // are from this particular instruction (including during inlining,
            // when the callsite metadata will be updated appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata, provided the leaf location is in the
      // callsites map. It is absent when the leaf was only found in the
      // allocation map and the call is not new-like.
      if (CallSitesIter == LocHashToCallSites.end())
        continue;
      for (const auto &CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}
// Constructs the pass for the profile at MemoryProfileFile.
//
// \param MemoryProfileFile Path of the memory profile to read.
// \param FS                File system used to open the profile; when null the
//                          real file system is used.
MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(std::move(MemoryProfileFile)), FS(std::move(FS)) {
  // Both parameters are sinks taken by value, so they are moved into the
  // members rather than copied. The parameter FS shadows the member and has
  // been moved from, so the null check must look at the member.
  if (!this->FS)
    this->FS = vfs::getRealFileSystem();
}
// Opens the memory profile named by MemoryProfileFileName and runs readMemprof
// on every function definition in M.
//
// If the reader cannot be created, is null, or the file holds no memory
// profile, a DiagnosticInfoPGOProfile diagnostic is reported and all analyses
// are preserved. Otherwise no analyses are reported as preserved.
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  LLVM_DEBUG(dbgs() << " Read in memory profile:");
  LLVMContext &Ctx = M.getContext();

  // Every failure below is reported against the profile file name.
  auto ReportProfileProblem = [&](const Twine &Msg) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), Msg));
  };

  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error CreateErr = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(CreateErr), [&](const ErrorInfoBase &EI) {
      ReportProfileProblem(EI.message());
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    ReportProfileProblem(StringRef(" Cannot get MemProfReader"));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    ReportProfileProblem(" Not a memory profile");
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  for (Function &Fn : M) {
    // Declarations have no body to annotate.
    if (!Fn.isDeclaration()) {
      const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(Fn);
      readMemprof(M, Fn, MemProfReader.get(), TLI);
    }
  }

  return PreservedAnalyses::none();
}