311 changes: 311 additions & 0 deletions llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
Expand All @@ -30,16 +33,29 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

// Warning-control command-line options. They are only declared here (extern),
// so their definitions live in another translation unit. readMemprof consults
// them to decide whether a profile lookup failure should produce a warning.
namespace llvm {
// When false, readMemprof stays silent about functions that have no profile
// record (instrprof_error::unknown_function).
extern cl::opt<bool> PGOWarnMissing;
// When true, readMemprof does not warn on instrprof_error::hash_mismatch.
extern cl::opt<bool> NoPGOWarnMismatch;
// When true, readMemprof does not warn on a hash mismatch for functions that
// are in a comdat or have available_externally linkage.
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
Expand Down Expand Up @@ -128,6 +144,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
// Incremented by readMemprof each time the profile reader reports
// instrprof_error::unknown_function for a function being annotated.
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");

namespace {

Expand Down Expand Up @@ -601,3 +618,297 @@ bool MemProfiler::instrumentFunction(Function &F) {

return FunctionModified;
}

/// Attach !callsite metadata to \p I.
///
/// \param I                Instruction that receives the metadata.
/// \param InlinedCallStack Stack ids used to build the metadata node.
/// \param Ctx              Context in which the metadata node is created.
static void addCallsiteMetadata(Instruction &I,
                                std::vector<uint64_t> &InlinedCallStack,
                                LLVMContext &Ctx) {
  auto *CallsiteMD = buildCallstackMetadata(InlinedCallStack, Ctx);
  I.setMetadata(LLVMContext::MD_callsite, CallsiteMD);
}

/// Compute the 64-bit stack id identifying a source location.
///
/// The id is an 8-byte truncated BLAKE3 digest over the tuple
/// (\p Function, \p LineOffset, \p Column), fed to the hash builder in
/// little-endian form.
///
/// \param Function   GUID of the function containing the location.
/// \param LineOffset Line offset of the location.
/// \param Column     Column of the location.
/// \returns the digest bytes reinterpreted as a uint64_t.
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  // Size the copy by the destination and verify the digest is exactly that
  // wide, so a change to the truncation width can never overrun Id.
  static_assert(sizeof(Hash) == sizeof(Id),
                "truncated BLAKE3 digest must be exactly 64 bits wide");
  std::memcpy(&Id, Hash.data(), sizeof(Id));
  return Id;
}

/// Convenience overload: compute the stack id of a profile \p Frame from its
/// Function, LineOffset and Column fields.
static uint64_t computeStackId(const memprof::Frame &Frame) {
  const auto FrameFunction = Frame.Function;
  const auto FrameLineOffset = Frame.LineOffset;
  const auto FrameColumn = Frame.Column;
  return computeStackId(FrameFunction, FrameLineOffset, FrameColumn);
}

/// Add the allocation context described by \p AllocInfo to \p AllocTrie.
///
/// Every frame of the profiled call stack is converted to its stack id, and the
/// allocation type is derived from the profile's total lifetime access density,
/// allocation count and total lifetime.
static void addCallStack(CallStackTrie &AllocTrie,
                         const AllocationInfo *AllocInfo) {
  // Translate the profiled frames into stack ids, preserving their order.
  SmallVector<uint64_t> Ids;
  for (const auto &ProfiledFrame : AllocInfo->CallStack) {
    const uint64_t Id = computeStackId(ProfiledFrame);
    Ids.push_back(Id);
  }

  // Classify the allocation from its profiled statistics.
  const auto &Stats = AllocInfo->Info;
  auto Kind = getAllocType(Stats.getTotalLifetimeAccessDensity(),
                           Stats.getAllocCount(), Stats.getTotalLifetime());
  AllocTrie.addCallStack(Kind, Ids);
}

// Checks whether the stack ids computed from an instruction's debug info
// (InlinedCallStack) line up, one by one, with the frames of a profiled call
// stack (ProfileCallStack: allocation data or a callsite), starting at frame
// StartIndex. For callsites StartIndex may be non-zero.
//
// Returns true only if every id in InlinedCallStack was compared and matched;
// running out of profile frames first yields false.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto ProfileIt = ProfileCallStack.begin() + StartIndex;
  const auto ProfileEnd = ProfileCallStack.end();
  auto InlinedIt = InlinedCallStack.begin();
  const auto InlinedEnd = InlinedCallStack.end();

  // Advance both sequences in lock step until either is exhausted.
  while (ProfileIt != ProfileEnd && InlinedIt != InlinedEnd) {
    if (computeStackId(*ProfileIt) != *InlinedIt)
      return false;
    ++ProfileIt;
    ++InlinedIt;
  }

  // Success requires that the instruction's whole id list was consumed.
  return InlinedIt == InlinedEnd;
}

/// Annotate the call instructions of \p F with metadata derived from the memory
/// profile read through \p MemProfReader.
///
/// The profile record is looked up by the GUID of F's PGO function name. If the
/// lookup fails, a DS_Warning diagnostic is emitted (unless suppressed by the
/// PGO warning options) and the function is left untouched. Otherwise every
/// non-intrinsic call is matched, via the location hashes of its debug
/// location chain, against the record's allocation sites and call sites:
///  - matching new-like allocation calls get MIB metadata (or a function
///    attribute) built by CallStackTrie, plus !callsite metadata when MIB
///    metadata was attached;
///  - matching interior calls get !callsite metadata.
///
/// \param M             Module containing \p F; supplies the context and the
///                      name used in diagnostics.
/// \param F             Function to annotate.
/// \param MemProfReader Reader used to fetch the memprof record.
/// \param TLI           Library info used to recognize new-like allocations.
static void readMemprof(Module &M, Function &F,
IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI) {
auto &Ctx = M.getContext();

auto FuncName = getPGOFuncName(F);
auto FuncGUID = Function::getGUID(FuncName);
Expected<memprof::MemProfRecord> MemProfResult =
MemProfReader->getMemProfRecord(FuncGUID);
if (Error E = MemProfResult.takeError()) {
// Only InstrProfError is given a handler here.
handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
auto Err = IPE.get();
bool SkipWarning = false;
LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
<< ": ");
if (Err == instrprof_error::unknown_function) {
// No record for this function: count it, and warn only when asked to.
NumOfMemProfMissing++;
SkipWarning = !PGOWarnMissing;
LLVM_DEBUG(dbgs() << "unknown function");
} else if (Err == instrprof_error::hash_mismatch) {
// Mismatch warnings can be disabled entirely, or just for comdat /
// available_externally functions.
SkipWarning =
NoPGOWarnMismatch ||
(NoPGOWarnMismatchComdatWeak &&
(F.hasComdat() ||
F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
}

if (SkipWarning)
return;

std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
Twine(" Hash = ") + std::to_string(FuncGUID))
.str();

Ctx.diagnose(
DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
});
// Without a record there is nothing to annotate.
return;
}

// Build maps of the location hash to all profile data with that leaf location
// (allocation info and the callsites).
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
// For the callsites we need to record the index of the associated frame in
// the frame array (see comments below where the map entries are added).
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
LocHashToCallSites;
// The maps above store pointers into MemProfRec, which lives until the end of
// this function.
const auto MemProfRec = std::move(MemProfResult.get());
for (auto &AI : MemProfRec.AllocSites) {
// Associate the allocation info with the leaf frame. The later matching
// code will match any inlined call sequences in the IR with a longer prefix
// of call stack frames.
// NOTE(review): indexes CallStack[0] without checking for an empty call
// stack -- presumably the profile guarantees at least one frame; confirm.
uint64_t StackId = computeStackId(AI.CallStack[0]);
LocHashToAllocInfo[StackId].insert(&AI);
}
for (auto &CS : MemProfRec.CallSites) {
// Need to record all frames from leaf up to and including this function,
// as any of these may or may not have been inlined at this point.
unsigned Idx = 0;
for (auto &StackFrame : CS) {
uint64_t StackId = computeStackId(StackFrame);
LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
// Once we find this function, we can stop recording.
if (StackFrame.Function == FuncGUID)
break;
}
// NOTE(review): if CS were empty, Idx would be 0 and CS[Idx - 1] would index
// with a wrapped-around unsigned value -- presumably callsite stacks are
// never empty; confirm.
assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
}

// Line offset of a location relative to the start line of its subprogram,
// truncated to the low 16 bits.
auto GetOffset = [](const DILocation *DIL) {
return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
0xffff;
};

// Now walk the instructions, looking up the associated profile data using
// debug locations.
for (auto &BB : F) {
for (auto &I : BB) {
if (I.isDebugOrPseudoInst())
continue;
// We are only interested in calls (allocation or interior call stack
// context calls).
auto *CI = dyn_cast<CallBase>(&I);
if (!CI)
continue;
auto *CalledFunction = CI->getCalledFunction();
if (CalledFunction && CalledFunction->isIntrinsic())
continue;
// List of call stack ids computed from the location hashes on debug
// locations (leaf to inlined at root).
std::vector<uint64_t> InlinedCallStack;
// Was the leaf location found in one of the profile maps?
bool LeafFound = false;
// If leaf was found in a map, iterators pointing to its location in both
// of the maps. It might exist in neither, one, or both (the latter case
// can happen because we don't currently have discriminators to
// distinguish the case when a single line/col maps to both an allocation
// and another callsite).
// These iterators are only meaningful once LeafFound is true.
std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
AllocInfoIter;
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
unsigned>>>::iterator CallSitesIter;
// Walk the inlined-at chain of the instruction's debug location, starting
// at the instruction's own location.
for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
DIL = DIL->getInlinedAt()) {
// Use C++ linkage name if possible. Need to compile with
// -fdebug-info-for-profiling to get linkage name.
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
if (Name.empty())
Name = DIL->getScope()->getSubprogram()->getName();
auto CalleeGUID = Function::getGUID(Name);
auto StackId =
computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
// LeafFound will only be false on the first iteration, since we either
// set it true or break out of the loop below.
if (!LeafFound) {
AllocInfoIter = LocHashToAllocInfo.find(StackId);
CallSitesIter = LocHashToCallSites.find(StackId);
// Check if the leaf is in one of the maps. If not, no need to look
// further at this call.
if (AllocInfoIter == LocHashToAllocInfo.end() &&
CallSitesIter == LocHashToCallSites.end())
break;
LeafFound = true;
}
InlinedCallStack.push_back(StackId);
}
// If leaf not in either of the maps, skip inst.
if (!LeafFound)
continue;

// First add !memprof metadata from allocation info, if we found the
// instruction's leaf location in that map, and if the rest of the
// instruction's locations match the prefix Frame locations on an
// allocation context with the same leaf.
if (AllocInfoIter != LocHashToAllocInfo.end()) {
// Only consider allocations via new, to reduce unnecessary metadata,
// since those are the only allocations that will be targeted initially.
if (!isNewLikeFn(CI, &TLI))
continue;
// We may match this instruction's location list to multiple MIB
// contexts. Add them to a Trie specialized for trimming the contexts to
// the minimal needed to disambiguate contexts with unique behavior.
CallStackTrie AllocTrie;
for (auto *AllocInfo : AllocInfoIter->second) {
// Check the full inlined call stack against this one.
// If we found and thus matched all frames on the call, include
// this MIB.
if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
InlinedCallStack))
addCallStack(AllocTrie, AllocInfo);
}
// We might not have matched any to the full inlined call stack.
// But if we did, create and attach metadata, or a function attribute if
// all contexts have identical profiled behavior.
if (!AllocTrie.empty()) {
// MemprofMDAttached will be false if a function attribute was
// attached.
bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
if (MemprofMDAttached) {
// Add callsite metadata for the instruction's location list so that
// it is simpler later on to identify which part of the MIB contexts
// are from this particular instruction (including during inlining,
// when the callsite metadata will be updated appropriately).
// FIXME: can this be changed to strip out the matching stack
// context ids from the MIB contexts and not add any callsite
// metadata here to save space?
addCallsiteMetadata(I, InlinedCallStack, Ctx);
}
}
// A leaf found in the allocation map is never also treated as an interior
// callsite, even when no allocation context matched.
continue;
}

// Otherwise, add callsite metadata. If we reach here then we found the
// instruction's leaf location in the callsites map and not the allocation
// map.
assert(CallSitesIter != LocHashToCallSites.end());
for (auto CallStackIdx : CallSitesIter->second) {
// If we found and thus matched all frames on the call, create and
// attach call stack metadata.
if (stackFrameIncludesInlinedCallStack(
*CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
addCallsiteMetadata(I, InlinedCallStack, Ctx);
// Only need to find one with a matching call stack and add a single
// callsite metadata.
break;
}
}
}
}
}

/// Construct the pass for the profile at \p MemoryProfileFile.
///
/// \param MemoryProfileFile Path of the profile to read when the pass runs.
/// \param FS                File system used to open the profile; when null,
///                          the real file system is used instead.
MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    // Both parameters are taken by value as sinks, so move them into the
    // members instead of copying the string and bumping the reference count.
    : MemoryProfileFileName(std::move(MemoryProfileFile)), FS(std::move(FS)) {
  // The parameter has been moved from; inspect the member.
  if (!this->FS)
    this->FS = vfs::getRealFileSystem();
}

/// Pass entry point: open the indexed profile named by MemoryProfileFileName
/// and run readMemprof on every function definition in \p M.
///
/// A diagnostic is emitted and PreservedAnalyses::all() is returned when the
/// reader cannot be created, when no reader object is produced, or when the
/// profile holds no memory profile. Otherwise all definitions are processed and
/// PreservedAnalyses::none() is returned.
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Context = M.getContext();
  // File name reported in every diagnostic below.
  const char *ProfileName = MemoryProfileFileName.data();

  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error OpenErr = ReaderOrErr.takeError()) {
    // Forward each underlying error message as a profile diagnostic.
    handleAllErrors(std::move(OpenErr), [&](const ErrorInfoBase &Info) {
      Context.diagnose(DiagnosticInfoPGOProfile(ProfileName, Info.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> Reader =
      std::move(ReaderOrErr.get());
  if (!Reader) {
    Context.diagnose(DiagnosticInfoPGOProfile(
        ProfileName, StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!Reader->hasMemoryProfile()) {
    Context.diagnose(
        DiagnosticInfoPGOProfile(ProfileName, "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Only definitions are annotated; declarations are skipped.
  for (auto &F : M) {
    if (!F.isDeclaration())
      readMemprof(M, F, Reader.get(), FAM.getResult<TargetLibraryAnalysis>(F));
  }

  return PreservedAnalyses::none();
}
292 changes: 9 additions & 283 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Large diffs are not rendered by default.

29 changes: 23 additions & 6 deletions llvm/test/Transforms/PGOProfile/memprof.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,36 @@
; ALL-NOT: memprof record not found for function hash
; ALL-NOT: no profile data available for function

;; Feed back memprof-only profile
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
;; Using a memprof-only profile for memprof-use should only give memprof metadata
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
; There should not be any PGO metadata
; MEMPROFONLY-NOT: !prof

;; Feed back pgo-only profile
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgoprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY
;; Test the same thing but by passing the memory profile through to a default
;; pipeline via -memory-profile-file=, which should cause the necessary field
;; of the PGOOptions structure to be populated with the profile filename.
; RUN: opt < %s -passes='default<O2>' -memory-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY

;; Using a pgo+memprof profile for memprof-use should only give memprof metadata
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY

;; Using a pgo-only profile for memprof-use should give an error
; RUN: not opt < %s -passes='memprof-use<profile-filename=%t.pgoprofdata>' -S 2>&1 | FileCheck %s --check-prefixes=MEMPROFWITHPGOONLY
; MEMPROFWITHPGOONLY: Not a memory profile

;; Using a memprof-only profile for pgo-instr-use should give an error
; RUN: not opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -S 2>&1 | FileCheck %s --check-prefixes=PGOWITHMEMPROFONLY
; PGOWITHMEMPROFONLY: Not an IR level instrumentation profile

;; Using a pgo+memprof profile for pgo-instr-use should only give pgo metadata
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY
; There should not be any memprof related metadata
; PGOONLY-NOT: !memprof
; PGOONLY-NOT: !callsite

;; Feed back pgo+memprof-only profile
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,PGO,ALL
;; Using a pgo+memprof profile for both memprof-use and pgo-instr-use should
;; give both memprof and pgo metadata.
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO

; ModuleID = 'memprof.cc'
source_filename = "memprof.cc"
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata

; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s

; CHECK: memprof record not found for function hash {{.*}} _Z16funcnotinprofilev

Expand Down
20 changes: 13 additions & 7 deletions llvm/tools/opt/NewPMDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ static cl::opt<PGOKind>
"Use sampled profile to guide PGO.")));
static cl::opt<std::string> ProfileFile("profile-file",
cl::desc("Path to the profile."), cl::Hidden);
static cl::opt<std::string>
MemoryProfileFile("memory-profile-file",
cl::desc("Path to the memory profile."), cl::Hidden);

static cl::opt<CSPGOKind> CSPGOKindFlag(
"cspgo-kind", cl::init(NoCSPGO), cl::Hidden,
Expand Down Expand Up @@ -336,19 +339,21 @@ bool llvm::runPassPipeline(
std::optional<PGOOptions> P;
switch (PGOKindFlag) {
case InstrGen:
P = PGOOptions(ProfileFile, "", "", FS, PGOOptions::IRInstr);
P = PGOOptions(ProfileFile, "", "", MemoryProfileFile, FS,
PGOOptions::IRInstr);
break;
case InstrUse:
P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS,
P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS,
PGOOptions::IRUse);
break;
case SampleUse:
P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS,
P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS,
PGOOptions::SampleUse);
break;
case NoPGO:
if (DebugInfoForProfiling || PseudoProbeForProfiling)
P = PGOOptions("", "", "", nullptr, PGOOptions::NoAction,
if (DebugInfoForProfiling || PseudoProbeForProfiling ||
!MemoryProfileFile.empty())
P = PGOOptions("", "", "", MemoryProfileFile, FS, PGOOptions::NoAction,
PGOOptions::NoCSAction, DebugInfoForProfiling,
PseudoProbeForProfiling);
else
Expand All @@ -369,8 +374,9 @@ bool llvm::runPassPipeline(
P->CSAction = PGOOptions::CSIRInstr;
P->CSProfileGenFile = CSProfileGenFile;
} else
P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, FS,
PGOOptions::NoAction, PGOOptions::CSIRInstr);
P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile,
/*MemoryProfile=*/"", FS, PGOOptions::NoAction,
PGOOptions::CSIRInstr);
} else /* CSPGOKindFlag == CSInstrUse */ {
if (!P) {
errs() << "CSInstrUse needs to be together with InstrUse";
Expand Down