201 changes: 114 additions & 87 deletions llvm/lib/MC/MCPseudoProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,17 @@
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <memory>
#include <sstream>
#include <vector>

#define DEBUG_TYPE "mcpseudoprobe"

Expand All @@ -43,6 +48,10 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A,

void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
const MCPseudoProbe *LastProbe) const {
bool IsSentinel = isSentinelProbe(getAttributes());
assert((LastProbe || IsSentinel) &&
"Last probe should not be null for non-sentinel probes");

// Emit Index
MCOS->emitULEB128IntValue(Index);
// Emit Type and the flag:
Expand All @@ -53,10 +62,11 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
assert(Attributes <= 0x7 &&
"Probe attributes too big to encode, exceeding 7");
uint8_t PackedType = Type | (Attributes << 4);
uint8_t Flag = LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
uint8_t Flag =
!IsSentinel ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
MCOS->emitInt8(Flag | PackedType);

if (LastProbe) {
if (!IsSentinel) {
// Emit the delta between the address label and LastProbe.
const MCExpr *AddrDelta =
buildSymbolDiff(MCOS, Label, LastProbe->getLabel());
Expand All @@ -67,9 +77,8 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
MCOS->insert(new MCPseudoProbeAddrFragment(AddrDelta));
}
} else {
// Emit label as a symbolic code address.
MCOS->emitSymbolValue(
Label, MCOS->getContext().getAsmInfo()->getCodePointerSize());
// Emit the GUID of the split function that the sentinel probe represents.
MCOS->emitInt64(Guid);
}

LLVM_DEBUG({
Expand All @@ -81,7 +90,7 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
void MCPseudoProbeInlineTree::addPseudoProbe(
const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) {
// The function should only be called on the root.
assert(isRoot() && "Should not be called on root");
assert(isRoot() && "Should only be called on root");

// When it comes here, the input look like:
// Probe: GUID of C, ...
Expand Down Expand Up @@ -128,43 +137,57 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
dbgs() << "Group [\n";
MCPseudoProbeTable::DdgPrintIndent += 2;
});
assert(!isRoot() && "Root should be handled seperately");

// Emit probes grouped by GUID.
if (Guid != 0) {
LLVM_DEBUG({
dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
dbgs() << "GUID: " << Guid << "\n";
});
// Emit Guid
MCOS->emitInt64(Guid);
// Emit number of probes in this node
MCOS->emitULEB128IntValue(Probes.size());
// Emit number of direct inlinees
MCOS->emitULEB128IntValue(Children.size());
// Emit probes in this group
for (const auto &Probe : Probes) {
Probe.emit(MCOS, LastProbe);
LastProbe = &Probe;
}
} else {
assert(Probes.empty() && "Root should not have probes");
LLVM_DEBUG({
dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
dbgs() << "GUID: " << Guid << "\n";
});
// Emit Guid
MCOS->emitInt64(Guid);
// Emit number of probes in this node, including a sentinel probe for
// top-level functions if needed.
bool NeedSentinel = false;
if (Parent->isRoot()) {
assert(isSentinelProbe(LastProbe->getAttributes()) &&
"Starting probe of a top-level function should be a sentinel probe");
// The main body of a split function doesn't need a sentinel probe.
if (LastProbe->getGuid() != Guid)
NeedSentinel = true;
}

// Emit sorted descendant
// InlineSite is unique for each pair,
// so there will be no ordering of Inlinee based on MCPseudoProbeInlineTree*
std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
for (auto &Child : Children)
Inlinees[Child.first] = Child.second.get();
MCOS->emitULEB128IntValue(Probes.size() + NeedSentinel);
// Emit number of direct inlinees
MCOS->emitULEB128IntValue(Children.size());
// Emit sentinel probe for top-level functions
if (NeedSentinel)
LastProbe->emit(MCOS, nullptr);

// Emit probes in this group
for (const auto &Probe : Probes) {
Probe.emit(MCOS, LastProbe);
LastProbe = &Probe;
}

// Emit sorted descendant. InlineSite is unique for each pair, so there will
// be no ordering of Inlinee based on MCPseudoProbeInlineTree*
using InlineeType = std::pair<InlineSite, MCPseudoProbeInlineTree *>;
auto Comparer = [](const InlineeType &A, const InlineeType &B) {
return A.first < B.first;
};
std::vector<InlineeType> Inlinees;
for (const auto &Child : Children)
Inlinees.emplace_back(Child.first, Child.second.get());
std::sort(Inlinees.begin(), Inlinees.end(), Comparer);

for (const auto &Inlinee : Inlinees) {
if (Guid) {
// Emit probe index
MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first));
LLVM_DEBUG({
dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n";
});
}
// Emit probe index
MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first));
LLVM_DEBUG({
dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n";
});
// Emit the group
Inlinee.second->emit(MCOS, LastProbe);
}
Expand All @@ -176,17 +199,37 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
});
}

void MCPseudoProbeSection::emit(MCObjectStreamer *MCOS) {
void MCPseudoProbeSections::emit(MCObjectStreamer *MCOS) {
MCContext &Ctx = MCOS->getContext();

for (auto &ProbeSec : MCProbeDivisions) {
const MCPseudoProbe *LastProbe = nullptr;
if (auto *S =
Ctx.getObjectFileInfo()->getPseudoProbeSection(ProbeSec.first)) {
const auto *FuncSym = ProbeSec.first;
const auto &Root = ProbeSec.second;
if (auto *S = Ctx.getObjectFileInfo()->getPseudoProbeSection(
FuncSym->getSection())) {
// Switch to the .pseudoprobe section or a comdat group.
MCOS->switchSection(S);
// Emit probes grouped by GUID.
ProbeSec.second.emit(MCOS, LastProbe);
// Emit sorted descendant. InlineSite is unique for each pair, so there
// will be no ordering of Inlinee based on MCPseudoProbeInlineTree*
using InlineeType = std::pair<InlineSite, MCPseudoProbeInlineTree *>;
auto Comparer = [](const InlineeType &A, const InlineeType &B) {
return A.first < B.first;
};
std::vector<InlineeType> Inlinees;
for (const auto &Child : Root.getChildren())
Inlinees.emplace_back(Child.first, Child.second.get());
std::sort(Inlinees.begin(), Inlinees.end(), Comparer);

for (const auto &Inlinee : Inlinees) {
// Emit the group guarded by a sentinel probe.
MCPseudoProbe SentinelProbe(const_cast<MCSymbol *>(FuncSym),
MD5Hash(FuncSym->getName()),
(uint32_t)PseudoProbeReservedId::Invalid,
(uint32_t)PseudoProbeType::Block,
(uint32_t)PseudoProbeAttributes::Sentinel);
const MCPseudoProbe *Probe = &SentinelProbe;
Inlinee.second->emit(MCOS, Probe);
}
}
}
}
Expand Down Expand Up @@ -360,39 +403,13 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,

bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr,
std::unordered_set<uint64_t> &GuildFilter) {
const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) {
// The pseudo_probe section encodes an inline forest and each tree has a
// format like:
// FUNCTION BODY (one for each uninlined function present in the text
// section)
// GUID (uint64)
// GUID of the function
// NPROBES (ULEB128)
// Number of probes originating from this function.
// NUM_INLINED_FUNCTIONS (ULEB128)
// Number of callees inlined into this function, aka number of
// first-level inlinees
// PROBE RECORDS
// A list of NPROBES entries. Each entry contains:
// INDEX (ULEB128)
// TYPE (uint4)
// 0 - block probe, 1 - indirect call, 2 - direct call
// ATTRIBUTE (uint3)
// 1 - tail call, 2 - dangling
// ADDRESS_TYPE (uint1)
// 0 - code address, 1 - address delta
// CODE_ADDRESS (uint64 or ULEB128)
// code address or address delta, depending on Flag
// INLINED FUNCTION RECORDS
// A list of NUM_INLINED_FUNCTIONS entries describing each of the
// inlined callees. Each record contains:
// INLINE SITE
// Index of the callsite probe (ULEB128)
// FUNCTION BODY
// A FUNCTION BODY entry describing the inlined function.
// format defined in MCPseudoProbe.h

uint32_t Index = 0;
if (Cur == &DummyInlineRoot) {
bool IsTopLevelFunc = Cur == &DummyInlineRoot;
if (IsTopLevelFunc) {
// Use a sequential id for top level inliner.
Index = Cur->getChildren().size();
} else {
Expand All @@ -410,15 +427,18 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
uint64_t Guid = std::move(*ErrorOrCurGuid);

// Decide if top-level node should be discarded.
if (Cur == &DummyInlineRoot && !GuildFilter.empty() &&
!GuildFilter.count(Guid))
if (IsTopLevelFunc && !GuidFilter.empty() && !GuidFilter.count(Guid))
Cur = nullptr;

// If the incoming node is null, all its children nodes should be discarded.
if (Cur) {
// Switch/add to a new tree node(inlinee)
Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index));
Cur->Guid = Guid;
if (IsTopLevelFunc && !EncodingIsAddrBased) {
if (auto V = FuncStartAddrs.lookup(Guid))
LastAddr = V;
}
}

// Read number of probes in the current node.
Expand Down Expand Up @@ -457,9 +477,21 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
if (!ErrorOrAddr)
return false;
Addr = std::move(*ErrorOrAddr);
if (isSentinelProbe(Attr)) {
// For sentinel probe, the addr field actually stores the GUID of the
// split function. Convert it to the real address.
if (auto V = FuncStartAddrs.lookup(Addr))
Addr = V;
} else {
// For now we assume all probe encoding should be either based on
// leading probe address or function start address.
// The scheme is for downwards compatibility.
// TODO: retire this scheme once compatibility is no longer an issue.
EncodingIsAddrBased = true;
}
}

if (Cur) {
if (Cur && !isSentinelProbe(Attr)) {
// Populate Address2ProbesMap
auto &Probes = Address2ProbesMap[Addr];
Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
Expand All @@ -471,30 +503,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(

uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
for (uint32_t I = 0; I < ChildrenToProcess; I++) {
buildAddress2ProbeMap(Cur, LastAddr, GuildFilter);
buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs);
}

return true;
}

bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
const uint8_t *Start, std::size_t Size,
std::unordered_set<uint64_t> &GuildFilter) {
const uint8_t *Start, std::size_t Size, const Uint64Set &GuidFilter,
const Uint64Map &FuncStartAddrs) {
Data = Start;
End = Data + Size;
uint64_t LastAddr = 0;
while (Data < End)
buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter);
buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter,
FuncStartAddrs);
assert(Data == End && "Have unprocessed data in pseudo_probe section");
return true;
}

// Convenience overload: decode the whole section without restricting which
// top-level functions are kept. It simply forwards to the filtering overload
// with an empty GUID set.
bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
                                                 std::size_t Size) {
  // Must be a named lvalue: the filtering overload takes the set by non-const
  // reference.
  std::unordered_set<uint64_t> NoGuidFilter;
  const bool Decoded = buildAddress2ProbeMap(Start, Size, NoGuidFilter);
  return Decoded;
}

void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
OS << "Pseudo Probe Desc:\n";
// Make the output deterministic
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/MC/MCStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,8 @@ void MCStreamer::emitInstruction(const MCInst &Inst, const MCSubtargetInfo &) {

void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attr,
const MCPseudoProbeInlineStack &InlineStack) {
const MCPseudoProbeInlineStack &InlineStack,
MCSymbol *FnSym) {
auto &Context = getContext();

// Create a symbol in the current section for use in the probe.
Expand All @@ -1116,7 +1117,7 @@ void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,

// Add the probe entry to this section's entries.
Context.getMCPseudoProbeTable().getProbeSections().addPseudoProbe(
getCurrentSectionOnly(), Probe, InlineStack);
FnSym, Probe, InlineStack);
}

void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
Expand Down
14 changes: 8 additions & 6 deletions llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,18 @@
; RUN: llvm-mc -filetype=obj <%t1 -o %t4
; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ


define dso_local void @foo2() !dbg !7 {
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 foo2
ret void, !dbg !10
}

define dso_local void @foo() #0 !dbg !11 {
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]]
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 foo
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 foo
call void @foo2(), !dbg !12
ret void, !dbg !13
}
Expand All @@ -29,9 +30,9 @@ define dso_local i32 @entry() !dbg !14 {
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]]
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]]
; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2
; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 entry
; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 entry
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 entry
call void @foo(), !dbg !18
ret i32 0, !dbg !19
}
Expand Down Expand Up @@ -71,6 +72,7 @@ define dso_local i32 @entry() !dbg !14 {

; CHECK-OBJ: .pseudo_probe_desc
; CHECK-OBJ: .pseudo_probe
; CHECK-OBJ-NOT: .rela.pseudo_probe

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
Expand Down
21 changes: 11 additions & 10 deletions llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,24 @@ bb0:
%cmp = icmp eq i32 %x, 0
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 foo
br i1 %cmp, label %bb1, label %bb2

bb1:
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0 foo
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 foo
store i32 6, ptr @a, align 4
br label %bb3

bb2:
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 foo
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 foo
store i32 8, ptr @a, align 4
br label %bb3

Expand All @@ -44,22 +44,22 @@ bb3:
ret void, !dbg !12
}

declare void @bar(i32 %x)
declare void @bar(i32 %x)

define internal void @foo2(ptr %f) !dbg !4 {
entry:
; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 foo2
; Check pseudo_probe metadata attached to the indirect call instruction.
; CHECK-IL: call void %f(i32 1), !dbg ![[#PROBE0:]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 2, 1, 0
; CHECK-ASM: .pseudoprobe [[#GUID2]] 2 1 0
; CHECK-ASM: .pseudoprobe [[#GUID2]] 2 1 0 foo2
call void %f(i32 1), !dbg !13
; Check pseudo_probe metadata attached to the direct call instruction.
; CHECK-IL: call void @bar(i32 1), !dbg ![[#PROBE1:]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 3, 2, 0
; CHECK-ASM: .pseudoprobe [[#GUID2]] 3 2 0
; CHECK-ASM: .pseudoprobe [[#GUID2]] 3 2 0 foo2
call void @bar(i32 1)
ret void
}
Expand Down Expand Up @@ -92,7 +92,8 @@ entry:
; CHECK-ASM-NEXT: .ascii "foo2"

; CHECK-OBJ-COUNT-2: .pseudo_probe_desc
; CHECK-OBJ-COUNT-2: .pseudo_probe
; CHECK-OBJ: .pseudo_probe
; CHECK-OBJ-NOT: .rela.pseudo_probe

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
Expand Down
Binary file modified llvm/test/tools/llvm-profgen/Inputs/func-split.perfbin
Binary file not shown.
Binary file modified llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin
Binary file not shown.
5 changes: 2 additions & 3 deletions llvm/test/tools/llvm-profgen/inline-force-dwarf.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
; CHECK-NEXT: 2: 14
; CHECK-NEXT: 3: 15
; CHECK-NEXT: 4: 0
; CHECK-NEXT: 65526: 14
; CHECK-NEXT: 3: bar:224
; CHECK-NEXT: 65525: 14
; CHECK-NEXT: 3: bar:196
; CHECK-NEXT: 1: 14
; CHECK-NEXT: 65533: 14


; clang -O3 -fuse-ld=lld -fpseudo-probe-for-profiling
Expand Down
1 change: 0 additions & 1 deletion llvm/tools/llvm-profgen/ProfileGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,6 @@ void CSProfileGenerator::generateProfile() {
}

void CSProfileGenerator::computeSizeForProfiledFunctions() {
std::unordered_set<const BinaryFunction *> ProfiledFunctions;
for (auto *Func : Binary->getProfiledFunctions())
Binary->computeInlinedContextSizeForFunc(Func);

Expand Down
63 changes: 51 additions & 12 deletions llvm/tools/llvm-profgen/ProfiledBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,11 +204,6 @@ void ProfiledBinary::load() {
// Find the preferred load address for text sections.
setPreferredTextSegmentAddresses(Obj);

checkPseudoProbe(Obj);

if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);

// Load debug info of subprograms from DWARF section.
// If path of debug info binary is specified, use the debug info from it,
// otherwise use the debug info from the executable binary.
Expand All @@ -220,6 +215,17 @@ void ProfiledBinary::load() {
loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary));
}

DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
DisassembleFunctions.end());

checkPseudoProbe(Obj);

if (UsePseudoProbes)
populateElfSymbolAddressList(Obj);

if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);

// Disassemble the text sections.
disassemble(Obj);

Expand Down Expand Up @@ -352,10 +358,31 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
if (!UsePseudoProbes)
return;

std::unordered_set<uint64_t> ProfiledGuids;
if (!ShowDisassemblyOnly)
for (auto *F : ProfiledFunctions)
ProfiledGuids.insert(Function::getGUID(F->FuncName));
MCPseudoProbeDecoder::Uint64Set GuidFilter;
MCPseudoProbeDecoder::Uint64Map FuncStartAddresses;
if (ShowDisassemblyOnly) {
if (DisassembleFunctionSet.empty()) {
FuncStartAddresses = SymbolStartAddrs;
} else {
for (auto &F : DisassembleFunctionSet) {
auto GUID = Function::getGUID(F.first());
if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) {
FuncStartAddresses[GUID] = StartAddr;
FuncRange &Range = StartAddrToFuncRangeMap[StartAddr];
GuidFilter.insert(Function::getGUID(Range.getFuncName()));
}
}
}
} else {
for (auto *F : ProfiledFunctions) {
GuidFilter.insert(Function::getGUID(F->FuncName));
for (auto &Range : F->Ranges) {
auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
for (auto I = GUIDs.first; I != GUIDs.second; ++I)
FuncStartAddresses[I->second] = I->first;
}
}
}

StringRef FileName = Obj->getFileName();
for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
Expand All @@ -374,7 +401,7 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
StringRef Contents = unwrapOrError(Section.getContents(), FileName);
if (!ProbeDecoder.buildAddress2ProbeMap(
reinterpret_cast<const uint8_t *>(Contents.data()),
Contents.size(), ProfiledGuids))
Contents.size(), GuidFilter, FuncStartAddresses))
exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
}
}
Expand Down Expand Up @@ -578,8 +605,6 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
stable_sort(SecSyms.second);

DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
DisassembleFunctions.end());
assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
"Functions to disassemble should be only specified together with "
"--show-disassembly-only");
Expand Down Expand Up @@ -653,6 +678,20 @@ void ProfiledBinary::checkUseFSDiscriminator(
}
}

void ProfiledBinary::populateElfSymbolAddressList(
const ELFObjectFileBase *Obj) {
// Create a mapping from virtual address to symbol GUID and the other way
// around.
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
uint64_t GUID = Function::getGUID(Name);
SymbolStartAddrs[GUID] = Addr;
StartAddrToSymMap.emplace(Addr, GUID);
}
}

void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
for (const auto &DieInfo : CompilationUnit.dies()) {
llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
Expand Down
23 changes: 18 additions & 5 deletions llvm/tools/llvm-profgen/ProfiledBinary.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "CallContext.h"
#include "ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
Expand Down Expand Up @@ -166,8 +167,8 @@ class BinarySizeContextTracker {

using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
MCDecodedPseudoProbeInlineTree &ProbeNode,
ProbeFrameStack &Context);
MCDecodedPseudoProbeInlineTree &ProbeNode,
ProbeFrameStack &Context);

void dump() { RootContext.dumpTree(); }

Expand Down Expand Up @@ -218,8 +219,14 @@ class ProfiledBinary {
// A list of binary functions that have samples.
std::unordered_set<const BinaryFunction *> ProfiledFunctions;

// GUID to Elf symbol start address map
DenseMap<uint64_t, uint64_t> SymbolStartAddrs;

// Start address to Elf symbol GUID map
std::unordered_multimap<uint64_t, uint64_t> StartAddrToSymMap;

// An ordered map of mapping function's start address to function range
// relevant info. Currently to determine if the address of ELF is the start of
// relevant info. Currently to determine if the offset of ELF is the start of
// a real function, we leverage the function range info from DWARF.
std::map<uint64_t, FuncRange> StartAddrToFuncRangeMap;

Expand Down Expand Up @@ -278,7 +285,8 @@ class ProfiledBinary {
void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);

template <class ELFT>
void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName);
void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
StringRef FileName);

void checkPseudoProbe(const ELFObjectFileBase *Obj);

Expand All @@ -298,6 +306,9 @@ class ProfiledBinary {
// Load debug info from DWARF unit.
void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit);

// Create elf symbol to its start address mapping.
void populateElfSymbolAddressList(const ELFObjectFileBase *O);

// A function may be split into multiple non-contiguous address ranges. We use
// this to set whether start address of a function is the real entry of the
// function and also set false to the non-function label.
Expand Down Expand Up @@ -348,7 +359,9 @@ class ProfiledBinary {
return Address - BaseAddress + getPreferredBaseAddress();
}
// Return the preferred load address for the first executable segment.
uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; }
uint64_t getPreferredBaseAddress() const {
return PreferredTextSegmentAddresses[0];
}
// Return the preferred load address for the first loadable segment.
uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; }
// Return the file offset for the first executable segment.
Expand Down