198 changes: 159 additions & 39 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,39 +60,164 @@ const BinaryFunction *YAMLProfileWriter::setCSIDestination(
return nullptr;
}

// NOTE(review): this span appears to interleave two revisions of
// getInlineTree: one taking (PseudoProbeDecoder, GUID) and driving a Worklist
// queue from the decoder's dummy root, and one taking (Decoder, Root) and
// walking the result vector by index. The comments below describe individual
// statements as written; confirm which lines are live before editing.
YAMLProfileWriter::InlineTreeTy
YAMLProfileWriter::getInlineTree(const MCPseudoProbeDecoder &PseudoProbeDecoder,
uint64_t GUID) {
InlineTreeTy InlineTree;
std::vector<YAMLProfileWriter::InlineTreeNode>
YAMLProfileWriter::getInlineTree(const MCPseudoProbeDecoder &Decoder,
const MCDecodedPseudoProbeInlineTree *Root) {
// Returns the FuncHash of the function descriptor the decoder reports for
// Node.Guid.
auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
};
assert(Root);
std::vector<InlineTreeNode> InlineTree;
// Seed the result with Root. The two trailing zeros line up with the ParentId
// and inline-site arguments used for child nodes further down (presumably the
// root's parent id and call site; verify against InlineTreeNode).
InlineTreeNode Node{Root, Root->Guid, getHash(*Root), 0, 0};
InlineTree.emplace_back(Node);
uint32_t ParentId = 0;
uint32_t NodeId = 0;
std::queue<const MCDecodedPseudoProbeInlineTree *> Worklist;
const MCDecodedPseudoProbeInlineTree *DummyRoot =
&PseudoProbeDecoder.getDummyInlineRoot();
Worklist.push(DummyRoot);
while (!Worklist.empty()) {
const MCDecodedPseudoProbeInlineTree *Cur = Worklist.front();
// Index-driven traversal: every iteration appends the children of
// InlineTree[ParentId] to the back of InlineTree, and the loop ends once
// ParentId has caught up with the vector's size, i.e. every appended node has
// had its own children appended.
while (ParentId != InlineTree.size()) {
const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren()) {
// Queue-based variant: while NodeId is still 0, only the child whose Guid
// equals GUID is accepted, and the scan stops right after it is recorded.
if (NodeId == 0 && Child.Guid != GUID)
continue;
uint32_t InlineSite = NodeId ? std::get<1>(Child.getInlineSite()) : 0;
yaml::bolt::InlineTreeInfo YamlNode{
NodeId, ParentId, InlineSite, Child.Guid,
PseudoProbeDecoder.getFuncDescForGUID(Child.Guid)->FuncHash};
InlineTree.emplace_back(&Child, YamlNode);
Worklist.push(&Child);
if (NodeId++ == 0)
break;
// Index-based variant: record the child with the index of the node being
// expanded as its parent and the second element of its inline-site tuple.
InlineTreeNode Node{&Child, Child.Guid, getHash(Child), ParentId,
std::get<1>(Child.getInlineSite())};
InlineTree.emplace_back(Node);
}
Worklist.pop();
// Queue-based variant: ParentId only advances for nodes other than DummyRoot.
ParentId += Cur != DummyRoot;
++ParentId;
}

return InlineTree;
}

/// Builds the profile-wide pseudo probe descriptor and the lookup tables that
/// go with it.
///
/// Hashes are ranked by the number of function descriptors carrying them and
/// GUIDs by the number of inline tree nodes referencing them. Values are then
/// numbered in descending (count, value) order, so the most frequent value
/// receives index 0.
///
/// \param Decoder decoder providing the dummy inline root, the GUID to
///        function descriptor map and the inline tree node vector.
/// \returns a tuple of
///   - the YAML descriptor: Desc.Hash and Desc.GUID list the values in index
///     order, and Desc.GUIDHash[i] is the hash index of Desc.GUID[i];
///   - the InlineTreeDesc whose HashIdxMap / GUIDIdxMap map each value to its
///     assigned index and whose TopLevelGUIDToInlineTree maps the Guid of
///     every child of the dummy root to that child.
std::tuple<yaml::bolt::PseudoProbeDesc, YAMLProfileWriter::InlineTreeDesc>
YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
  yaml::bolt::PseudoProbeDesc Desc;
  InlineTreeDesc InlineTree;

  for (const MCDecodedPseudoProbeInlineTree &TopLev :
       Decoder.getDummyInlineRoot().getChildren())
    InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;

  // Phase 1: HashIdxMap and GUIDIdxMap temporarily hold occurrence counts.
  for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
    ++InlineTree.HashIdxMap[FuncDesc.FuncHash];

  InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
  for (const auto &Node : Decoder.getInlineTreeVec())
    ++InlineTree.GUIDIdxMap[Node.Guid];

  // Phase 2: order the values by (count, value). The pairs are stored count
  // first so the default pair ordering gives exactly that.
  std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
  GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
  for (const auto &[GUID, Cnt] : InlineTree.GUIDIdxMap)
    GUIDFreqVec.emplace_back(Cnt, GUID);
  llvm::sort(GUIDFreqVec);

  std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
  HashFreqVec.reserve(InlineTree.HashIdxMap.size());
  for (const auto &[Hash, Cnt] : InlineTree.HashIdxMap)
    HashFreqVec.emplace_back(Cnt, Hash);
  llvm::sort(HashFreqVec);

  // Phase 3: walk the sorted vectors back to front (highest count first) and
  // overwrite the counts in the maps with the final indices.
  uint32_t Index = 0;
  Desc.Hash.reserve(HashFreqVec.size());
  for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
    Desc.Hash.emplace_back(Hash);
    InlineTree.HashIdxMap[Hash] = Index++;
  }

  // Hashes must be numbered before GUIDs because GUIDHash stores hash indices.
  Index = 0;
  Desc.GUID.reserve(GUIDFreqVec.size());
  Desc.GUIDHash.reserve(GUIDFreqVec.size());
  for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
    Desc.GUID.emplace_back(GUID);
    InlineTree.GUIDIdxMap[GUID] = Index++;
    uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
    Desc.GUIDHash.emplace_back(InlineTree.HashIdxMap[Hash]);
  }

  // Move the locals into the tuple; copying would duplicate every vector and
  // map that was just built.
  return {std::move(Desc), std::move(InlineTree)};
}

/// Converts a NodeIdToProbes map into compact YAML probe records.
///
/// For every node id, Probes[0] becomes the block probe list, Probes[1] the
/// indirect call probe list and Probes[2] the call probe list. Nodes whose
/// three lists are identical share a single record. Block probe ids up to 64
/// are packed into BlockMask (id N sets bit N-1); larger ids stay in the
/// BlockProbes vector.
///
/// \param NodeProbes probes grouped by inline tree node id.
/// \returns one record per distinct probe set, in the iteration order of the
///          intermediate unordered_map.
std::vector<yaml::bolt::PseudoProbeInfo>
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
  struct BlockProbeInfoHasher {
    size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
      auto HashCombine = [](auto &Range) {
        return llvm::hash_combine_range(Range.begin(), Range.end());
      };
      return llvm::hash_combine(HashCombine(BPI.BlockProbes),
                                HashCombine(BPI.CallProbes),
                                HashCombine(BPI.IndCallProbes));
    }
  };

  // Check identical BlockProbeInfo structs and merge them
  std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
                     BlockProbeInfoHasher>
      BPIToNodes;
  for (auto &[NodeId, Probes] : NodeProbes) {
    yaml::bolt::PseudoProbeInfo BPI;
    BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
    BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
    BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
    BPIToNodes[BPI].push_back(NodeId);
  }

  // Splits ids between the 64-bit mask and the overflow vector.
  // NOTE(review): assumes ids are 1-based; an id of 0 would make (Id - 1)
  // wrap and the shift amount invalid. Confirm against the probe producer.
  auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
    for (auto Id : Ids)
      if (Id > 64)
        Vec.emplace_back(Id);
      else
        Mask |= 1ull << (Id - 1);
  };

  // Add to YAML with merged nodes/block mask optimizations
  std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
  YamlProbes.reserve(BPIToNodes.size());
  for (const auto &[BPI, Nodes] : BPIToNodes) {
    auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
    YamlBPI.CallProbes = BPI.CallProbes;
    YamlBPI.IndCallProbes = BPI.IndCallProbes;
    // A single node is stored as a scalar index, several nodes as a list.
    if (Nodes.size() == 1)
      YamlBPI.InlineTreeIndex = Nodes.front();
    else
      YamlBPI.InlineTreeNodes = Nodes;
    handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
    // Assume BlockMask == 1 if no other probes are set. Block probes with ids
    // above 64 count as "other probes" too: dropping the mask while
    // BlockProbes is non-empty would leave the record without any trace of
    // probe 1.
    if (YamlBPI.BlockMask == 1 && YamlBPI.BlockProbes.empty() &&
        YamlBPI.CallProbes.empty() && YamlBPI.IndCallProbes.empty())
      YamlBPI.BlockMask = 0;
  }
  return YamlProbes;
}

std::tuple<std::vector<yaml::bolt::InlineTreeInfo>,
YAMLProfileWriter::InlineTreeMapTy>
YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
const InlineTreeDesc &InlineTree,
uint64_t GUID) {
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
std::vector<yaml::bolt::InlineTreeInfo> YamlInlineTree;
auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
if (It == InlineTree.TopLevelGUIDToInlineTree.end())
return {YamlInlineTree, InlineTreeNodeId};
const MCDecodedPseudoProbeInlineTree *Root = It->second;
assert(Root);
uint32_t Index = 0;
uint32_t PrevParent = 0;
uint32_t PrevGUIDIdx = 0;
for (const auto &Node : getInlineTree(Decoder, Root)) {
InlineTreeNodeId[Node.InlineTree] = Index++;
auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end());
uint32_t GUIDIdx = GUIDIdxIt->second + 1;
if (GUIDIdx == PrevGUIDIdx)
GUIDIdx = 0;
else
PrevGUIDIdx = GUIDIdx;
YamlInlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx});
PrevParent = Node.ParentId;
}
return {YamlInlineTree, InlineTreeNodeId};
}

yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const InlineTreeDesc &InlineTree,
const BoltAddressTranslation *BAT) {
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();
Expand All @@ -112,11 +237,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.ExecCount = BF.getKnownExecutionCount();
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
if (PseudoProbeDecoder && BF.getGUID()) {
for (const auto &[InlineTreeNode, YamlInlineTree] :
getInlineTree(*PseudoProbeDecoder, BF.getGUID())) {
InlineTreeNodeId[InlineTreeNode] = YamlInlineTree.Index;
YamlBF.InlineTree.emplace_back(YamlInlineTree);
}
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
}

BinaryFunction::BasicBlockOrderType Order;
Expand Down Expand Up @@ -234,16 +356,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
BB->getInputAddressRange();
const std::pair<uint64_t, uint64_t> BlockAddrRange = {
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
for (const MCDecodedPseudoProbe &Probe :
ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second)) {
uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
uint32_t Offset = Probe.getAddress() - BlockAddrRange.first;
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getIndex(), NodeId, Offset, Probe.getType()});
}
llvm::sort(YamlBB.PseudoProbes);
YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
YamlBB.PseudoProbes.end());
auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
}

YamlBF.Blocks.emplace_back(YamlBB);
Expand Down Expand Up @@ -298,14 +412,20 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
}
BP.Header.Flags = ProfileFlags;

// Add probe inline tree nodes.
InlineTreeDesc InlineTree;
if (const MCPseudoProbeDecoder *Decoder =
opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);

// Add all function objects.
for (const auto &BFI : Functions) {
const BinaryFunction &BF = BFI.second;
if (BF.hasProfile()) {
if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
continue;

BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS));
BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
}
}

Expand Down
7 changes: 0 additions & 7 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,6 @@ void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) {
}
}
}
if (ProfiledOnly) {
for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) {
uint64_t GUID = FuncDesc.FuncGUID;
if (!FuncStartAddrs.contains(GUID))
GuidFilter.insert(GUID);
}
}
Contents = PseudoProbeSection->getContents();
if (!ProbeDecoder.buildAddress2ProbeMap(
reinterpret_cast<const uint8_t *>(Contents.data()), Contents.size(),
Expand Down
30 changes: 11 additions & 19 deletions bolt/test/X86/pseudoprobe-decoding-inline.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,23 @@
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes:
# CHECK-YAML-NEXT: - { id: 1, type: 0
# CHECK-YAML-NEXT: - { id: 4, type: 0
# CHECK-YAML: inline_tree:
# CHECK-YAML-NEXT: - { guid: 0xE413754A191DB537, hash: 0x10E852DA94, id: 0 }
# CHECK-YAML: probes: [ { blk: 9 } ]
# CHECK-YAML: inline_tree: [ { g: 1 } ]
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes:
# CHECK-YAML-NEXT: - { id: 1, type: 0 }
# CHECK-YAML-NEXT: - { id: 2, type: 0 }
# CHECK-YAML: inline_tree:
# CHECK-YAML-NEXT: - { guid: 0x5CF8C24CDB18BDAC, hash: 0x200205A19C5B4, id: 0 }
# CHECK-YAML-NEXT: - { guid: 0xE413754A191DB537, hash: 0x10E852DA94, id: 1, callsite: 8 }
# CHECK-YAML: probes: [ { blk: 3 } ]
# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 8 } ]
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes:
# CHECK-YAML-NEXT: - { id: 1, type: 0 }
# CHECK-YAML-NEXT: - { id: 1, type: 0, inline_tree_id: 1 }
# CHECK-YAML-NEXT: - { id: 2, type: 0, inline_tree_id: 1 }
# CHECK-YAML: inline_tree:
# CHECK-YAML-NEXT: - { guid: 0xDB956436E78DD5FA, hash: 0x10000FFFFFFFF, id: 0 }
# CHECK-YAML-NEXT: - { guid: 0x5CF8C24CDB18BDAC, hash: 0x200205A19C5B4, id: 1, callsite: 2 }
# CHECK-YAML-NEXT: - { guid: 0xE413754A191DB537, hash: 0x10E852DA94, id: 2, parent: 1, callsite: 8 }
# CHECK-YAML: probes: [ { blk: 3, id: 1 }, { } ]
# CHECK-YAML: inline_tree: [ { g: 3 }, { g: 2, cs: 2 }, { g: 1, p: 1, cs: 8 } ]
#
# CHECK-YAML: pseudo_probe_desc:
# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ]
# CHECK-YAML-NEXT: gh: [ 2, 0, 1 ]
# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
#
## Check that without --profile-write-pseudo-probes option, no pseudo probes are
## generated
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/MC/MCPseudoProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,10 @@ class MCPseudoProbeDecoder {
return iterator_range(It->second);
}

/// Returns an ArrayRef over the decoder's InlineTreeVec, giving callers
/// element-wise read access to the stored inline tree nodes.
// NOTE(review): the top-level const on the by-value return type is likely
// unnecessary; consider dropping it if no caller depends on it.
const ArrayRef<MCDecodedPseudoProbeInlineTree> getInlineTreeVec() const {
return InlineTreeVec;
}

private:
// Recursively parse an inlining tree encoded in pseudo_probe section. Returns
// whether the top-level node should be skipped.
Expand Down