diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index 41e2bd1651efd..b393c85321b7d 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -29,6 +29,10 @@ struct CallSiteInfo { uint32_t EntryDiscriminator{0}; /// multiple entry discriminator uint64_t Count{0}; uint64_t Mispreds{0}; + // Pseudo probe information, optional + uint32_t Probe{0}; + bool Indirect = false; + uint32_t InlineTreeNode{0}; bool operator==(const CallSiteInfo &Other) const { return Offset == Other.Offset && DestId == Other.DestId && @@ -63,6 +67,9 @@ template <> struct MappingTraits { YamlIO.mapOptional("disc", CSI.EntryDiscriminator, (uint32_t)0); YamlIO.mapRequired("cnt", CSI.Count); YamlIO.mapOptional("mis", CSI.Mispreds, (uint64_t)0); + YamlIO.mapOptional("pp", CSI.Probe, 0); + YamlIO.mapOptional("ppn", CSI.InlineTreeNode, 0); + YamlIO.mapOptional("ind", CSI.Indirect, false); } static const bool flow = true; @@ -95,29 +102,20 @@ template <> struct MappingTraits { namespace bolt { struct PseudoProbeInfo { - uint32_t InlineTreeIndex = 0; - uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64 - std::vector BlockProbes; // block probes with indices above 64 - std::vector CallProbes; - std::vector IndCallProbes; + std::vector BlockProbes; std::vector InlineTreeNodes; bool operator==(const PseudoProbeInfo &Other) const { - return InlineTreeIndex == Other.InlineTreeIndex && - BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes && - IndCallProbes == Other.IndCallProbes; + return InlineTreeNodes == Other.InlineTreeNodes && + BlockProbes == Other.BlockProbes; } }; } // end namespace bolt template <> struct MappingTraits { static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) { - YamlIO.mapOptional("blx", PI.BlockMask, 0); - YamlIO.mapOptional("blk", PI.BlockProbes, std::vector()); - YamlIO.mapOptional("call", PI.CallProbes, std::vector()); - YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector()); - YamlIO.mapOptional("id", PI.InlineTreeIndex, 0); - YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector()); + YamlIO.mapOptional("blk", PI.BlockProbes, std::vector(1, 1)); + YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector(1, 0)); } static const bool flow = true; diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h index d4d7217464cc8..50ee78d342df8 100644 --- a/bolt/include/bolt/Profile/YAMLProfileWriter.h +++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h @@ -74,25 +74,24 @@ class YAMLProfileWriter { collectInlineTree(const MCPseudoProbeDecoder &Decoder, const MCDecodedPseudoProbeInlineTree &Root); - // 0 - block probe, 1 - indirect call, 2 - direct call - using ProbeList = std::array, 3>; - using NodeIdToProbes = DenseMap; - static std::vector - convertNodeProbes(NodeIdToProbes &NodeProbes); - public: - template - static std::vector - writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) { - NodeIdToProbes NodeProbes; - for (const MCDecodedPseudoProbe &Probe : Probes) { - auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode()); - if (It == InlineTreeNodeId.end()) - continue; - NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex()); - } - return convertNodeProbes(NodeProbes); - } + class BlockProbeCtx { + struct Call { + uint64_t Id; + uint32_t Node; + bool Indirect; + bool Used; + }; + // Group block probes by node id. + DenseMap> NodeToProbes; + // Offset -> call probe + DenseMap CallProbes; + + public: + void addBlockProbe(const InlineTreeMapTy &Map, + const MCDecodedPseudoProbe &Probe, uint32_t ProbeOffset); + void finalize(yaml::bolt::BinaryBasicBlockProfile &YamlBB); + }; }; } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 3604fdd3a94b4..6a12efdcb96c0 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2383,10 +2383,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, PseudoProbeDecoder->getAddress2ProbesMap(); BinaryFunction::FragmentsSetTy Fragments(BF->Fragments); Fragments.insert(BF); - DenseMap< - uint32_t, - std::vector>> - BlockProbes; + DenseMap BlockCtx; for (const BinaryFunction *F : Fragments) { const uint64_t FuncAddr = F->getAddress(); for (const MCDecodedPseudoProbe &Probe : @@ -2394,15 +2391,14 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, const uint32_t OutputAddress = Probe.getAddress(); const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); - const unsigned BlockIndex = getBlock(InputOffset).second; - BlockProbes[BlockIndex].emplace_back(Probe); + const auto &[BlockOffset, BlockIndex] = getBlock(InputOffset); + BlockCtx[BlockIndex].addBlockProbe(InlineTreeNodeId, Probe, + InputOffset - BlockOffset); } } - for (auto &[Block, Probes] : BlockProbes) { - YamlBF.Blocks[Block].PseudoProbes = - YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId); - } + for (auto &[Block, Ctx] : BlockCtx) + Ctx.finalize(YamlBF.Blocks[Block]); } // Skip printing if there's no profile data llvm::erase_if( diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 1a61949d77472..5fb65153cf313 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -348,26 +348,10 @@ class StaleMatcher { return It->second; }; - auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo - &ProfileProbe, - uint32_t NodeId) { - for (uint64_t Index = 0; Index < 64; ++Index) - if (ProfileProbe.BlockMask & 1ull << Index) - ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)]; - for (const auto &ProfileProbes : - {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes, - ProfileProbe.CallProbes}) - for (uint64_t ProfileProbe : ProfileProbes) - ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)]; - }; - - for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) { - if (!ProfileProbe.InlineTreeNodes.empty()) - for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes) - matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode); - else - matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex); - } + for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) + for (uint32_t Node : ProfileProbe.InlineTreeNodes) + for (uint64_t Probe : ProfileProbe.BlockProbes) + ++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)]; uint32_t BestMatchCount = 0; uint32_t TotalMatchCount = 0; const FlowBlock *BestMatchBlock = nullptr; diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 1632aa1c6bfe2..82f2114a7a3d2 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -129,50 +129,62 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) { return {Desc, InlineTree}; } -std::vector -YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) { - struct BlockProbeInfoHasher { - size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const { - return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes), - llvm::hash_combine_range(BPI.CallProbes), - llvm::hash_combine_range(BPI.IndCallProbes)); +void YAMLProfileWriter::BlockProbeCtx::addBlockProbe( + const InlineTreeMapTy &Map, const MCDecodedPseudoProbe &Probe, + uint32_t ProbeOffset) { + auto It = Map.find(Probe.getInlineTreeNode()); + if (It == Map.end()) + return; + auto NodeId = It->second; + uint32_t Index = Probe.getIndex(); + if (Probe.isCall()) + CallProbes[ProbeOffset] = + Call{Index, NodeId, Probe.isIndirectCall(), false}; + else + NodeToProbes[NodeId].emplace_back(Index); +} + +void YAMLProfileWriter::BlockProbeCtx::finalize( + yaml::bolt::BinaryBasicBlockProfile &YamlBB) { + // Hash block probes by vector + struct ProbeHasher { + size_t operator()(const ArrayRef Probes) const { + return llvm::hash_combine_range(Probes); } }; - // Check identical BlockProbeInfo structs and merge them - std::unordered_map, - BlockProbeInfoHasher> - BPIToNodes; - for (auto &[NodeId, Probes] : NodeProbes) { - yaml::bolt::PseudoProbeInfo BPI; - BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end()); - BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end()); - BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end()); - BPIToNodes[BPI].push_back(NodeId); + // Check identical block probes and merge them + std::unordered_map, std::vector, ProbeHasher> + ProbesToNodes; + for (auto &[NodeId, Probes] : NodeToProbes) { + llvm::sort(Probes); + ProbesToNodes[Probes].emplace_back(NodeId); } - - auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) { - for (auto Id : Ids) - if (Id > 64) - Vec.emplace_back(Id); - else - Mask |= 1ull << (Id - 1); - }; - - // Add to YAML with merged nodes/block mask optimizations - std::vector YamlProbes; - YamlProbes.reserve(BPIToNodes.size()); - for (const auto &[BPI, Nodes] : BPIToNodes) { - auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo()); - YamlBPI.CallProbes = BPI.CallProbes; - YamlBPI.IndCallProbes = BPI.IndCallProbes; - if (Nodes.size() == 1) - YamlBPI.InlineTreeIndex = Nodes.front(); - else - YamlBPI.InlineTreeNodes = Nodes; - handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask); + for (auto &[Probes, Nodes] : ProbesToNodes) { + llvm::sort(Nodes); + YamlBB.PseudoProbes.emplace_back( + yaml::bolt::PseudoProbeInfo{Probes, Nodes}); + } + for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) { + auto It = CallProbes.find(CSI.Offset); + if (It == CallProbes.end()) + continue; + Call &Probe = It->second; + CSI.Probe = Probe.Id; + CSI.InlineTreeNode = Probe.Node; + CSI.Indirect = Probe.Indirect; + Probe.Used = true; + } + for (const auto &[Offset, Probe] : CallProbes) { + if (Probe.Used) + continue; + yaml::bolt::CallSiteInfo CSI; + CSI.Offset = Offset; + CSI.Probe = Probe.Id; + CSI.InlineTreeNode = Probe.Node; + CSI.Indirect = Probe.Indirect; + YamlBB.CallSites.emplace_back(CSI); } - return YamlProbes; } std::tuple, @@ -343,12 +355,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const AddressProbesMap &ProbeMap = PseudoProbeDecoder->getAddress2ProbesMap(); const uint64_t FuncAddr = BF.getAddress(); - const std::pair &BlockRange = - BB->getInputAddressRange(); - const std::pair BlockAddrRange = { - FuncAddr + BlockRange.first, FuncAddr + BlockRange.second}; - auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second); - YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId); + auto [Start, End] = BB->getInputAddressRange(); + Start += FuncAddr; + End += FuncAddr; + BlockProbeCtx Ctx; + for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(Start, End)) + Ctx.addBlockProbe(InlineTreeNodeId, Probe, Probe.getAddress() - Start); + Ctx.finalize(YamlBB); } YamlBF.Blocks.emplace_back(YamlBB); diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test index accb4742851ea..9224cf163dbcc 100644 --- a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test +++ b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test @@ -30,7 +30,7 @@ functions: insns: 11 hash: 0x1 exec: 1 - probes: [ { blx: 9 } ] + probes: [ { blk: [ 1, 4 ] } ] inline_tree: [ { } ] - name: foo fid: 10 @@ -43,7 +43,7 @@ functions: hash: 0x2 exec: 1 succ: [ { bid: 3, cnt: 0 } ] - probes: [ { blx: 3 } ] + probes: [ { blk: [ 1, 2 ] } ] inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ] - name: main fid: 11 @@ -56,7 +56,7 @@ functions: hash: 0x3 exec: 1 succ: [ { bid: 3, cnt: 0 } ] - probes: [ { blx: 3, id: 1 }, { blx: 1 } ] + probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { blk: [ 1 ] } ] inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ] pseudo_probe_desc: gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ] diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test index 40cb64ee82919..7be327d698b17 100644 --- a/bolt/test/X86/match-blocks-with-pseudo-probes.test +++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test @@ -55,7 +55,7 @@ functions: hash: 0xFFFFFFFFFFFFFFF1 insns: 1 succ: [ { bid: 3, cnt: 1} ] - probes: [ { blx: 1 } ] + probes: [ { blk: [ 1 ] } ] inline_tree: [ { g: 0 } ] pseudo_probe_desc: gs: [ 0xDB956436E78DD5FA ] diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test index e5e8aadc18f9e..9748fc1b6a4d4 100644 --- a/bolt/test/X86/pseudoprobe-decoding-inline.test +++ b/bolt/test/X86/pseudoprobe-decoding-inline.test @@ -14,17 +14,17 @@ # RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML # CHECK-YAML: name: bar # CHECK-YAML: - bid: 0 -# CHECK-YAML: probes: [ { blx: 9 } ] +# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ] # CHECK-YAML: inline_tree: [ { } ] # # CHECK-YAML: name: foo # CHECK-YAML: - bid: 0 -# CHECK-YAML: probes: [ { blx: 3 } ] +# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ] # CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ] # # CHECK-YAML: name: main # CHECK-YAML: - bid: 0 -# CHECK-YAML: probes: [ { blx: 3, id: 1 }, { blx: 1 } ] +# CHECK-YAML: probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { } ] # CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ] # # CHECK-YAML: pseudo_probe_desc: diff --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test index 36a2fab74e857..4ba51cdc96f9e 100644 --- a/bolt/test/X86/pseudoprobe-decoding-noinline.test +++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test @@ -15,17 +15,18 @@ # RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML # CHECK-YAML: name: bar # CHECK-YAML: - bid: 0 -# CHECK-YAML: probes: [ { blx: 9 } ] +# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ] # CHECK-YAML: inline_tree: [ { } ] # # CHECK-YAML: name: foo # CHECK-YAML: - bid: 0 -# CHECK-YAML: probes: [ { blx: 3 } ] +# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ] # CHECK-YAML: inline_tree: [ { g: 2 } ] # # CHECK-YAML: name: main # CHECK-YAML: - bid: 0 -# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ] +# CHECK-YAML: calls: [ { off: 0x4, fid: 0, cnt: 0, pp: 2 } ] +# CHECK-YAML: probes: [ { } ] # CHECK-YAML: inline_tree: [ { g: 1 } ] # # CHECK-YAML: pseudo_probe_desc: