diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 5fb65153cf313..f1cffde975520 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -197,6 +197,7 @@ struct BlendedBlockHash { /// release. class StaleMatcher { public: + FlowBlock *EntryBlock = nullptr; /// Initialize stale matcher. void init(const std::vector &Blocks, const std::vector &Hashes, @@ -204,6 +205,8 @@ class StaleMatcher { assert(Blocks.size() == Hashes.size() && Hashes.size() == CallHashes.size() && "incorrect matcher initialization"); + if (!Blocks.empty()) + EntryBlock = Blocks[0]; for (size_t I = 0; I < Blocks.size(); I++) { FlowBlock *Block = Blocks[I]; uint16_t OpHash = Hashes[I].OpcodeHash; @@ -551,7 +554,12 @@ size_t matchWeights( const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, const BinaryFunction &BF, - const ArrayRef ProbeMatchSpecs) { + const ArrayRef ProbeMatchSpecs); + +std::pair> +initMatcher(BinaryContext &BC, const BinaryFunction &BF, + const BinaryFunction::BasicBlockOrderType &BlockOrder, + FlowFunction &Func, HashFunction HashFunction) { assert(Func.Blocks.size() == BlockOrder.size() + 2); @@ -595,14 +603,22 @@ size_t matchWeights( Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]); } Matcher.init(Blocks, BlendedHashes, CallHashes); + return {Matcher, BlendedHashes}; +} - using FlowBlockTy = - std::pair; - using ProfileBlockMatchMap = DenseMap; - // Binary profile => block index => matched block + its block profile - DenseMap - MatchedBlocks; - +using FlowBlockTy = + std::pair; +using ProfileBlockMatchMap = DenseMap; + +// Match \p YamlBF blocks and return a mapping from block index to matched flow +// block. +ProfileBlockMatchMap +matchBlocks(BinaryContext &BC, const yaml::bolt::BinaryFunctionProfile &YamlBF, + HashFunction HashFunction, + YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + StaleMatcher &Matcher, ArrayRef BlendedHashes, + const ArrayRef ProbeMatchSpecs) { + ProfileBlockMatchMap MatchedBlocks; // Map of FlowBlock and matching method. DenseMap MatchedFlowBlocks; @@ -617,7 +633,7 @@ size_t matchWeights( if (MatchedFlowBlocks.contains(MatchedBlock)) return; MatchedFlowBlocks.try_emplace(MatchedBlock, Method); - MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB}; + MatchedBlocks[YamlBB.Index] = {MatchedBlock, &YamlBB}; }; // Match blocks from the profile to the blocks in CFG by strict hash. @@ -632,6 +648,8 @@ size_t matchWeights( } // Match blocks from the profile to the blocks in CFG by pseudo probes. for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) { + if (&YamlBP.get() != &YamlBF) + continue; for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks) if (!BB.PseudoProbes.empty()) addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap), @@ -654,7 +672,7 @@ size_t matchWeights( } auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash); if (MatchedBlock == nullptr && YamlBB.Index == 0) { - MatchedBlock = Blocks[0]; + MatchedBlock = Matcher.EntryBlock; // Report as loose match Method = StaleMatcher::MATCH_OPCODE; } @@ -667,93 +685,99 @@ size_t matchWeights( addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB); } - // Match jumps from the profile to the jumps from CFG - std::vector OutWeight(Func.Blocks.size(), 0); - std::vector InWeight(Func.Blocks.size(), 0); - - for (const auto &[YamlBF, MatchMap] : MatchedBlocks) { - for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) { - const auto &[MatchedBlock, YamlBB] = FlowBlockProfile; - StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock); - BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1]; - LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")" - << " with hash " << Twine::utohexstr(YamlBB->Hash) - << " to BB (index = " << MatchedBlock->Index - 1 << ")" - << " with hash " << Twine::utohexstr(BinHash.combine()) - << "\n"); - (void)BinHash; - uint64_t ExecCount = YamlBB->ExecCount; - // Update matching stats accounting for the matched block. - switch (Method) { - case StaleMatcher::MATCH_EXACT: - ++BC.Stats.NumExactMatchedBlocks; - BC.Stats.ExactMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " exact match\n"); - break; - case StaleMatcher::MATCH_PROBE_EXACT: - ++BC.Stats.NumPseudoProbeExactMatchedBlocks; - BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " exact pseudo probe match\n"); - break; - case StaleMatcher::MATCH_PROBE_LOOSE: - ++BC.Stats.NumPseudoProbeLooseMatchedBlocks; - BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " loose pseudo probe match\n"); - break; - case StaleMatcher::MATCH_CALL: - ++BC.Stats.NumCallMatchedBlocks; - BC.Stats.CallMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " call match\n"); - break; - case StaleMatcher::MATCH_OPCODE: - ++BC.Stats.NumLooseMatchedBlocks; - BC.Stats.LooseMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " loose match\n"); - break; - case StaleMatcher::NO_MATCH: - LLVM_DEBUG(dbgs() << " no match\n"); - } + for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchedBlocks) { + const auto &[MatchedBlock, YamlBB] = FlowBlockProfile; + StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock); + BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1]; + LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")" + << " with hash " << Twine::utohexstr(YamlBB->Hash) + << " to BB (index = " << MatchedBlock->Index - 1 << ")" + << " with hash " << Twine::utohexstr(BinHash.combine()) + << "\n"); + (void)BinHash; + uint64_t ExecCount = YamlBB->ExecCount; + // Update matching stats accounting for the matched block. + switch (Method) { + case StaleMatcher::MATCH_EXACT: + ++BC.Stats.NumExactMatchedBlocks; + BC.Stats.ExactMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " exact match\n"); + break; + case StaleMatcher::MATCH_PROBE_EXACT: + ++BC.Stats.NumPseudoProbeExactMatchedBlocks; + BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " exact pseudo probe match\n"); + break; + case StaleMatcher::MATCH_PROBE_LOOSE: + ++BC.Stats.NumPseudoProbeLooseMatchedBlocks; + BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " loose pseudo probe match\n"); + break; + case StaleMatcher::MATCH_CALL: + ++BC.Stats.NumCallMatchedBlocks; + BC.Stats.CallMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " call match\n"); + break; + case StaleMatcher::MATCH_OPCODE: + ++BC.Stats.NumLooseMatchedBlocks; + BC.Stats.LooseMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " loose match\n"); + break; + case StaleMatcher::NO_MATCH: + LLVM_DEBUG(dbgs() << " no match\n"); } + } + return MatchedBlocks; +} - for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) { - for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { - if (YamlSI.Count == 0) - continue; - - // Try to find the jump for a given (src, dst) pair from the profile and - // assign the jump weight based on the profile count - const uint64_t SrcIndex = YamlBB.Index; - const uint64_t DstIndex = YamlSI.Index; - - const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first; - const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first; - - if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { - // Find a jump between the two blocks - FlowJump *Jump = nullptr; - for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) { - if (SuccJump->Target == MatchedDstBlock->Index) { - Jump = SuccJump; - break; - } - } - // Assign the weight, if the corresponding jump is found - if (Jump != nullptr) { - Jump->Weight = YamlSI.Count; - Jump->HasUnknownWeight = false; +/// Move \p YamlBF edge weights to flow function using \p MatchedBlocks, and +/// update \p OutWeight and \p InWeight arrays. +void transferEdgeWeights(ProfileBlockMatchMap &MatchedBlocks, + MutableArrayRef OutWeight, + MutableArrayRef InWeight, + const yaml::bolt::BinaryFunctionProfile &YamlBF) { + for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { + for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { + if (YamlSI.Count == 0) + continue; + + // Try to find the jump for a given (src, dst) pair from the profile and + // assign the jump weight based on the profile count + const uint64_t SrcIndex = YamlBB.Index; + const uint64_t DstIndex = YamlSI.Index; + + const FlowBlock *MatchedSrcBlock = MatchedBlocks.lookup(SrcIndex).first; + const FlowBlock *MatchedDstBlock = MatchedBlocks.lookup(DstIndex).first; + + if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { + // Find a jump between the two blocks + FlowJump *Jump = nullptr; + for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) { + if (SuccJump->Target == MatchedDstBlock->Index) { + Jump = SuccJump; + break; } } - // Assign the weight for the src block, if it is found - if (MatchedSrcBlock != nullptr) - OutWeight[MatchedSrcBlock->Index] += YamlSI.Count; - // Assign the weight for the dst block, if it is found - if (MatchedDstBlock != nullptr) - InWeight[MatchedDstBlock->Index] += YamlSI.Count; + // Assign the weight, if the corresponding jump is found + if (Jump != nullptr) { + Jump->Weight = YamlSI.Count; + Jump->HasUnknownWeight = false; + } } + // Assign the weight for the src block, if it is found + if (MatchedSrcBlock != nullptr) + OutWeight[MatchedSrcBlock->Index] += YamlSI.Count; + // Assign the weight for the dst block, if it is found + if (MatchedDstBlock != nullptr) + InWeight[MatchedDstBlock->Index] += YamlSI.Count; } } +} - // Assign block counts based on in-/out- jumps +// Assign block counts based on in-/out- jumps +size_t setBlockWeights(FlowFunction &Func, ArrayRef OutWeight, + ArrayRef InWeight) { + size_t Matched = 0; for (FlowBlock &Block : Func.Blocks) { if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) { assert(Block.HasUnknownWeight && "unmatched block with a positive count"); @@ -761,9 +785,45 @@ size_t matchWeights( } Block.HasUnknownWeight = false; Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]); + ++Matched; } - return MatchedBlocks[&YamlBF].size(); + return Matched; +} + +/// Assign initial block/jump weights based on the stale profile data. The goal +/// is to extract as much information from the stale profile as possible. Here +/// we assume that each basic block is specified via a hash value computed from +/// its content and the hashes of the unchanged basic blocks stay the same +/// across different revisions of the binary. Blocks may also have pseudo probe +/// information in the profile and the binary which is used for matching. +/// Whenever there is a count in the profile with the hash corresponding to one +/// of the basic blocks in the binary, the count is "matched" to the block. +/// Similarly, if both the source and the target of a count in the profile are +/// matched to a jump in the binary, the count is recorded in CFG. +size_t matchWeights( + BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, + const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, + HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + const BinaryFunction &BF, + const ArrayRef ProbeMatchSpecs) { + using namespace yaml::bolt; + + assert(Func.Blocks.size() == BlockOrder.size() + 2); + + auto [Matcher, BlendedHashes] = + initMatcher(BC, BF, BlockOrder, Func, HashFunction); + + // Match jumps from the profile to the jumps from CFG + std::vector OutWeight(Func.Blocks.size(), 0); + std::vector InWeight(Func.Blocks.size(), 0); + + ProfileBlockMatchMap MatchedBlocks = + matchBlocks(BC, YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes, + ProbeMatchSpecs); + transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, YamlBF); + + return setBlockWeights(Func, OutWeight, InWeight); } /// The function finds all blocks that are (i) reachable from the Entry block