From a0b8804c7d119861c513f946088e4b09dc789367 Mon Sep 17 00:00:00 2001 From: wudexin Date: Wed, 24 Sep 2025 18:42:21 +0800 Subject: [PATCH] Adding Matching and Inference Functionality to Propeller-PR4: Implement matching and inference and create clusters. Co-authored-by: lifengxiang1025 Co-authored-by: zcfh --- .../CodeGen/BasicBlockMatchingAndInference.h | 62 ++++++ .../CodeGen/BasicBlockSectionsProfileReader.h | 7 + .../llvm/CodeGen/MachineBlockHashInfo.h | 2 + llvm/include/llvm/CodeGen/Passes.h | 4 + llvm/include/llvm/InitializePasses.h | 1 + .../Transforms/Utils/SampleProfileInference.h | 16 ++ .../BasicBlockMatchingAndInference.cpp | 187 ++++++++++++++++++ llvm/lib/CodeGen/BasicBlockSections.cpp | 88 ++++++++- .../BasicBlockSectionsProfileReader.cpp | 15 ++ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/TargetPassConfig.cpp | 13 +- .../Utils/SampleProfileInference.cpp | 2 - .../basic-block-sections-clusters-bb-hash.ll | 99 ++++++++++ 13 files changed, 488 insertions(+), 9 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h create mode 100644 llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll diff --git a/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h b/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h new file mode 100644 index 0000000000000..6e9bbb969a445 --- /dev/null +++ b/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h @@ -0,0 +1,62 @@ +//===- llvm/CodeGen/BasicBlockMatchingAndInference.h ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Infer weights for all basic blocks using matching and inference. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H +#define LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H + +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Transforms/Utils/SampleProfileInference.h" + +namespace llvm { + +class BasicBlockMatchingAndInference : public MachineFunctionPass { +private: + using Edge = std::pair; + using BlockWeightMap = DenseMap; + using EdgeWeightMap = DenseMap; + using BlockEdgeMap = DenseMap>; + + struct WeightInfo { + // Weight of basic blocks. + BlockWeightMap BlockWeights; + // Weight of edges. + EdgeWeightMap EdgeWeights; + }; + +public: + static char ID; + BasicBlockMatchingAndInference(); + + StringRef getPassName() const override { + return "Basic Block Matching and Inference"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &F) override; + + std::optional getWeightInfo(StringRef FuncName) const; + +private: + StringMap ProgramWeightInfo; + + WeightInfo initWeightInfoByMatching(MachineFunction &MF); + + void generateWeightInfoByInference(MachineFunction &MF, + WeightInfo &MatchWeight); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 823753021ff74..e8422e22aca0e 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -90,6 +90,10 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; + // Return the complete function path and cluster info for the given function. 
+ std::pair + getFunctionPathAndClusterInfo(StringRef FuncName) const; + private: StringRef getAliasName(StringRef FuncName) const { auto R = FuncAliasMap.find(FuncName); @@ -199,6 +203,9 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; + std::pair + getFunctionPathAndClusterInfo(StringRef FuncName) const; + // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. bool doInitialization(Module &M) override; diff --git a/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h b/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h index d044d5f940b75..6f26819d566ae 100644 --- a/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h @@ -80,6 +80,8 @@ struct BlendedBlockHash { return Dist; } + uint16_t getOpcodeHash() const { return OpcodeHash; } + private: /// The offset of the basic block from the function start. uint16_t Offset{0}; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index a8525554b142e..2bf83cfa655b6 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -69,6 +69,10 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass(); LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass(); +/// createBasicBlockMatchingAndInferencePass - This pass enables matching +/// and inference when using propeller. +LLVM_ABI MachineFunctionPass *createBasicBlockMatchingAndInferencePass(); + /// createMachineBlockHashInfoPass - This pass computes basic block hashes. 
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 581b4ad161daa..9360550875219 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -55,6 +55,7 @@ LLVM_ABI void initializeAlwaysInlinerLegacyPassPass(PassRegistry &); LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &); LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &); LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &); +LLVM_ABI void initializeBasicBlockMatchingAndInferencePass(PassRegistry &); LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &); LLVM_ABI void initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h index 7231e45fe8eb7..e1663d29c1e3c 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h @@ -130,6 +130,11 @@ template class SampleProfileInference { SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights) : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {} + SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors, + BlockWeightMap &SampleBlockWeights, + EdgeWeightMap &SampleEdgeWeights) + : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights), + SampleEdgeWeights(SampleEdgeWeights) {} /// Apply the profile inference algorithm for a given function void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); @@ -157,6 +162,9 @@ template class SampleProfileInference { /// Map basic blocks to their sampled weights. BlockWeightMap &SampleBlockWeights; + + /// Map edges to their sampled weights. 
+ EdgeWeightMap SampleEdgeWeights; }; template @@ -266,6 +274,14 @@ FlowFunction SampleProfileInference::createFlowFunction( FlowJump Jump; Jump.Source = BlockIndex[BB]; Jump.Target = BlockIndex[Succ]; + auto It = SampleEdgeWeights.find(std::make_pair(BB, Succ)); + if (It != SampleEdgeWeights.end()) { + Jump.HasUnknownWeight = false; + Jump.Weight = It->second; + } else { + Jump.HasUnknownWeight = true; + Jump.Weight = 0; + } Func.Jumps.push_back(Jump); } } diff --git a/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp b/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp new file mode 100644 index 0000000000000..e0335f6a0b79f --- /dev/null +++ b/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp @@ -0,0 +1,187 @@ +//===- llvm/CodeGen/BasicBlockMatchingAndInference.cpp ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Infer weights for all basic blocks using matching and inference. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BasicBlockMatchingAndInference.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineBlockHashInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include + +using namespace llvm; + +static cl::opt + PropellerInferThreshold("propeller-infer-threshold", + cl::desc("Threshold for infer stale profile"), + cl::init(0.6), cl::Optional); + +/// The object is used to identify and match basic blocks given their hashes. +class StaleMatcher { +public: + /// Initialize stale matcher. 
+ void init(const std::vector &Blocks, + const std::vector &Hashes) { + assert(Blocks.size() == Hashes.size() && + "incorrect matcher initialization"); + for (size_t I = 0; I < Blocks.size(); I++) { + MachineBasicBlock *Block = Blocks[I]; + uint16_t OpHash = Hashes[I].getOpcodeHash(); + OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block)); + } + } + + /// Find the most similar block for a given hash. + MachineBasicBlock *matchBlock(BlendedBlockHash BlendedHash) const { + auto BlockIt = OpHashToBlocks.find(BlendedHash.getOpcodeHash()); + if (BlockIt == OpHashToBlocks.end()) { + return nullptr; + } + MachineBasicBlock *BestBlock = nullptr; + uint64_t BestDist = std::numeric_limits::max(); + for (auto It : BlockIt->second) { + MachineBasicBlock *Block = It.second; + BlendedBlockHash Hash = It.first; + uint64_t Dist = Hash.distance(BlendedHash); + if (BestBlock == nullptr || Dist < BestDist) { + BestDist = Dist; + BestBlock = Block; + } + } + return BestBlock; + } + +private: + using HashBlockPairType = std::pair; + std::unordered_map> OpHashToBlocks; +}; + +INITIALIZE_PASS_BEGIN(BasicBlockMatchingAndInference, + "machine-block-match-infer", + "Machine Block Matching and Inference Analysis", true, + true) +INITIALIZE_PASS_DEPENDENCY(MachineBlockHashInfo) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) +INITIALIZE_PASS_END(BasicBlockMatchingAndInference, "machine-block-match-infer", + "Machine Block Matching and Inference Analysis", true, true) + +char BasicBlockMatchingAndInference::ID = 0; + +BasicBlockMatchingAndInference::BasicBlockMatchingAndInference() + : MachineFunctionPass(ID) { + initializeBasicBlockMatchingAndInferencePass( + *PassRegistry::getPassRegistry()); +} + +void BasicBlockMatchingAndInference::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +std::optional 
+BasicBlockMatchingAndInference::getWeightInfo(StringRef FuncName) const { + auto It = ProgramWeightInfo.find(FuncName); + if (It == ProgramWeightInfo.end()) { + return std::nullopt; + } + return It->second; +} + +BasicBlockMatchingAndInference::WeightInfo +BasicBlockMatchingAndInference::initWeightInfoByMatching(MachineFunction &MF) { + std::vector Blocks; + std::vector Hashes; + auto BSPR = &getAnalysis(); + auto MBHI = &getAnalysis(); + for (auto &Block : MF) { + Blocks.push_back(&Block); + Hashes.push_back(BlendedBlockHash(MBHI->getMBBHash(Block))); + } + StaleMatcher Matcher; + Matcher.init(Blocks, Hashes); + BasicBlockMatchingAndInference::WeightInfo MatchWeight; + auto [Flag, PathAndClusterInfo] = + BSPR->getFunctionPathAndClusterInfo(MF.getName()); + if (!Flag) + return MatchWeight; + for (auto &BlockCount : PathAndClusterInfo.NodeCounts) { + if (PathAndClusterInfo.BBHashes.count(BlockCount.first.BaseID)) { + auto Hash = PathAndClusterInfo.BBHashes[BlockCount.first.BaseID]; + MachineBasicBlock *Block = Matcher.matchBlock(BlendedBlockHash(Hash)); + // When a basic block has clone copies, sum their counts. + if (Block != nullptr) + MatchWeight.BlockWeights[Block] += BlockCount.second; + } + } + for (auto &PredItem : PathAndClusterInfo.EdgeCounts) { + auto PredID = PredItem.first.BaseID; + if (!PathAndClusterInfo.BBHashes.count(PredID)) + continue; + auto PredHash = PathAndClusterInfo.BBHashes[PredID]; + MachineBasicBlock *PredBlock = + Matcher.matchBlock(BlendedBlockHash(PredHash)); + if (PredBlock == nullptr) + continue; + for (auto &SuccItem : PredItem.second) { + auto SuccID = SuccItem.first.BaseID; + auto EdgeWeight = SuccItem.second; + if (PathAndClusterInfo.BBHashes.count(SuccID)) { + auto SuccHash = PathAndClusterInfo.BBHashes[SuccID]; + MachineBasicBlock *SuccBlock = + Matcher.matchBlock(BlendedBlockHash(SuccHash)); + // When an edge has clone copies, sum their counts. 
+ if (SuccBlock != nullptr) + MatchWeight.EdgeWeights[std::make_pair(PredBlock, SuccBlock)] += + EdgeWeight; + } + } + } + return MatchWeight; +} + +void BasicBlockMatchingAndInference::generateWeightInfoByInference( + MachineFunction &MF, + BasicBlockMatchingAndInference::WeightInfo &MatchWeight) { + BlockEdgeMap Successors; + for (auto &Block : MF) { + for (auto *Succ : Block.successors()) + Successors[&Block].push_back(Succ); + } + SampleProfileInference SPI( + MF, Successors, MatchWeight.BlockWeights, MatchWeight.EdgeWeights); + BlockWeightMap BlockWeights; + EdgeWeightMap EdgeWeights; + SPI.apply(BlockWeights, EdgeWeights); + ProgramWeightInfo.try_emplace( + MF.getName(), BasicBlockMatchingAndInference::WeightInfo{ + std::move(BlockWeights), std::move(EdgeWeights)}); +} + +bool BasicBlockMatchingAndInference::runOnMachineFunction(MachineFunction &MF) { + if (MF.empty()) + return false; + auto MatchWeight = initWeightInfoByMatching(MF); + // If the ratio of the number of MBBs in matching to the total number of MBBs + // in the function is less than the threshold value, the processing should be + // abandoned. 
+ if (static_cast(MatchWeight.BlockWeights.size()) / MF.size() < + PropellerInferThreshold) { + return false; + } + generateWeightInfoByInference(MF, MatchWeight); + return false; +} + +MachineFunctionPass *llvm::createBasicBlockMatchingAndInferencePass() { + return new BasicBlockMatchingAndInference(); +} diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index e317e1c06741f..a86ac6d6eab23 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -70,6 +70,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockMatchingAndInference.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/MachineDominators.h" @@ -81,6 +82,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/UniqueBBID.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include using namespace llvm; @@ -175,6 +177,77 @@ updateBranches(MachineFunction &MF, } } +// This function generates the machine basic block clusters of "hot" blocks. +// Currently, only support one cluster creation. +// TODO: Support multi-cluster creation and path cloning. 
+static std::pair> +createBBClusterInfoForFunction(const MachineFunction &MF, + BasicBlockMatchingAndInference *BMI) { + unsigned CurrentCluster = 0; + auto OptWeightInfo = BMI->getWeightInfo(MF.getName()); + if (!OptWeightInfo) + return std::pair(false, SmallVector{}); + auto BlockWeights = OptWeightInfo->BlockWeights; + auto EdgeWeights = OptWeightInfo->EdgeWeights; + + SmallVector HotMBBs; + if (MF.size() <= 2) { + for (auto &MBB : MF) { + if (MBB.isEntryBlock() || BlockWeights[&MBB] > 0) { + HotMBBs.push_back(&MBB); + } + } + } else { + SmallVector BlockSizes(MF.size()); + SmallVector BlockCounts(MF.size()); + std::vector OrigOrder; + OrigOrder.reserve(MF.size()); + SmallVector JumpCounts; + + // Init the MBB size and count. + for (auto &MBB : MF) { + auto NonDbgInsts = + instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end()); + int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end()); + BlockSizes[MBB.getNumber()] = 4 * NumInsts; + BlockCounts[MBB.getNumber()] = BlockWeights[&MBB]; + OrigOrder.push_back(&MBB); + } + + // Init the edge count. + for (auto &MBB : MF) { + for (auto *Succ : MBB.successors()) { + auto EdgeWeight = EdgeWeights[std::make_pair(&MBB, Succ)]; + JumpCounts.push_back({static_cast(MBB.getNumber()), + static_cast(Succ->getNumber()), + EdgeWeight}); + } + } + + // Run the layout algorithm. + auto Result = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts); + for (uint64_t R : Result) { + auto Block = OrigOrder[R]; + if (Block->isEntryBlock() || BlockWeights[Block] > 0) + HotMBBs.push_back(Block); + } + } + + // Generate the "hot" basic block cluster. 
+ if (!HotMBBs.empty()) { + SmallVector BBClusterInfos; + unsigned CurrentPosition = 0; + for (auto &MBB : HotMBBs) { + if (MBB->getBBID()) { + BBClusterInfos.push_back( + {*(MBB->getBBID()), CurrentCluster, CurrentPosition++}); + } + } + return std::pair(true, std::move(BBClusterInfos)); + } + return std::pair(false, SmallVector{}); +} + // This function sorts basic blocks according to the cluster's information. // All explicitly specified clusters of basic blocks will be ordered // accordingly. All non-specified BBs go into a separate "Cold" section. @@ -314,12 +387,16 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { - auto [HasProfile, ClusterInfo] = - getAnalysis() - .getClusterInfoForFunction(MF.getName()); - if (!HasProfile) + std::pair> ExpClusterInfo; + if (auto *BMI = getAnalysisIfAvailable()) { + ExpClusterInfo = createBBClusterInfoForFunction(MF, BMI); + } else { + ExpClusterInfo = getAnalysis() + .getClusterInfoForFunction(MF.getName()); + } + if (!ExpClusterInfo.first) return false; - for (auto &BBClusterInfo : ClusterInfo) { + for (auto &BBClusterInfo : ExpClusterInfo.second) { FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo); } } @@ -402,6 +479,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); + AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); MachineFunctionPass::getAnalysisUsage(AU); diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 485b44ae4c4aa..7168906deffca 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -91,6 +91,15 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } +std::pair 
+BasicBlockSectionsProfileReader::getFunctionPathAndClusterInfo( + StringRef FuncName) const { + auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); + return R != ProgramPathAndClusterInfo.end() + ? std::pair(true, R->second) + : std::pair(false, FunctionPathAndClusterInfo()); +} + // Reads the version 1 basic block sections profile. Profile for each function // is encoded as follows: // m @@ -512,6 +521,12 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } +std::pair +BasicBlockSectionsProfileReaderWrapperPass::getFunctionPathAndClusterInfo( + StringRef FuncName) const { + return BBSPR.getFunctionPathAndClusterInfo(FuncName); +} + BasicBlockSectionsProfileReader & BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { return BBSPR; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 4373c5397a3c6..17e27052b7b24 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_component_library(LLVMCodeGen BasicBlockSections.cpp BasicBlockPathCloning.cpp BasicBlockSectionsProfileReader.cpp + BasicBlockMatchingAndInference.cpp CalcSpillWeights.cpp CallBrPrepare.cpp CallingConvLower.cpp diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 10b723887b21f..0f1fb506d4a73 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -272,6 +272,12 @@ static cl::opt cl::desc("Split static data sections into hot and cold " "sections using profile information")); +/// Enable matching and inference when using propeller. +static cl::opt + PropellerMatchInfer("propeller-match-infer", + cl::desc("Use match&infer to evaluate stale profile"), + cl::init(false), cl::Optional); + cl::opt EmitBBHash( "emit-bb-hash", cl::desc( @@ -1287,12 +1293,15 @@ void TargetPassConfig::addMachinePasses() { // address map (or both). 
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None || TM->Options.BBAddrMap) { - if (EmitBBHash) + if (EmitBBHash || PropellerMatchInfer) addPass(llvm::createMachineBlockHashInfoPass()); if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); - addPass(llvm::createBasicBlockPathCloningPass()); + if (PropellerMatchInfer) + addPass(llvm::createBasicBlockMatchingAndInferencePass()); + else + addPass(llvm::createBasicBlockPathCloningPass()); } addPass(llvm::createBasicBlockSectionsPass()); } diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 53bcaa6d3df03..934d1589c4a2e 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -1174,8 +1174,6 @@ std::pair assignJumpCosts(const ProfiParams &Params, else CostInc = Params.CostJumpUnknownInc; CostDec = 0; - } else { - assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight"); } return std::make_pair(CostInc, CostDec); } diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll new file mode 100644 index 0000000000000..0ce3a522b932d --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll @@ -0,0 +1,99 @@ +; BB cluster section tests when using edges profile and basic block hashes to generate clusters. +; +; Test1: Basic blocks #0 (entry), #1 and #3 will be placed in the same section. +; The rest will be placed in the cold section. 
+; +; RUN: llc %s -O0 -mtriple=x86_64-pc-linux -function-sections -filetype=obj -basic-block-address-map -emit-bb-hash -o %t.o +; +; RUN: echo 'v1' > %t1 +; RUN: echo 'f foo' >> %t1 +; RUN: echo 'g 0:100,1:100,2:0 1:100,3:100 2:0,3:0 3:100' >> %t1 +; +; These commands read BB hashes from SHT_LLVM_BB_ADDR_MAP +; and put them into the basic blocks sections profile. +; RUN: llvm-readobj %t.o --bb-addr-map | \ +; RUN: awk 'BEGIN {printf "h"} \ +; RUN: /ID: [0-9]+/ {id=$2} \ +; RUN: /Hash: 0x[0-9A-Fa-f]+/ {gsub(/^0x/, "", $2); hash=$2; printf " %%s:%%s", id, hash} \ +; RUN: END {print ""}' \ +; RUN: >> %t1 +; +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 -propeller-match-infer | \ +; RUN: FileCheck %s -check-prefix=LINUX-SECTIONS1 +; +; Test2: Basic blocks #0 (entry), #2 and #3 will be placed in the same section. +; The rest will be placed in the cold section. +; +; RUN: echo 'v1' > %t2 +; RUN: echo 'f foo' >> %t2 +; RUN: echo 'g 0:100,1:0,2:100 1:0,3:0 2:100,3:100 3:100' >> %t2 +; +; These commands read BB hashes from SHT_LLVM_BB_ADDR_MAP +; and put them into the basic blocks sections profile. 
+; RUN: llvm-readobj %t.o --bb-addr-map | \ +; RUN: awk 'BEGIN {printf "h"} \ +; RUN: /ID: [0-9]+/ {id=$2} \ +; RUN: /Hash: 0x[0-9A-Fa-f]+/ {gsub(/^0x/, "", $2); hash=$2; printf " %%s:%%s", id, hash} \ +; RUN: END {print ""}' \ +; RUN: >> %t2 +; +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 -propeller-match-infer | \ +; RUN: FileCheck %s -check-prefix=LINUX-SECTIONS2 + +define void @foo(i1 zeroext) nounwind { + %2 = alloca i8, align 1 + %3 = zext i1 %0 to i8 + store i8 %3, ptr %2, align 1 + %4 = load i8, ptr %2, align 1 + %5 = trunc i8 %4 to i1 + br i1 %5, label %6, label %8 + +6: ; preds = %1 + %7 = call i32 @bar() + br label %10 + +8: ; preds = %1 + %9 = call i32 @baz() + br label %10 + +10: ; preds = %8, %6 + ret void +} + +declare i32 @bar() #1 + +declare i32 @baz() #1 + +; LINUX-SECTIONS1: .section .text.foo,"ax",@progbits +; LINUX-SECTIONS1-NOT: .section +; LINUX-SECTIONS1-LABEL: foo: +; LINUX-SECTIONS1-NOT: .section +; LINUX-SECTIONS1-NOT: .LBB_END0_{{[0-9]+}} +; LINUX-SECTIONS1-LABEL: # %bb.1: +; LINUX-SECTIONS1-NOT: .section +; LINUX-SECTIONS1-NOT: .LBB_END0_{{[0-9]+}} +; LINUX-SECTIONS1-LABEL: .LBB0_3: +; LINUX-SECTIONS1-LABEL: .LBB_END0_3: +; LINUX-SECTIONS1-NEXT: .section .text.split.foo,"ax",@progbits +; LINUX-SECTIONS1-LABEL: foo.cold: +; LINUX-SECTIONS1-LABEL: .LBB_END0_2: +; LINUX-SECTIONS1-NEXT: .size foo.cold, .LBB_END0_2-foo.cold +; LINUX-SECTIONS1-LABEL: .Lfunc_end0: +; LINUX-SECTIONS1-NEXT: .size foo, .Lfunc_end0-foo + +; LINUX-SECTIONS2: .section .text.foo,"ax",@progbits +; LINUX-SECTIONS2-NOT: .section +; LINUX-SECTIONS2-LABEL: foo: +; LINUX-SECTIONS2-NOT: .section +; LINUX-SECTIONS2-NOT: .LBB_END0_{{[0-9]+}} +; LINUX-SECTIONS2-LABEL: # %bb.2: +; LINUX-SECTIONS2-NOT: .section +; LINUX-SECTIONS2-NOT: .LBB_END0_{{[0-9]+}} +; LINUX-SECTIONS2-LABEL: .LBB0_3: +; LINUX-SECTIONS2-LABEL: .LBB_END0_3: +; LINUX-SECTIONS2-NEXT: .section .text.split.foo,"ax",@progbits +; LINUX-SECTIONS2-LABEL: foo.cold: +; 
LINUX-SECTIONS2-LABEL: .LBB_END0_1: +; LINUX-SECTIONS2-NEXT: .size foo.cold, .LBB_END0_1-foo.cold +; LINUX-SECTIONS2-LABEL: .Lfunc_end0: +; LINUX-SECTIONS2-NEXT: .size foo, .Lfunc_end0-foo