diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h index d43f399b2c310..292abf8b2b516 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h @@ -27,6 +27,15 @@ void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, void avoidZeroOffsetLandingPad(MachineFunction &MF); +/// This checks if the source of this function has drifted since this binary was +/// profiled previously. +/// For now, we are piggy backing on what PGO does to +/// detect this with instrumented profiles. PGO emits a hash of the IR and +/// checks if the hash has changed. Advanced basic block layout is usually done +/// on top of PGO optimized binaries and hence this check works well in +/// practice. +bool hasInstrProfHashMismatch(MachineFunction &MF); + } // end namespace llvm #endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 6e01dfd11ee6d..dfb8d5d9f2f5d 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -19,33 +19,22 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +using namespace llvm; namespace llvm { -// This structure represents a unique ID for every block specified in the -// input profile. -struct ProfileBBID { - // Basic block id associated with `MachineBasicBlock::BBID`. - unsigned BBID; - // The clone id associated with the block. This is zero for the original - // block.
For the cloned ones, it is equal to 1 + index of the associated - // path in `FunctionPathAndClusterInfo::ClonePaths`. - unsigned CloneID; -}; - // This struct represents the cluster information for a machine basic block, -// which is specifed by a unique ID. This templated struct is used for both the -// raw input profile (as `BBClusterInfo`) and the processed profile -// after applying the clonings (as `BBClusterInfo`). -template struct BBClusterInfo { +// which is specified by a unique ID (`MachineBasicBlock::BBID`). +struct BBClusterInfo { // Basic block ID. - BBIDType BasicBlockID; + UniqueBBID BBID; // Cluster ID this basic block belongs to. unsigned ClusterID; // Position of basic block within the cluster. @@ -54,31 +43,31 @@ template struct BBClusterInfo { // This represents the raw input profile for one function. struct FunctionPathAndClusterInfo { - // BB Cluster information specified by `ProfileBBID`s (before cloning). - SmallVector> ClusterInfo; + // BB Cluster information specified by `UniqueBBID`s. + SmallVector ClusterInfo; // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along // the edge a -> b (a is not cloned). The index of the path in this vector - // determines the `ProfileBBID::CloneID` of the cloned blocks in that path. + // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; }; -// Provides DenseMapInfo for ProfileBBID. -template <> struct DenseMapInfo { - static inline ProfileBBID getEmptyKey() { +// Provides DenseMapInfo for UniqueBBID.
+template <> struct DenseMapInfo { + static inline UniqueBBID getEmptyKey() { unsigned EmptyKey = DenseMapInfo::getEmptyKey(); - return ProfileBBID{EmptyKey, EmptyKey}; + return UniqueBBID{EmptyKey, EmptyKey}; } - static inline ProfileBBID getTombstoneKey() { + static inline UniqueBBID getTombstoneKey() { unsigned TombstoneKey = DenseMapInfo::getTombstoneKey(); - return ProfileBBID{TombstoneKey, TombstoneKey}; + return UniqueBBID{TombstoneKey, TombstoneKey}; } - static unsigned getHashValue(const ProfileBBID &Val) { + static unsigned getHashValue(const UniqueBBID &Val) { std::pair PairVal = - std::make_pair(Val.BBID, Val.CloneID); + std::make_pair(Val.BaseID, Val.CloneID); return DenseMapInfo>::getHashValue(PairVal); } - static bool isEqual(const ProfileBBID &LHS, const ProfileBBID &RHS) { - return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && + static bool isEqual(const UniqueBBID &LHS, const UniqueBBID &RHS) { + return DenseMapInfo::isEqual(LHS.BaseID, RHS.BaseID) && DenseMapInfo::isEqual(LHS.CloneID, RHS.CloneID); } }; @@ -113,8 +102,12 @@ class BasicBlockSectionsProfileReader : public ImmutablePass { // function. If the first element is true and the second element is empty, it // means unique basic block sections are desired for all basic blocks of the // function. - std::pair - getPathAndClusterInfoForFunction(StringRef FuncName) const; + std::pair> + getClusterInfoForFunction(StringRef FuncName) const; + + // Returns the path clonings for the given function. + SmallVector> + getClonePathsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. @@ -134,11 +127,11 @@ class BasicBlockSectionsProfileReader : public ImmutablePass { inconvertibleErrorCode()); } - // Parses a `ProfileBBID` from `S`. `S` must be in the form "" + // Parses a `UniqueBBID` from `S`. `S` must be in the form "" // (representing an original block) or "." 
(representing a // cloned block) where bbid is a non-negative integer and cloneid is a // positive integer. - Expected parseProfileBBID(StringRef S) const; + Expected parseUniqueBBID(StringRef S) const; // Reads the basic block sections profile for functions in this module. Error ReadProfile(); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 15c4fcd8399c1..4b5336fac33ea 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -74,6 +74,13 @@ struct MBBSectionID { MBBSectionID(SectionType T) : Type(T), Number(0) {} }; +// This structure represents the information for a basic block. +struct UniqueBBID { + unsigned BaseID; + // Zero for an original block; non-zero for a cloned block. + unsigned CloneID; +}; + template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. @@ -180,7 +187,7 @@ class MachineBasicBlock /// Fixed unique ID assigned to this basic block upon creation. Used with /// basic block sections and basic block labels. - std::optional BBID; + std::optional BBID; /// With basic block sections, this stores the Section ID of the basic block. MBBSectionID SectionID{0}; @@ -633,7 +640,7 @@ class MachineBasicBlock void setIsEndSection(bool V = true) { IsEndSection = V; } - std::optional getBBID() const { return BBID; } + std::optional getBBID() const { return BBID; } /// Returns the section ID of this basic block. MBBSectionID getSectionID() const { return SectionID; } @@ -645,7 +652,7 @@ class MachineBasicBlock } /// Sets the fixed BBID of this basic block. - void setBBID(unsigned V) { + void setBBID(const UniqueBBID &V) { assert(!BBID.has_value() && "Cannot change BBID."); BBID = V; } @@ -753,7 +760,7 @@ class MachineBasicBlock /// /// This is useful when doing a partial clone of successors. Afterward, the /// probabilities may need to be normalized.
- void copySuccessor(MachineBasicBlock *Orig, succ_iterator I); + void copySuccessor(const MachineBasicBlock *Orig, succ_iterator I); /// Split the old successor into old plus new and updates the probability /// info. diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 8f1651c2958e5..76a4f9d7eafa4 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -1005,8 +1005,11 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { void deleteMachineInstr(MachineInstr *MI); /// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this - /// instead of `new MachineBasicBlock'. - MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = nullptr); + /// instead of `new MachineBasicBlock'. Sets `MachineBasicBlock::BBID` if + /// basic-block-sections is enabled for the function. + MachineBasicBlock * + CreateMachineBasicBlock(const BasicBlock *BB = nullptr, + std::optional BBID = std::nullopt); /// DeleteMachineBasicBlock - Delete the given MachineBasicBlock. void deleteMachineBasicBlock(MachineBasicBlock *MBB); diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 8d14eef949e91..712048017bca1 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -65,6 +65,8 @@ namespace llvm { /// basic blocks and is enabled with -fbasic-block-sections. MachineFunctionPass *createBasicBlockSectionsPass(); + MachineFunctionPass *createBasicBlockPathCloningPass(); + /// createMachineFunctionSplitterPass - This pass splits machine functions /// using profile information. 
MachineFunctionPass *createMachineFunctionSplitterPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 9fbd431c22778..fafae8b5ecd7a 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -55,6 +55,7 @@ void initializeAssignmentTrackingAnalysisPass(PassRegistry &); void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); +void initializeBasicBlockPathCloningPass(PassRegistry &); void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBarrierNoopPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b1a670fa3c255..fd440718fd378 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1372,7 +1372,11 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { if (BBAddrMapVersion > 1) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. - OutStreamer->emitULEB128IntValue(*MBB.getBBID()); + // We only emit BaseID since CloneID is unset for + // basic-block-sections=labels. + // TODO: Emit the full BBID when labels and sections can be mixed + // together. + OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID); } // Emit the basic block offset relative to the end of the previous block. // This is zero unless the block is padded due to alignment. @@ -1932,30 +1936,33 @@ void AsmPrinter::emitFunctionBody() { // MBB profile information has been set if (MBBProfileDumpFileOutput && !MF->empty() && MF->getFunction().getEntryCount()) { - if (!MF->hasBBLabels()) + if (!MF->hasBBLabels()) { MF->getContext().reportError( SMLoc(), "Unable to find BB labels for MBB profile dump. 
-mbb-profile-dump " "must be called with -basic-block-sections=labels"); - MachineBlockFrequencyInfo &MBFI = - getAnalysis().getBFI(); - // The entry count and the entry basic block frequency aren't the same. We - // want to capture "absolute" frequencies, i.e. the frequency with which a - // MBB is executed when the program is executed. From there, we can derive - // Function-relative frequencies (divide by the value for the first MBB). - // We also have the information about frequency with which functions - // were called. This helps, for example, in a type of integration tests - // where we want to cross-validate the compiler's profile with a real - // profile. - // Using double precision because uint64 values used to encode mbb - // "frequencies" may be quite large. - const double EntryCount = - static_cast(MF->getFunction().getEntryCount()->getCount()); - for (const auto &MBB : *MF) { - const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB); - const double AbsMBBFreq = MBBRelFreq * EntryCount; - *MBBProfileDumpFileOutput.get() - << MF->getName() << "," << MBB.getBBID() << "," << AbsMBBFreq << "\n"; + } else { + MachineBlockFrequencyInfo &MBFI = + getAnalysis().getBFI(); + // The entry count and the entry basic block frequency aren't the same. We + // want to capture "absolute" frequencies, i.e. the frequency with which a + // MBB is executed when the program is executed. From there, we can derive + // Function-relative frequencies (divide by the value for the first MBB). + // We also have the information about frequency with which functions + // were called. This helps, for example, in a type of integration tests + // where we want to cross-validate the compiler's profile with a real + // profile. + // Using double precision because uint64 values used to encode mbb + // "frequencies" may be quite large. 
+ const double EntryCount = + static_cast(MF->getFunction().getEntryCount()->getCount()); + for (const auto &MBB : *MF) { + const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB); + const double AbsMBBFreq = MBBRelFreq * EntryCount; + *MBBProfileDumpFileOutput.get() + << MF->getName() << "," << MBB.getBBID()->BaseID << "," + << AbsMBBFreq << "\n"; + } } } } diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp new file mode 100644 index 0000000000000..5d5f3c3da4816 --- /dev/null +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -0,0 +1,245 @@ +//===-- BasicBlockPathCloning.cpp ---=========-----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// BasicBlockPathCloning implementation. +/// +/// The purpose of this pass is to clone basic block paths based on information +/// provided by the -fbasic-block-sections=list option. +/// Please refer to BasicBlockSectionsProfileReader.cpp to see a path cloning +/// example. +//===----------------------------------------------------------------------===// +// This pass clones the machine basic blocks along the given paths and sets up +// the CFG. It assigns BBIDs to the cloned blocks so that the +// `BasicBlockSections` pass can correctly map the cluster information to the +// blocks. The cloned block's BBID will have the same BaseID as the original +// block, but will get a unique non-zero CloneID (original blocks all have zero +// CloneIDs). This pass applies a path cloning if it satisfies the following +// conditions: +// 1. All BBIDs in the path should be mapped to existing blocks. +// 2.
Each two consecutive BBIDs in the path must have a successor +// relationship in the CFG. +// 3. The path should not include a block with indirect branches, except for +// the last block. +// If a path does not satisfy all three conditions, it will be rejected, but the +// CloneIDs for its (supposed to be cloned) blocks will be bypassed to make sure +// that the `BasicBlockSections` pass can map cluster info correctly to the +// actually-cloned blocks. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + +// Clones the given block and assigns the given `CloneID` to its BBID. Copies +// the instructions into the new block and sets up its successors. +MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock &OrigBB, + unsigned CloneID) { + auto &MF = *OrigBB.getParent(); + auto TII = MF.getSubtarget().getInstrInfo(); + // Create the clone block and set its BBID based on the original block. + MachineBasicBlock *CloneBB = MF.CreateMachineBasicBlock( + OrigBB.getBasicBlock(), UniqueBBID{OrigBB.getBBID()->BaseID, CloneID}); + MF.push_back(CloneBB); + + // Copy the instructions. + for (auto &I : OrigBB.instrs()) { + // Bundled instructions are duplicated together. + if (I.isBundledWithPred()) + continue; + TII->duplicate(*CloneBB, CloneBB->end(), I); + } + + // Add the successors of the original block as the new block's successors. + // We set the predecessor after returning from this call. 
+ for (auto SI = OrigBB.succ_begin(), SE = OrigBB.succ_end(); SI != SE; ++SI) + CloneBB->copySuccessor(&OrigBB, SI); + + if (auto FT = OrigBB.getFallThrough(/*JumpToFallThrough=*/false)) { + // The original block has an implicit fall through. + // Insert an explicit unconditional jump from the cloned block to the + // fallthrough block. Technically, this is only needed for the last block + // of the path, but we do it for all clones for consistency. + TII->insertUnconditionalBranch(*CloneBB, FT, CloneBB->findBranchDebugLoc()); + } + return CloneBB; +} + +// Returns if we can legally apply the cloning represented by `ClonePath`. +// `BBIDToBlock` contains the original basic blocks in function `MF` keyed by +// their `BBID::BaseID`. +bool IsValidCloning(const MachineFunction &MF, + const DenseMap &BBIDToBlock, + const SmallVector &ClonePath) { + const MachineBasicBlock *PrevBB = nullptr; + for (size_t I = 0; I < ClonePath.size(); ++I) { + unsigned BBID = ClonePath[I]; + const MachineBasicBlock *PathBB = BBIDToBlock.lookup(BBID); + if (!PathBB) { + WithColor::warning() << "no block with id " << BBID << " in function " + << MF.getName() << "\n"; + return false; + } + + if (PrevBB) { + if (!PrevBB->isSuccessor(PathBB)) { + WithColor::warning() + << "block #" << BBID << " is not a successor of block #" + << PrevBB->getBBID()->BaseID << " in function " << MF.getName() + << "\n"; + return false; + } + + for (auto &MI : *PathBB) { + // Avoid cloning when the block contains non-duplicable instructions. + // CFI instructions are marked as non-duplicable only because of Darwin, + // so we exclude them from this check. 
+ if (MI.isNotDuplicable() && !MI.isCFIInstruction()) { + WithColor::warning() + << "block #" << BBID + << " has non-duplicable instructions in function " << MF.getName() + << "\n"; + return false; + } + } + } + + if (I != ClonePath.size() - 1 && !PathBB->empty() && + PathBB->back().isIndirectBranch()) { + WithColor::warning() + << "block #" << BBID + << " has indirect branch and appears as the non-tail block of a " + "path in function " + << MF.getName() << "\n"; + return false; + } + PrevBB = PathBB; + } + return true; +} + +// Applies all clonings specified in `ClonePaths` to `MF`. Returns true +// if any clonings have been applied. +bool ApplyCloning(MachineFunction &MF, + const SmallVector> &ClonePaths) { + if (ClonePaths.empty()) + return false; + bool AnyPathsCloned = false; + // Map from the final BB IDs to the `MachineBasicBlock`s. + DenseMap BBIDToBlock; + for (auto &BB : MF) + BBIDToBlock.try_emplace(BB.getBBID()->BaseID, &BB); + + DenseMap NClonesForBBID; + auto TII = MF.getSubtarget().getInstrInfo(); + for (const auto &ClonePath : ClonePaths) { + if (!IsValidCloning(MF, BBIDToBlock, ClonePath)) { + // We still need to increment the number of clones so we can map + // to the cluster info correctly. + for (unsigned BBID : ClonePath) + ++NClonesForBBID[BBID]; + continue; + } + MachineBasicBlock *PrevBB = nullptr; + for (unsigned BBID : ClonePath) { + MachineBasicBlock *OrigBB = BBIDToBlock.at(BBID); + if (PrevBB == nullptr) { + // The first block in the path is not cloned. We only need to make it + // branch to the next cloned block in the path. Here, we make its + // fallthrough explicit so we can change it later. + if (auto FT = OrigBB->getFallThrough(/*JumpToFallThrough=*/false)) { + TII->insertUnconditionalBranch(*OrigBB, FT, + OrigBB->findBranchDebugLoc()); + } + PrevBB = OrigBB; + continue; + } + MachineBasicBlock *CloneBB = + CloneMachineBasicBlock(*OrigBB, ++NClonesForBBID[BBID]); + + // Set up the previous block in the path to jump to the clone. 
This also + // transfers the successor/predecessor relationship of PrevBB and OrigBB + // to that of PrevBB and CloneBB. + PrevBB->ReplaceUsesOfBlockWith(OrigBB, CloneBB); + + // Copy the livein set. + for (auto &LiveIn : OrigBB->liveins()) + CloneBB->addLiveIn(LiveIn); + + PrevBB = CloneBB; + } + AnyPathsCloned = true; + } + return AnyPathsCloned; +} +} // end anonymous namespace + +namespace llvm { +class BasicBlockPathCloning : public MachineFunctionPass { +public: + static char ID; + + BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + + BasicBlockPathCloning() : MachineFunctionPass(ID) { + initializeBasicBlockPathCloningPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Basic Block Path Cloning"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Applies the path clonings given by the profile reader for this function. + /// Returns true if any path was cloned. + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // namespace llvm + +char BasicBlockPathCloning::ID = 0; +INITIALIZE_PASS_BEGIN( + BasicBlockPathCloning, "bb-path-cloning", + "Applies path clonings for the -basic-block-sections=list option", false, + false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_END( + BasicBlockPathCloning, "bb-path-cloning", + "Applies path clonings for the -basic-block-sections=list option", false, + false) + +bool BasicBlockPathCloning::runOnMachineFunction(MachineFunction &MF) { + assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && + "BB Sections list not enabled!"); + if (hasInstrProfHashMismatch(MF)) + return false; + + return ApplyCloning(MF, getAnalysis() + .getClonePathsForFunction(MF.getName())); +} + +void BasicBlockPathCloning::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineFunctionPass *llvm::createBasicBlockPathCloningPass() { + return
new BasicBlockPathCloning(); +} diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 632fd68d88b5c..42997d2287d61 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -175,12 +175,12 @@ updateBranches(MachineFunction &MF, // clusters, they are moved into a single "Exception" section. Eventually, // clusters are ordered in increasing order of their IDs, with the "Exception" // and "Cold" succeeding all other clusters. -// ClusterInfoByBBID represents the cluster information for basic blocks. It +// FuncClusterInfo represents the cluster information for basic blocks. It // maps from BBID of basic blocks to their cluster information. If this is // empty, it means unique sections for all basic blocks in the function. -static void assignSections( - MachineFunction &MF, - const DenseMap> &ClusterInfoByBBID) { +static void +assignSections(MachineFunction &MF, + const DenseMap &FuncClusterInfo) { assert(MF.hasBBSections() && "BB Sections is not set for function."); // This variable stores the section ID of the cluster containing eh_pads (if // all eh_pads are one cluster). If more than one cluster contain eh_pads, we @@ -191,17 +191,17 @@ static void assignSections( // With the 'all' option, every basic block is placed in a unique section. // With the 'list' option, every basic block is placed in a section // associated with its cluster, unless we want individual unique sections - // for every basic block in this function (if ClusterInfoByBBID is empty). + // for every basic block in this function (if FuncClusterInfo is empty). if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || - ClusterInfoByBBID.empty()) { + FuncClusterInfo.empty()) { // If unique sections are desired for all basic blocks of the function, we // set every basic block's section ID equal to its original position in // the layout (which is equal to its number). 
This ensures that basic // blocks are ordered canonically. MBB.setSectionID(MBB.getNumber()); } else { - auto I = ClusterInfoByBBID.find(*MBB.getBBID()); - if (I != ClusterInfoByBBID.end()) { + auto I = FuncClusterInfo.find(*MBB.getBBID()); + if (I != FuncClusterInfo.end()) { MBB.setSectionID(I->second.ClusterID); } else { // BB goes into the special cold section if it is not specified in the @@ -264,12 +264,7 @@ void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) { } } -// This checks if the source of this function has drifted since this binary was -// profiled previously. For now, we are piggy backing on what PGO does to -// detect this with instrumented profiles. PGO emits an hash of the IR and -// checks if the hash has changed. Advanced basic block layout is usually done -// on top of PGO optimized binaries and hence this check works well in practice. -static bool hasInstrProfHashMismatch(MachineFunction &MF) { +bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) { if (!BBSectionsDetectSourceDrift) return false; @@ -290,7 +285,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { assert(BBSectionsType != BasicBlockSection::None && "BB Sections not enabled!"); - // Check for source drift. If the source has changed since the profiles + // Check for source drift. If the source has changed since the profiles // were obtained, optimizing basic blocks might be sub-optimal. // This only applies to BasicBlockSection::List as it creates // clusters of basic blocks using basic block ids. Source drift can @@ -298,38 +293,30 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // regards to performance. if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF)) - return true; + return false; // Renumber blocks before sorting them. This is useful for accessing the // original layout positions and finding the original fallthroughs. 
MF.RenumberBlocks(); if (BBSectionsType == BasicBlockSection::Labels) { MF.setBBSectionsType(BBSectionsType); - return true; + return false; } - DenseMap> ClusterInfoByBBID; + DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { - auto [HasProfile, PathAndClusterInfo] = + auto [HasProfile, ClusterInfo] = getAnalysis() - .getPathAndClusterInfoForFunction(MF.getName()); + .getClusterInfoForFunction(MF.getName()); if (!HasProfile) - return true; - for (const BBClusterInfo &BBP : - PathAndClusterInfo.ClusterInfo) { - // TODO: Apply the path cloning profile. - assert(!BBP.BasicBlockID.CloneID && "Path cloning is not supported yet"); - const auto [I, Inserted] = ClusterInfoByBBID.try_emplace( - BBP.BasicBlockID.BBID, - BBClusterInfo{BBP.BasicBlockID.BBID, BBP.ClusterID, - BBP.PositionInCluster}); - (void)I; - assert(Inserted && "Duplicate BBID found in profile"); + return false; + for (auto &BBClusterInfo : ClusterInfo) { + FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo); } } MF.setBBSectionsType(BBSectionsType); - assignSections(MF, ClusterInfoByBBID); + assignSections(MF, FuncClusterInfo); // We make sure that the cluster including the entry basic block precedes all // other clusters. @@ -363,8 +350,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // If the two basic block are in the same section, the order is decided by // their position within the section. 
if (XSectionID.Type == MBBSectionID::SectionType::Default) - return ClusterInfoByBBID.lookup(*X.getBBID()).PositionInCluster < - ClusterInfoByBBID.lookup(*Y.getBBID()).PositionInCluster; + return FuncClusterInfo.lookup(*X.getBBID()).PositionInCluster < + FuncClusterInfo.lookup(*Y.getBBID()).PositionInCluster; return X.getNumber() < Y.getNumber(); }; diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 6bb412a6c7534..96662378a8693 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -35,15 +35,15 @@ INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", "Reads and parses a basic block sections profile.", false, false) -Expected -BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const { +Expected +BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { SmallVector Parts; S.split(Parts, '.'); if (Parts.size() > 2) return createProfileParseError(Twine("unable to parse basic block id: '") + S + "'"); - unsigned long long BBID; - if (getAsUnsignedInteger(Parts[0], 10, BBID)) + unsigned long long BaseBBID; + if (getAsUnsignedInteger(Parts[0], 10, BaseBBID)) return createProfileParseError( Twine("unable to parse BB id: '" + Parts[0]) + "': unsigned integer expected"); @@ -51,21 +51,27 @@ BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const { if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) return createProfileParseError(Twine("unable to parse clone id: '") + Parts[1] + "': unsigned integer expected"); - return ProfileBBID{static_cast(BBID), - static_cast(CloneID)}; + return UniqueBBID{static_cast(BaseBBID), + static_cast(CloneID)}; } bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { - return getPathAndClusterInfoForFunction(FuncName).first; + return getClusterInfoForFunction(FuncName).first; } -std::pair 
-BasicBlockSectionsProfileReader::getPathAndClusterInfoForFunction( +std::pair> +BasicBlockSectionsProfileReader::getClusterInfoForFunction( StringRef FuncName) const { auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); return R != ProgramPathAndClusterInfo.end() - ? std::pair(true, R->second) - : std::pair(false, FunctionPathAndClusterInfo()); + ? std::pair(true, R->second.ClusterInfo) + : std::pair(false, SmallVector()); +} + +SmallVector> +BasicBlockSectionsProfileReader::getClonePathsForFunction( + StringRef FuncName) const { + return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; } // Reads the version 1 basic block sections profile. Profile for each function @@ -133,7 +139,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // Temporary set to ensure every basic block ID appears once in the clusters // of a function. - DenseSet FuncBBIDs; + DenseSet FuncBBIDs; // Debug-info-based module filename for the current function. Empty string // means no filename. @@ -199,7 +205,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // Reset current cluster position. CurrentPosition = 0; for (auto BasicBlockIDStr : Values) { - auto BasicBlockID = parseProfileBBID(BasicBlockIDStr); + auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr); if (!BasicBlockID) return BasicBlockID.takeError(); if (!FuncBBIDs.insert(*BasicBlockID).second) @@ -207,28 +213,32 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Twine("duplicate basic block id found '") + BasicBlockIDStr + "'"); - if (!BasicBlockID->BBID && CurrentPosition) + if (!BasicBlockID->BaseID && CurrentPosition) return createProfileParseError( "entry BB (0) does not begin a cluster."); - FI->second.ClusterInfo.emplace_back(BBClusterInfo{ + FI->second.ClusterInfo.emplace_back(BBClusterInfo{ *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); } CurrentCluster++; continue; case 'p': { // Basic block cloning path specifier. 
+ // Skip this path when the profile iterator (FI) refers to the + // past-the-end element. + if (FI == ProgramPathAndClusterInfo.end()) + continue; SmallSet BBsInPath; FI->second.ClonePaths.push_back({}); for (size_t I = 0; I < Values.size(); ++I) { - auto BBIDStr = Values[I]; - unsigned long long BBID = 0; - if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + auto BaseBBIDStr = Values[I]; + unsigned long long BaseBBID = 0; + if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID)) return createProfileParseError(Twine("unsigned integer expected: '") + - BBIDStr + "'"); - if (I != 0 && !BBsInPath.insert(BBID).second) + BaseBBIDStr + "'"); + if (I != 0 && !BBsInPath.insert(BaseBBID).second) return createProfileParseError( - Twine("duplicate cloned block in path: '") + BBIDStr + "'"); - FI->second.ClonePaths.back().push_back(BBID); + Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'"); + FI->second.ClonePaths.back().push_back(BaseBBID); } continue; } @@ -282,9 +292,9 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { "entry BB (0) does not begin a cluster"); FI->second.ClusterInfo.emplace_back( - BBClusterInfo({{static_cast(BBID), 0}, - CurrentCluster, - CurrentPosition++})); + BBClusterInfo({{static_cast(BBID), 0}, + CurrentCluster, + CurrentPosition++})); } CurrentCluster++; } else { diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 389c70d04f17b..df2d1831ee5fd 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -46,6 +46,7 @@ add_llvm_component_library(LLVMCodeGen BranchRelaxation.cpp BreakFalseDeps.cpp BasicBlockSections.cpp + BasicBlockPathCloning.cpp BasicBlockSectionsProfileReader.cpp CalcSpillWeights.cpp CallBrPrepare.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 6272b654b3295..79a95ee0d747a 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -20,6 +20,7 @@ using namespace llvm; void
llvm::initializeCodeGen(PassRegistry &Registry) { initializeAssignmentTrackingAnalysisPass(Registry); initializeAtomicExpandPass(Registry); + initializeBasicBlockPathCloningPass(Registry); initializeBasicBlockSectionsPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 65280c65b6878..c01b34d6f490b 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -500,7 +500,7 @@ class MIParser { bool parseAlignment(uint64_t &Alignment); bool parseAddrspace(unsigned &Addrspace); bool parseSectionID(std::optional &SID); - bool parseBBID(std::optional &BBID); + bool parseBBID(std::optional &BBID); bool parseCallFrameSize(unsigned &CallFrameSize); bool parseOperandsOffset(MachineOperand &Op); bool parseIRValue(const Value *&V); @@ -666,14 +666,20 @@ bool MIParser::parseSectionID(std::optional &SID) { } // Parse Machine Basic Block ID. 
-bool MIParser::parseBBID(std::optional &BBID) { +bool MIParser::parseBBID(std::optional &BBID) { assert(Token.is(MIToken::kw_bb_id)); lex(); - unsigned Value = 0; - if (getUnsigned(Value)) + unsigned BaseID = 0; + unsigned CloneID = 0; + if (getUnsigned(BaseID)) return error("Unknown BB ID"); - BBID = Value; lex(); + if (Token.is(MIToken::IntegerLiteral)) { + if (getUnsigned(CloneID)) + return error("Unknown Clone ID"); + lex(); + } + BBID = {BaseID, CloneID}; return false; } @@ -705,7 +711,7 @@ bool MIParser::parseBasicBlockDefinition( bool IsEHFuncletEntry = false; std::optional SectionID; uint64_t Alignment = 0; - std::optional BBID; + std::optional BBID; unsigned CallFrameSize = 0; BasicBlock *BB = nullptr; if (consumeIfPresent(MIToken::lparen)) { diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 5f9e4a66c0d22..ef8e1bd63024f 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -567,7 +567,9 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags, } if (getBBID().has_value()) { os << (hasAttributes ? 
", " : " ("); - os << "bb_id " << *getBBID(); + os << "bb_id " << getBBID()->BaseID; + if (getBBID()->CloneID != 0) + os << " " << getBBID()->CloneID; hasAttributes = true; } if (CallFrameSize != 0) { @@ -886,7 +888,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, removeSuccessor(OldI); } -void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig, +void MachineBasicBlock::copySuccessor(const MachineBasicBlock *Orig, succ_iterator I) { if (!Orig->Probs.empty()) addSuccessor(*I, Orig->getSuccProbability(I)); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 1f14546a25b1c..857eedebcf72d 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -451,16 +451,17 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) { /// Allocate a new MachineBasicBlock. Use this instead of /// `new MachineBasicBlock'. MachineBasicBlock * -MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { +MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB, + std::optional BBID) { MachineBasicBlock *MBB = new (BasicBlockRecycler.Allocate(Allocator)) - MachineBasicBlock(*this, bb); + MachineBasicBlock(*this, BB); // Set BBID for `-basic-block=sections=labels` and // `-basic-block-sections=list` to allow robust mapping of profiles to basic // blocks. if (Target.getBBSectionsType() == BasicBlockSection::Labels || Target.getBBSectionsType() == BasicBlockSection::List) - MBB->setBBID(NextBBID++); + MBB->setBBID(BBID.has_value() ? 
*BBID : UniqueBBID{NextBBID++, 0}); return MBB; } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index bf1605f06bd88..fe7efb73a2dce 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -430,10 +431,18 @@ bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0, return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } -MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const { - assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated"); +MachineInstr & +TargetInstrInfo::duplicate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) const { MachineFunction &MF = *MBB.getParent(); + // CFI instructions are marked as non-duplicable, because Darwin compact + // unwind info emission can't handle multiple prologue setups. 
+ assert((!Orig.isNotDuplicable() || + (!MF.getTarget().getTargetTriple().isOSDarwin() && + Orig.isCFIInstruction())) && + "Instruction cannot be duplicated"); + return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e6ecbc9b03f71..1f7c949cd6031 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1267,6 +1267,7 @@ void TargetPassConfig::addMachinePasses() { if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderPass( TM->getBBSectionsFuncListBuf())); + addPass(llvm::createBasicBlockPathCloningPass()); } addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || diff --git a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir index 74a7bcf3ae82f..f11707c719895 100644 --- a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir @@ -136,7 +136,7 @@ body: | MOV32mi $rbp, 1, $noreg, -8, $noreg, 0 :: (store (s32) into %ir.2) - bb.3 (%ir-block.9, bb_id 3): + bb.3 (%ir-block.9, bb_id 3 2): renamable $eax = MOV32rm $rbp, 1, $noreg, -8, $noreg :: (load (s32) from %ir.2) $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp frame-destroy CFI_INSTRUCTION def_cfa $rsp, 8 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll new file mode 100644 index 0000000000000..0f84b891a7c52 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll @@ -0,0 +1,71 @@ +;; Test cloning a single path with -basic-block-sections. + +declare void @effect(i32 zeroext) + +;; Test a valid application of path cloning. 
+; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'p 0 3 5' >> %t +; RUN: echo 'c 0 3.1 5.1 1 2 3 4 5' >> %t +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t -stop-after=bb-path-cloning | FileCheck %s --check-prefix=MIR + +define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b3 + +b1: ; preds = %b0 + call void @effect(i32 1) + br i1 %b, label %b2, label %b3 + +b2: ; preds = %b1 + call void @effect(i32 2) + br label %b3 + +b3: ; preds = %b0, %b1, %b2 + call void @effect(i32 3) + br i1 %c, label %b4, label %b5 + +b4: ; preds = %b3 + call void @effect(i32 4) + br i1 %d, label %b5, label %cold + +b5: ; preds = %b3, %b4 + call void @effect(i32 5) + ret void +cold: + call void @effect(i32 6) ; preds = %b4 + ret void +} + +;; Check the cloned block ids in MIR. + +; MIR: bb.7.b3 (bb_id 3 1): +; MIR: bb.8.b5 (bb_id 5 1): + +;; Check the final layout and branches. + +;; bb section: +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: foo: +; CHECK: # %bb.0: # %b0 +; CHECK: jne .LBB0_1 +; CHECK-NEXT: # %bb.7: # %b3 +; CHECK: jne .LBB0_4 +; CHECK-NEXT: # %bb.8: # %b5 +; CHECK: retq +; CHECK-NEXT: .LBB0_1: # %b1 +; CHECK: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %b2 +; CHECK: callq effect@PLT +; CHECK-NEXT: .LBB0_3: # %b3 +; CHECK: je .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %b4 +; CHECK: je foo.cold +; CHECK-NEXT: .LBB0_5: # %b5 +; CHECK: retq + +;; split section +; CHECK: .section .text.split.foo,"ax",@progbits +; CHECK: foo.cold: # %cold diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll new file mode 100644 index 0000000000000..c433491a49430 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll @@ -0,0 +1,86 @@ +;; Test cloning two paths with -basic-block-sections. 
+ +declare void @effect(i32 zeroext) + +; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'p 0 3 5' >> %t +; RUN: echo 'p 1 3 4 5' >> %t +; RUN: echo 'c 0 3.1 5.1' >> %t +; RUN: echo 'c 1 3.2 4.1 5.2 2 3 4 5' >> %t +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t -stop-after=bb-path-cloning | FileCheck %s --check-prefix=MIR + +define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b3 + +b1: ; preds = %b0 + call void @effect(i32 1) + br i1 %b, label %b2, label %b3 + +b2: ; preds = %b1 + call void @effect(i32 2) + br label %b3 + +b3: ; preds = %b0, %b1, %b2 + call void @effect(i32 3) + br i1 %c, label %b4, label %b5 + +b4: ; preds = %b3 + call void @effect(i32 4) + br i1 %d, label %b5, label %cold + +b5: ; preds = %b3, %b4 + call void @effect(i32 5) + ret void +cold: + call void @effect(i32 6) ; preds = %b4 + ret void +} + +;; Check the cloned block ids in MIR. + +; MIR: bb.7.b3 (bb_id 3 1): +; MIR: bb.8.b5 (bb_id 5 1): +; MIR: bb.9.b3 (bb_id 3 2): +; MIR: bb.10.b4 (bb_id 4 1): +; MIR: bb.11.b5 (bb_id 5 2): + +;; Check the final layout and branches. 
+ +;; first cluster: +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: foo: +; CHECK: # %bb.0: # %b0 +; CHECK: jne foo.__part.1 +; CHECK-NEXT: # %bb.7: # %b3 +; CHECK: jne .LBB0_4 +; CHECK-NEXT: # %bb.8: # %b5 +; CHECK: retq + +;; second cluster: +; CHECK: .section .text.foo,"ax",@progbits,unique,1 +; CHECK-NEXT: foo.__part.1: # %b1 +; CHECK: jne .LBB0_2 +; CHECK-NEXT: # %bb.9: # %b3 +; CHECK: je .LBB0_5 +; CHECK-NEXT: # %bb.10: # %b4 +; CHECK: je foo.cold +; CHECK-NEXT: # %bb.11: # %b5 +; CHECK: retq +; CHECK-NEXT: .LBB0_2: # %b2 +; CHECK: callq effect@PLT +; CHECK-NEXT: # %bb.3: # %b3 +; CHECK: je .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %b4 +; CHECK: je foo.cold +; CHECK-NEXT: .LBB0_5: # %b5 +; CHECK: retq + +;; split section +; CHECK: .section .text.split.foo,"ax",@progbits +; CHECK: foo.cold: # %cold + + diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll new file mode 100644 index 0000000000000..d8686cdfa098e --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll @@ -0,0 +1,45 @@ +;; Tests for invalid path cloning with -basic-block-sections involving indirect branches. + +declare void @effect(i32 zeroext) + +;; Test failed application of path cloning for paths with indirect branches. 
+; RUN: echo 'v1' > %t1 +; RUN: echo 'f bar' >> %t1 +; RUN: echo 'p 0 1 2' >> %t1 +; RUN: echo 'c 0 1.1 2.1 1' >> %t1 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 2> %t1.err | FileCheck %s +; RUN: FileCheck %s --check-prefix=WARN < %t1.err +; RUN: echo 'v1' > %t2 +; RUN: echo 'f bar' >> %t2 +; RUN: echo 'p 1 2' >> %t2 +; RUN: echo 'c 0 1 2.1' >> %t2 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 2> %t2.err | FileCheck %s +; RUN: FileCheck %s --check-prefix=WARN < %t2.err + + +define void @bar(i1 %a, i1 %b) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b2 +b1: ; preds = %b0 + call void @effect(i32 1) + %0 = select i1 %b, ; [#uses=1] + ptr blockaddress(@bar, %b2), + ptr blockaddress(@bar, %b3) + indirectbr ptr %0, [label %b2, label %b3] +b2: ; preds = %b0, %b1 + call void @effect(i32 2) + ret void +b3: + call void @effect(i32 3) ; preds = %b1 + ret void +} + +; CHECK: .section .text.bar,"ax",@progbits +; CHECK: bar: +; CHECK: # %bb.0: # %b0 +; CHECK: # %bb.1: # %b1 +; CHECK: .section .text.split.bar,"ax",@progbits +; CHECK: bar.cold: # %b2 + +; WARN: warning: block #1 has indirect branch and appears as the non-tail block of a path in function bar diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll new file mode 100644 index 0000000000000..3d9a8d36ca105 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll @@ -0,0 +1,43 @@ +;; Test for cloning a path ending with indirect branch with -basic-block-sections. + +declare void @effect(i32 zeroext) + +;; Test valid application of cloning for a path with indirect branch. 
+; RUN: echo 'v1' > %t +; RUN: echo 'f bar' >> %t +; RUN: echo 'p 0 1' >> %t +; RUN: echo 'c 0 1.1 2 1' >> %t +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s + +define void @bar(i1 %a, i1 %b) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b2 +b1: ; preds = %b0 + call void @effect(i32 1) + %0 = select i1 %b, ; [#uses=1] + ptr blockaddress(@bar, %b2), + ptr blockaddress(@bar, %b3) + indirectbr ptr %0, [label %b2, label %b3] +b2: ; preds = %b0, %b1 + call void @effect(i32 2) + ret void +b3: + call void @effect(i32 3) ; preds = %b1 + ret void +} + +; CHECK: .section .text.bar,"ax",@progbits +; CHECK: bar: +; CHECK: # %bb.0: # %b0 +; CHECK: je .LBB0_2 +; CHECK-NEXT: # %bb.4: # %b1 +; CHECK: jmpq *%rax +; CHECK-NEXT: .Ltmp0: # Block address taken +; CHECK-NEXT: .LBB0_2: # %b2 +; CHECK: retq +; CHECK-NEXT: # %bb.1: # %b1 +; CHECK: jmpq *%rax +; CHECK: .section .text.split.bar,"ax",@progbits +; CHECK: bar.cold: # %b3 + diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll new file mode 100644 index 0000000000000..521ec43ef050a --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll @@ -0,0 +1,72 @@ +;; Tests for invalid or (partially invalid) path clonings with -basic-block-sections. + +declare void @effect(i32 zeroext) + +;; Test failed application of path cloning. +; RUN: echo 'v1' > %t1 +; RUN: echo 'f foo' >> %t1 +; RUN: echo 'p 0 2 3' >> %t1 +; RUN: echo 'c 0 2.1 3.1 1' >> %t1 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 2> %t1.err | FileCheck %s +; RUN: FileCheck %s --check-prefixes=WARN1 < %t1.err +;; Test that valid clonings are applied correctly, even if invalid clonings exist. 
+; RUN: echo 'v1' > %t2 +; RUN: echo 'f foo' >> %t2 +; RUN: echo 'p 0 2 3' >> %t2 +; RUN: echo 'p 0 1 3' >> %t2 +; RUN: echo 'c 0 1.1 3.2 2.1 3.1 1' >> %t2 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 2> %t2.err | FileCheck %s --check-prefixes=PATH +; RUN: FileCheck %s --check-prefixes=WARN1 < %t2.err +; RUN: echo 'v1' > %t3 +; RUN: echo 'f foo' >> %t3 +; RUN: echo 'p 0 100' >> %t3 +; RUN: echo 'c 0 100.1 1' >> %t3 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t3 2> %t3.err | FileCheck %s +; RUN: FileCheck %s --check-prefixes=WARN2 < %t3.err + +define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b3 + +b1: ; preds = %b0 + call void @effect(i32 1) + br i1 %b, label %b2, label %b3 + +b2: ; preds = %b1 + call void @effect(i32 2) + br label %b3 + +b3: ; preds = %b0, %b1, %b2 + call void @effect(i32 3) + br i1 %c, label %b4, label %b5 + +b4: ; preds = %b3 + call void @effect(i32 4) + br i1 %d, label %b5, label %cold + +b5: ; preds = %b3, %b4 + call void @effect(i32 5) + ret void +cold: + call void @effect(i32 6) ; preds = %b4 + ret void +} + +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: foo: +; CHECK: # %bb.0: # %b0 + +; CHECK: je .LBB0_3 +; PATH: # %bb.7: # %b1 +; PATH: # %bb.8: # %b3 +; PATH: jne .LBB0_4 +; CHECK: # %bb.1: # %b1 +; CHECK: jne foo.cold + +; CHECK: foo.cold: # %b2 + +;; Check the warnings +; WARN1: warning: block #2 is not a successor of block #0 in function foo +; WARN2: warning: no block with id 100 in function foo +