diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 1397420b2950f..4db697c1ffcec 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -16,7 +16,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/ValueMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include @@ -165,26 +164,6 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, /// values, but instcombine orders them so it usually won't matter. bool EliminateDuplicatePHINodes(BasicBlock *BB); -/// Class to track cost of simplify CFG transformations. -class SimplifyCFGCostTracker { - /// Number of bonus instructions due to folding branches into predecessors. - /// E.g. folding - /// if (cond1) return false; - /// if (cond2) return false; - /// return true; - /// into - /// if (cond1 | cond2) return false; - /// return true; - /// In this case cond2 is always executed whereas originally it may be - /// evicted due to early exit of cond1. 'cond2' is called bonus instructions - /// and such bonus instructions could accumulate for unrolled loops, therefore - /// use a value map to accumulate their costs across transformations. - ValueMap NumBonusInsts; - -public: - void updateNumBonusInsts(BasicBlock *Parent, unsigned InstCount); - unsigned getNumBonusInsts(BasicBlock *Parent); -}; /// This function is used to do simplification of a CFG. For example, it /// adjusts branches to branches to eliminate the extra hop, it eliminates /// unreachable basic blocks, and does other peephole optimization of the CFG. @@ -195,8 +174,7 @@ extern cl::opt RequireAndPreserveDomTree; bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU = nullptr, const SimplifyCFGOptions &Options = {}, - ArrayRef LoopHeaders = {}, - SimplifyCFGCostTracker *CostTracker = nullptr); + ArrayRef LoopHeaders = {}); /// This function is used to flatten a CFG. For example, it uses parallel-and /// and parallel-or mode to collapse if-conditions and merge if-regions with @@ -206,8 +184,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr); /// If this basic block is ONLY a setcc and a branch, and if a predecessor /// branches to us and one of our successors, fold the setcc into the /// predecessor and use logical operations to pick the right destination. -bool FoldBranchToCommonDest(BranchInst *BI, SimplifyCFGCostTracker &CostTracker, - DomTreeUpdater *DTU = nullptr, +bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr, MemorySSAUpdater *MSSAU = nullptr, const TargetTransformInfo *TTI = nullptr, unsigned BonusInstThreshold = 1); diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index e2646eda06c54..fb2d812a186df 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -221,8 +221,7 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, - const SimplifyCFGOptions &Options, - SimplifyCFGCostTracker &CostTracker) { + const SimplifyCFGOptions &Options) { bool Changed = false; bool LocalChange = true; @@ -253,7 +252,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt)) ++BBIt; } - if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &CostTracker)) { + if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) { LocalChange = true; ++NumSimpl; } @@ -267,13 +266,11 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, DominatorTree *DT, const SimplifyCFGOptions &Options) { DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - SimplifyCFGCostTracker CostTracker; bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr); EverChanged |= tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr); - EverChanged |= - iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, CostTracker); + EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -287,8 +284,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, - CostTracker); + EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr); } while (EverChanged); diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 13f1080ffc044..8943b4bb651f6 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -480,7 +480,6 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { - SimplifyCFGCostTracker CostTracker; bool Changed = false; if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -662,7 +661,7 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI, CostTracker, /*DTU=*/nullptr, MSSAU)) + if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU)) continue; // Success. The block is now dead, so remove it from the loop, diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7008f9b152f7f..fcdd85838340d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -207,21 +207,6 @@ STATISTIC(NumInvokes, STATISTIC(NumInvokesMerged, "Number of invokes that were merged together"); STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed"); -namespace llvm { - -void SimplifyCFGCostTracker::updateNumBonusInsts(BasicBlock *BB, - unsigned InstCount) { - auto Loc = NumBonusInsts.find(BB); - if (Loc == NumBonusInsts.end()) - Loc = NumBonusInsts.insert({BB, 0}).first; - Loc->second = Loc->second + InstCount; -} -unsigned SimplifyCFGCostTracker::getNumBonusInsts(BasicBlock *BB) { - return NumBonusInsts.lookup(BB); -} - -} // namespace llvm - namespace { // The first field contains the value that the switch produces when a certain @@ -258,10 +243,6 @@ class SimplifyCFGOpt { ArrayRef LoopHeaders; const SimplifyCFGOptions &Options; bool Resimplify; - // Accumulates number of bonus instructions due to merging basic blocks - // of common destination. - SimplifyCFGCostTracker *CostTracker; - std::unique_ptr LocalCostTracker; Value *isValueEqualityComparison(Instruction *TI); BasicBlock *GetValueEqualityComparisonCases( @@ -305,15 +286,8 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL, ArrayRef LoopHeaders, - const SimplifyCFGOptions &Opts, - SimplifyCFGCostTracker *CostTracker_) + const SimplifyCFGOptions &Opts) : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { - // Cannot do this with member initializer list since LocalCostTracker is not - // initialized there yet. - CostTracker = CostTracker_ - ? CostTracker_ - : (LocalCostTracker.reset(new SimplifyCFGCostTracker()), - LocalCostTracker.get()); assert((!DTU || !DTU->hasPostDomTree()) && "SimplifyCFG is not yet capable of maintaining validity of a " "PostDomTree, so don't ask for it."); @@ -3650,9 +3624,8 @@ static bool isVectorOp(Instruction &I) { /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, - SimplifyCFGCostTracker &CostTracker, - DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, +bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { // If this block ends with an unconditional branch, @@ -3724,6 +3697,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, // as "bonus instructions", and only allow this transformation when the // number of the bonus instructions we'll need to create when cloning into // each predecessor does not exceed a certain threshold. + unsigned NumBonusInsts = 0; bool SawVectorOp = false; const unsigned PredCount = Preds.size(); for (Instruction &I : *BB) { @@ -3742,13 +3716,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, // predecessor. Ignore free instructions. if (!TTI || TTI->getInstructionCost(&I, CostKind) != TargetTransformInfo::TCC_Free) { - for (auto PredBB : Preds) { - CostTracker.updateNumBonusInsts(PredBB, PredCount); - // Early exits once we reach the limit. - if (CostTracker.getNumBonusInsts(PredBB) > - BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier) - return false; - } + NumBonusInsts += PredCount; + + // Early exits once we reach the limit. + if (NumBonusInsts > + BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier) + return false; } auto IsBCSSAUse = [BB, &I](Use &U) { @@ -3762,12 +3735,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, if (!all_of(I.uses(), IsBCSSAUse)) return false; } - for (auto PredBB : Preds) { - if (CostTracker.getNumBonusInsts(PredBB) > - BonusInstThreshold * - (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1)) - return false; - } + if (NumBonusInsts > + BonusInstThreshold * + (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1)) + return false; // Ok, we have the budget. Perform the transformation. for (BasicBlock *PredBlock : Preds) { @@ -6918,7 +6889,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI, + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; @@ -6987,7 +6958,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI, + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); @@ -7286,9 +7257,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, - ArrayRef LoopHeaders, - SimplifyCFGCostTracker *CostTracker) { + ArrayRef LoopHeaders) { return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders, - Options, CostTracker) + Options) .run(BB); } diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll index 40493b48dfe05..fa39b77aae36a 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -passes='require,loop-unroll,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -passes='require,loop-unroll,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s define void @basic(i32 %K, i32 %N) { ; CHECK-LABEL: @basic( diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index c126dbcd6ca96..05ef21d5a1123 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -bonus-inst-threshold=4 -O2 -S < %s | FileCheck %s +; RUN: opt -O2 -S < %s | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--" diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll index f332cf82b5573..fd400632a5916 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll @@ -3,12 +3,9 @@ %struct.S = type { [4 x i32] } -; Check the second basic block is folded into the first basic block -; since it has one bonus intruction. The third basic block is not -; folded into the first basic block since the accumulated bonus -; instructions will exceed the default threshold of 1. The fourth basic -; block is foled into the third basic block since the accumulated -; bonus instruction cost is 1. +; Check the second, third, and fourth basic blocks are folded into +; the first basic block since each has one bonus intruction, which +; does not exceed the default bouns instruction threshold of 1. define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) { ; CHECK-LABEL: @test1( @@ -18,18 +15,14 @@ define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) { ; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP1:%.*]], [[TMP1]] ; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL1]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[FOR_COND_1:%.*]] -; CHECK: for.cond.1: ; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[TMP2:%.*]], [[TMP2]] ; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[MUL2]], 0 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]] ; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP3:%.*]], [[TMP3]] ; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[MUL3]], 0 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP2_2]], i1 true, i1 [[CMP2_3]] -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true -; CHECK-NEXT: br label [[CLEANUP]] -; CHECK: cleanup: -; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND_1]] ] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true +; CHECK-NEXT: ret i1 [[SPEC_SELECT]] ; entry: %mul0 = mul i32 %0, %0 diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll index 0482fa57227f6..c1fd267aef93f 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll @@ -1,9 +1,9 @@ ; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=NORMAL -; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=3 | FileCheck %s --check-prefix=AGGRESSIVE -; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=6 | FileCheck %s --check-prefix=WAYAGGRESSIVE +; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE +; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE ; RUN: opt %s -passes=simplifycfg -S | FileCheck %s --check-prefix=NORMAL -; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=AGGRESSIVE -; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE +; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=AGGRESSIVE +; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) { ; NORMAL-LABEL: @foo( diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll index 6b8ebd9054dc6..71b8ef5c7612c 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=1 | FileCheck --check-prefixes=ALL,THR1 %s -; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=3 | FileCheck --check-prefixes=ALL,THR2 %s +; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=2 | FileCheck --check-prefixes=ALL,THR2 %s declare void @sideeffect0() declare void @sideeffect1() @@ -10,7 +10,7 @@ declare i1 @gen1() ; Here we'd want to duplicate %v3_adj into two predecessors, ; but -bonus-inst-threshold=1 says that we can only clone it into one. -; With -bonus-inst-threshold=3 we can clone it into both though. +; With -bonus-inst-threshold=2 we can clone it into both though. define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; THR1-LABEL: @two_preds_with_extra_op( ; THR1-NEXT: entry: