-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Improve `foldICmpWithDominatingICmp` with DomConditionCache (#75370)
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw). Changes: This patch uses affected values from DomConditionCache (introduced by #73662), instead of a cheap/incomplete check. Full diff: https://github.com/llvm/llvm-project/pull/75370.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 91642e3babceec..7a463f7e5fe917 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1323,73 +1323,73 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
/// Canonicalize icmp instructions based on dominating conditions.
Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
- // This is a cheap/incomplete check for dominance - just match a single
- // predecessor with a conditional branch.
- BasicBlock *CmpBB = Cmp.getParent();
- BasicBlock *DomBB = CmpBB->getSinglePredecessor();
- if (!DomBB)
- return nullptr;
-
- Value *DomCond;
- BasicBlock *TrueBB, *FalseBB;
- if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
- return nullptr;
-
- assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
- "Predecessor block does not point to successor?");
-
- // The branch should get simplified. Don't bother simplifying this condition.
- if (TrueBB == FalseBB)
- return nullptr;
-
// We already checked simple implication in InstSimplify, only handle complex
// cases here.
-
- CmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
ICmpInst::Predicate DomPred;
- const APInt *C, *DomC;
- if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
- match(Y, m_APInt(C))) {
- // We have 2 compares of a variable with constants. Calculate the constant
- // ranges of those compares to see if we can transform the 2nd compare:
- // DomBB:
- // DomCond = icmp DomPred X, DomC
- // br DomCond, CmpBB, FalseBB
- // CmpBB:
- // Cmp = icmp Pred X, C
- ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C);
- ConstantRange DominatingCR =
- (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
- : ConstantRange::makeExactICmpRegion(
- CmpInst::getInversePredicate(DomPred), *DomC);
- ConstantRange Intersection = DominatingCR.intersectWith(CR);
- ConstantRange Difference = DominatingCR.difference(CR);
- if (Intersection.isEmptySet())
- return replaceInstUsesWith(Cmp, Builder.getFalse());
- if (Difference.isEmptySet())
- return replaceInstUsesWith(Cmp, Builder.getTrue());
+ const APInt *C;
+ if (!match(Y, m_APInt(C)))
+ return nullptr;
- // Canonicalizing a sign bit comparison that gets used in a branch,
- // pessimizes codegen by generating branch on zero instruction instead
- // of a test and branch. So we avoid canonicalizing in such situations
- // because test and branch instruction has better branch displacement
- // than compare and branch instruction.
- bool UnusedBit;
- bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
- if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
- return nullptr;
+ auto handleDomCond = [&](Value *DomCond, bool CondIsTrue) -> Instruction * {
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ const APInt *DomC;
+ if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC)))) {
+ // We have 2 compares of a variable with constants. Calculate the constant
+ // ranges of those compares to see if we can transform the 2nd compare:
+ // DomBB:
+ // DomCond = icmp DomPred X, DomC
+ // br DomCond, CmpBB, FalseBB
+ // CmpBB:
+ // Cmp = icmp Pred X, C
+ ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C);
+ ConstantRange DominatingCR =
+ CondIsTrue ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
+ : ConstantRange::makeExactICmpRegion(
+ CmpInst::getInversePredicate(DomPred), *DomC);
+ ConstantRange Intersection = DominatingCR.intersectWith(CR);
+ ConstantRange Difference = DominatingCR.difference(CR);
+ if (Intersection.isEmptySet())
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
+ if (Difference.isEmptySet())
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
+
+ // Canonicalizing a sign bit comparison that gets used in a branch,
+ // pessimizes codegen by generating branch on zero instruction instead
+ // of a test and branch. So we avoid canonicalizing in such situations
+ // because test and branch instruction has better branch displacement
+ // than compare and branch instruction.
+ bool UnusedBit;
+ bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
+ if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
+ return nullptr;
- // Avoid an infinite loop with min/max canonicalization.
- // TODO: This will be unnecessary if we canonicalize to min/max intrinsics.
- if (Cmp.hasOneUse() &&
- match(Cmp.user_back(), m_MaxOrMin(m_Value(), m_Value())))
- return nullptr;
+ // Avoid an infinite loop with min/max canonicalization.
+ // TODO: This will be unnecessary if we canonicalize to min/max
+ // intrinsics.
+ if (Cmp.hasOneUse() &&
+ match(Cmp.user_back(), m_MaxOrMin(m_Value(), m_Value())))
+ return nullptr;
+
+ if (const APInt *EqC = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
+ if (const APInt *NeC = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
+ }
+ return nullptr;
+ };
- if (const APInt *EqC = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
- if (const APInt *NeC = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
+ for (BranchInst *BI : DC.conditionsFor(X)) {
+ auto *Cond = BI->getCondition();
+ BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
+ if (DT.dominates(Edge0, Cmp.getParent()))
+ if (auto *V = handleDomCond(Cond, true))
+ return V;
+
+ BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
+ if (DT.dominates(Edge1, Cmp.getParent()))
+ if (auto *V = handleDomCond(Cond, false))
+ return V;
}
return nullptr;
diff --git a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
index c2e3f59ec678d3..b364116e78e4f9 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
@@ -18,8 +18,8 @@ define void @test(ptr %x, i32 %n) {
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[REM]], 1
-; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY_1:%.*]], label [[WHILE_END]]
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[REM]], 1
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY_1:%.*]]
; CHECK: while.body.1:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
; CHECK-NEXT: [[CMP1_1:%.*]] = icmp slt i32 [[TMP1]], 10
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index e2cb6a24123503..75222e5ee13716 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -38,8 +38,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
@@ -47,8 +46,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
@@ -56,8 +54,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
-; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
@@ -65,8 +62,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
-; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
@@ -74,8 +70,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
-; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
@@ -83,8 +78,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
-; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
@@ -92,8 +86,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
-; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
@@ -134,12 +127,11 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: otherexit.loopexit:
-; CHECK-NEXT: [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
; CHECK: otherexit.loopexit3:
; CHECK-NEXT: br label [[OTHEREXIT]]
; CHECK: otherexit:
-; CHECK-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
+; CHECK-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
; CHECK-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
; CHECK-NEXT: ret i32 [[RVAL]]
;
@@ -774,8 +766,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
@@ -783,8 +774,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
@@ -792,8 +782,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
-; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
@@ -801,8 +790,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
-; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
@@ -810,8 +798,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
-; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
@@ -819,8 +806,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
-; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
@@ -828,8 +814,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
-; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[TMP0]], 42
-; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
@@ -870,12 +855,11 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: otherexit.loopexit:
-; CHECK-NEXT: [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
; CHECK: otherexit.loopexit3:
; CHECK-NEXT: br label [[OTHEREXIT]]
; CHECK: otherexit:
-; CHECK-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[RVAL_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
+; CHECK-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
; CHECK-NEXT: br label [[OTHEREXIT2:%.*]]
; CHECK: otherexit2:
; CHECK-NEXT: [[RVAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
8032c74
to
79e15a9
Compare
This patch uses affected values from DomConditionCache (introduced by #73662), instead of a cheap/incomplete `getSinglePredecessor` check.
Compile-time impact: http://llvm-compile-time-tracker.com/compare.php?from=930b5b52ffe699dbcf05eea32d12a2861dd2bdf6&to=8032c74ff972786c96251be21b71eefe18c89740&stat=instructions:u