-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fold dependent IVs #81151
Conversation
@llvm/pr-subscribers-llvm-transforms — Author: Nikita Popov (nikic). Changes: Fold `iv = phi(start, iv.next = iv2.next + start)` where `iv2 = phi(iv2.start, iv2.next = iv2 + iv2.step)` to `iv = iv2 + start`, removing one induction variable from the loop. Proof: https://alive2.llvm.org/ce/z/hfmwgf. Fixes #77108. Full diff: https://github.com/llvm/llvm-project/pull/81151.diff — 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 20b34c1379d57..4924a58095963 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1378,6 +1378,47 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
return nullptr;
}
+// Fold iv = phi(start, iv.next = iv2.next + start)
+// where iv2 = phi(iv2.start, iv2.next = iv2 + iv2.step)
+// to iv = iv2 + start
+static Value *foldDependentIVs(PHINode &PN, IRBuilderBase &Builder) {
+ BasicBlock *BB = PN.getParent();
+ if (PN.getNumIncomingValues() != 2)
+ return nullptr;
+
+ Value *Start;
+ Instruction *IvNext;
+ BinaryOperator *Iv2Next;
+ auto MatchOuterIV = [&](Value *V1, Value *V2) {
+ if (match(V2, m_c_Add(m_Specific(V1), m_BinOp(Iv2Next))) ||
+ match(V2, m_PtrAdd(m_Specific(V1), m_BinOp(Iv2Next)))) {
+ Start = V1;
+ IvNext = cast<Instruction>(V2);
+ return true;
+ }
+ return false;
+ };
+
+ if (!MatchOuterIV(PN.getIncomingValue(0), PN.getIncomingValue(1)) &&
+ !MatchOuterIV(PN.getIncomingValue(1), PN.getIncomingValue(0)))
+ return nullptr;
+
+ PHINode *Iv2;
+ Value *Iv2Start, *Iv2Step;
+ if (!matchSimpleRecurrence(Iv2Next, Iv2, Iv2Start, Iv2Step) ||
+ Iv2->getParent() != BB || !match(Iv2Start, m_Zero()))
+ return nullptr;
+
+ Builder.SetInsertPoint(&*BB, BB->getFirstInsertionPt());
+ if (Start->getType()->isPtrOrPtrVectorTy())
+ return Builder.CreatePtrAdd(Start, Iv2, "",
+ cast<GEPOperator>(IvNext)->isInBounds());
+
+ auto *OBO = cast<OverflowingBinaryOperator>(IvNext);
+ return Builder.CreateAdd(Start, Iv2, "", OBO->hasNoUnsignedWrap(),
+ OBO->hasNoSignedWrap());
+}
+
// PHINode simplification
//
Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
@@ -1595,5 +1636,8 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (auto *V = simplifyUsingControlFlow(*this, PN, DT))
return replaceInstUsesWith(PN, V);
+ if (Value *Res = foldDependentIVs(PN, Builder))
+ return replaceInstUsesWith(PN, Res);
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/dependent-ivs.ll b/llvm/test/Transforms/InstCombine/dependent-ivs.ll
index bd6679121dcac..b206a8534cc38 100644
--- a/llvm/test/Transforms/InstCombine/dependent-ivs.ll
+++ b/llvm/test/Transforms/InstCombine/dependent-ivs.ll
@@ -7,11 +7,10 @@ define void @int_iv_nuw(i64 %base, i64 %end) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV2:%.*]] = add nuw i64 [[IV]], [[BASE]]
; CHECK-NEXT: call void @use.i64(i64 [[IV2]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[IV2_NEXT]] = add nuw i64 [[IV_NEXT]], [[BASE]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -39,11 +38,10 @@ define void @int_iv_nsw(i64 %base, i64 %end) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV2:%.*]] = add nsw i64 [[IV]], [[BASE]]
; CHECK-NEXT: call void @use.i64(i64 [[IV2]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV_NEXT]], [[BASE]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -72,11 +70,10 @@ define void @int_iv_commuted(i64 %base, i64 %end) {
; CHECK-NEXT: [[BASE2:%.*]] = mul i64 [[BASE]], 42
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE2]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV2:%.*]] = add i64 [[BASE2]], [[IV]]
; CHECK-NEXT: call void @use.i64(i64 [[IV2]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[IV2_NEXT]] = add i64 [[BASE2]], [[IV_NEXT]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -105,11 +102,10 @@ define void @int_iv_vector(<2 x i64> %base) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV2:%.*]] = phi <2 x i64> [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV2:%.*]] = add <2 x i64> [[IV]], [[BASE]]
; CHECK-NEXT: call void @use.v2i64(<2 x i64> [[IV2]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], <i64 4, i64 4>
-; CHECK-NEXT: [[IV2_NEXT]] = add <2 x i64> [[IV_NEXT]], [[BASE]]
; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1()
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -137,12 +133,11 @@ define void @int_iv_loop_variant_step(i64 %base, i64 %end) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV2:%.*]] = add nuw i64 [[IV]], [[BASE]]
; CHECK-NEXT: call void @use.i64(i64 [[IV2]])
; CHECK-NEXT: [[STEP:%.*]] = call i64 @get.i64()
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], [[STEP]]
-; CHECK-NEXT: [[IV2_NEXT]] = add nuw i64 [[IV_NEXT]], [[BASE]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -171,11 +166,10 @@ define void @ptr_iv_inbounds(ptr %base, i64 %end) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[IV_PTR_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_PTR:%.*]] = getelementptr inbounds i8, ptr [[BASE]], i64 [[IV]]
; CHECK-NEXT: call void @use.p0(ptr [[IV_PTR]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i8, ptr [[BASE]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -203,11 +197,10 @@ define void @ptr_iv_no_inbounds(ptr %base, i64 %end) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[IV_PTR_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_PTR:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[IV]]
; CHECK-NEXT: call void @use.p0(ptr [[IV_PTR]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[IV_PTR_NEXT]] = getelementptr i8, ptr [[BASE]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -235,11 +228,10 @@ define void @ptr_iv_vector(<2 x ptr> %base, i64 %end) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_PTR:%.*]] = phi <2 x ptr> [ [[IV_PTR_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_PTR:%.*]] = getelementptr inbounds i8, <2 x ptr> [[BASE]], i64 [[IV]]
; CHECK-NEXT: call void @use.v2p0(<2 x ptr> [[IV_PTR]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i8, <2 x ptr> [[BASE]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
@@ -267,11 +259,10 @@ define void @ptr_iv_vector2(<2 x ptr> %base) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_PTR:%.*]] = phi <2 x ptr> [ [[IV_PTR_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[IV_PTR:%.*]] = getelementptr i8, <2 x ptr> [[BASE]], <2 x i64> [[IV]]
; CHECK-NEXT: call void @use.v2p0(<2 x ptr> [[IV_PTR]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], <i64 4, i64 4>
-; CHECK-NEXT: [[IV_PTR_NEXT]] = getelementptr i8, <2 x ptr> [[BASE]], <2 x i64> [[IV_NEXT]]
; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1()
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
|
Do you have any performance data to show that this change is a benefit to backends? |
This depends on the backend, and LSR will convert it into the preferred form. For targets with weak addressing modes like RISCV, LSR will prefer the old form; for targets with strong addressing modes like X86 or ARM it will prefer the new. In that sense, it shouldn't really matter for the backend which form we choose, as it will be rewritten anyway, so the primary consideration would be about the middle end. The context for this PR is #81614, where I encountered this pattern and thought it is a regression. I don't have a motivation for it beyond that. |
Think it's definitely fair to claim it's a simplification. |
Instruction *IvNext; | ||
BinaryOperator *Iv2Next; | ||
auto MatchOuterIV = [&](Value *V1, Value *V2) { | ||
if (match(V2, m_c_Add(m_Specific(V1), m_BinOp(Iv2Next))) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would think just about any binop would work here: https://alive2.llvm.org/ce/z/nA8yg3 (for sub/xor).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a bit more complicated than that. I've updated the comment above the transform to be a bit more precise about what the precondition here is. We need `iv2.start op start = start` to hold, which is true when `op` is `+` and `iv2.start` is 0. It's also true for xor. For sub it depends on which operand it is. For `&` this is invalid -- but would be valid if `iv2.start` is -1 instead.
It would be possible to fully generalize this by doing a simplifyInstruction with the iv2.next operand replaced by iv2.start and checking that it's equal to start, and then instead of creating new instructions instead clone the existing one and replace operands. But this adds extra complexity, and it's not clear it is useful. Based on @dtcxzyw's tests, the GEP case actually accounts for the vast majority of occurrences of this pattern.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about using `ConstantExpr::getBinOpIdentity`? Then really it's just changing `!match(Iv2Start, m_Zero())` -> `!match(Iv2Start, m_SpecificInt(Identity))`. If you want to leave `AllowRHSConstant` as a todo, then all that is left is `m_c_Add` -> `m_c_BinOp`. Seems roughly the same implementation complexity, and more principled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
75daf22
to
2e75b4c
Compare
Fold `iv = phi(start, iv.next = iv2.next + start)` where `iv2 = phi(iv2.start, iv2.next = iv2 + iv2.step)` to `iv = iv2 + start` removing one induction variable from the loop. Proof: https://alive2.llvm.org/ce/z/hfmwgf
LGTM |
Instruction *IvNext; | ||
BinaryOperator *Iv2Next; | ||
auto MatchOuterIV = [&](Value *V1, Value *V2) { | ||
if (match(V2, m_c_BinOp(m_Specific(V1), m_BinOp(Iv2Next))) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Maybe add a TODO for detecting whether `Iv2Next` is the RHS or LHS operand, to cover more binops (like sub).
Instruction *IvNext; | ||
BinaryOperator *Iv2Next; | ||
auto MatchOuterIV = [&](Value *V1, Value *V2) { | ||
if (match(V2, m_c_BinOp(m_Specific(V1), m_BinOp(Iv2Next))) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`m_c_BinOp` also matches non-commutative binops. Is this behavior expected?
llvm-project/llvm/include/llvm/IR/PatternMatch.h
Lines 911 to 927 in 8f7ae64
template <typename LHS_t, typename RHS_t, bool Commutable = false> | |
struct AnyBinaryOp_match { | |
LHS_t L; | |
RHS_t R; | |
// The evaluation order is always stable, regardless of Commutability. | |
// The LHS is always matched first. | |
AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | |
template <typename OpTy> bool match(OpTy *V) { | |
if (auto *I = dyn_cast<BinaryOperator>(V)) | |
return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | |
(Commutable && L.match(I->getOperand(1)) && | |
R.match(I->getOperand(0))); | |
return false; | |
} | |
}; |
llvm-project/llvm/include/llvm/IR/PatternMatch.h
Lines 2410 to 2414 in 8f7ae64
/// Matches a BinaryOperator with LHS and RHS in either order. | |
template <typename LHS, typename RHS> | |
inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { | |
return AnyBinaryOp_match<LHS, RHS, true>(L, R); | |
} |
It doesn't cause miscompilation because `ConstantExpr::getBinOpIdentity` returns null for non-commutative binops.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm aware -- this is why there is an isCommutative() assertion below, in case the behavior of getBinOpIdentity() ever changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Both the implementation and IR diff LGTM. Thanks!
Fold `iv = phi(start, iv.next = iv2.next + start)` where `iv2 = phi(iv2.start, iv2.next = iv2 + iv2.step)` to `iv = iv2 + start`, removing one induction variable from the loop. Proof: https://alive2.llvm.org/ce/z/hfmwgf
Fixes #77108.