-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LICM] Drop nsw/nuw flags on affected instructions in hoistMulAddAssociation. #85486
Conversation
…ciation. Since we are introducing new multiplies earlier in the arithmetic, the nsw/nuw flags on later instructions are no longer accurate. Fixes llvm#85457.
@llvm/pr-subscribers-llvm-transforms Author: Craig Topper (topperc) ChangesSince we are introducing new multiplies earlier in the arithmetic, the nsw/nuw flags on later instructions are no longer accurate. Fixes #85457. Full diff: https://github.com/llvm/llvm-project/pull/85486.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 40bc16f9b57516..e50413de46b1b4 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2701,6 +2701,7 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
// First, we need to make sure we should do the transformation.
SmallVector<Use *> Changes;
+ SmallVector<BinaryOperator *> Adds;
SmallVector<BinaryOperator *> Worklist;
if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
Worklist.push_back(VariantBinOp);
@@ -2713,6 +2714,7 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
isa<BinaryOperator>(BO->getOperand(1))) {
Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0)));
Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1)));
+ Adds.push_back(BO);
continue;
}
if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) ||
@@ -2735,6 +2737,12 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
if (Changes.empty())
return false;
+ // Drop the poison flags for any adds we looked through.
+ if (I.getType()->isIntOrIntVectorTy()) {
+ for (auto *Add : Adds)
+ Add->dropPoisonGeneratingFlags();
+ }
+
// We know we should do it so let's do the transformation.
auto *Preheader = L.getLoopPreheader();
assert(Preheader && "Loop is not in simplify form?");
@@ -2743,9 +2751,11 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
assert(L.isLoopInvariant(U->get()));
Instruction *Ins = cast<Instruction>(U->getUser());
Value *Mul;
- if (I.getType()->isIntOrIntVectorTy())
+ if (I.getType()->isIntOrIntVectorTy()) {
Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul");
- else
+ // Drop the poison flags on the original multiply.
+ Ins->dropPoisonGeneratingFlags();
+ } else
Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul");
U->set(Mul);
}
diff --git a/llvm/test/Transforms/LICM/expr-reassociate-int.ll b/llvm/test/Transforms/LICM/expr-reassociate-int.ll
index 63548974fb3185..70288731e31c6d 100644
--- a/llvm/test/Transforms/LICM/expr-reassociate-int.ll
+++ b/llvm/test/Transforms/LICM/expr-reassociate-int.ll
@@ -23,7 +23,7 @@ define void @innermost_loop_1d_shouldhoist(i32 %i, i64 %d1, i64 %delta, ptr %cel
; CHECK-LABEL: define void @innermost_loop_1d_shouldhoist
; CHECK-SAME: (i32 [[I:%.*]], i64 [[D1:%.*]], i64 [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[DELTA]], [[D1]]
+; CHECK-NEXT: [[MUL_1:%.*]] = mul nuw nsw i64 [[DELTA]], [[D1]]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -55,7 +55,7 @@ for.body:
%idxprom.j.1 = zext i32 %add.j.1 to i64
%arrayidx.j.1 = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j.1
%cell.1 = load i64, ptr %arrayidx.j.1, align 8
- %mul.1 = mul i64 %delta, %d1
+ %mul.1 = mul nsw nuw i64 %delta, %d1
%mul.2 = mul i64 %mul.1, %cell.1
%idxprom.j = zext i32 %j to i64
%arrayidx.j = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j
@@ -130,8 +130,8 @@ define void @innermost_loop_2d(i32 %i, i64 %d1, i64 %d2, i64 %delta, ptr %cells)
; CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
; CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds i64, ptr [[CELLS]], i64 [[IDXPROM_J]]
; CONSTRAINED-NEXT: [[CELL_2:%.*]] = load i64, ptr [[ARRAYIDX_J]], align 8
-; CONSTRAINED-NEXT: [[MUL_2:%.*]] = mul i64 [[CELL_2]], [[D2]]
-; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add i64 [[MUL_2]], [[MUL_1]]
+; CONSTRAINED-NEXT: [[MUL_2:%.*]] = mul nuw nsw i64 [[CELL_2]], [[D2]]
+; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add nuw nsw i64 [[MUL_2]], [[MUL_1]]
; CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = mul i64 [[REASS_ADD]], [[DELTA]]
; CONSTRAINED-NEXT: store i64 [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
; CONSTRAINED-NEXT: br label [[FOR_COND]]
@@ -155,8 +155,8 @@ for.body:
%idxprom.j = zext i32 %j to i64
%arrayidx.j = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j
%cell.2 = load i64, ptr %arrayidx.j, align 8
- %mul.2 = mul i64 %cell.2, %d2
- %reass.add = add i64 %mul.2, %mul.1
+ %mul.2 = mul nsw nuw i64 %cell.2, %d2
+ %reass.add = add nsw nuw i64 %mul.2, %mul.1
%reass.mul = mul i64 %reass.add, %delta
store i64 %reass.mul, ptr %arrayidx.j, align 8
br label %for.cond
@@ -243,10 +243,10 @@ define void @innermost_loop_3d(i32 %i, i64 %d1, i64 %d2, i64 %d3, i64 %delta, pt
; CONSTRAINED-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64
; CONSTRAINED-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds i64, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
; CONSTRAINED-NEXT: [[CELL_3:%.*]] = load i64, ptr [[ARRAYIDX_J_2]], align 8
-; CONSTRAINED-NEXT: [[MUL_3:%.*]] = mul i64 [[CELL_3]], [[D3]]
-; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add i64 [[MUL_2]], [[MUL_1]]
-; CONSTRAINED-NEXT: [[REASS_ADD1:%.*]] = add i64 [[REASS_ADD]], [[MUL_3]]
-; CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = mul i64 [[REASS_ADD1]], [[DELTA]]
+; CONSTRAINED-NEXT: [[MUL_3:%.*]] = mul nuw nsw i64 [[CELL_3]], [[D3]]
+; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add nuw nsw i64 [[MUL_2]], [[MUL_1]]
+; CONSTRAINED-NEXT: [[REASS_ADD1:%.*]] = add nuw nsw i64 [[REASS_ADD]], [[MUL_3]]
+; CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = mul nuw nsw i64 [[REASS_ADD1]], [[DELTA]]
; CONSTRAINED-NEXT: store i64 [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8
; CONSTRAINED-NEXT: br label [[FOR_COND]]
; CONSTRAINED: for.end:
@@ -274,10 +274,10 @@ for.body:
%idxprom.j.2 = zext i32 %add.j.2 to i64
%arrayidx.j.2 = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j.2
%cell.3 = load i64, ptr %arrayidx.j.2, align 8
- %mul.3 = mul i64 %cell.3, %d3
- %reass.add = add i64 %mul.2, %mul.1
- %reass.add1 = add i64 %reass.add, %mul.3
- %reass.mul = mul i64 %reass.add1, %delta
+ %mul.3 = mul nsw nuw i64 %cell.3, %d3
+ %reass.add = add nsw nuw i64 %mul.2, %mul.1
+ %reass.add1 = add nsw nuw i64 %reass.add, %mul.3
+ %reass.mul = mul nsw nuw i64 %reass.add1, %delta
store i64 %reass.mul, ptr %arrayidx.j.2, align 8
br label %for.cond
@@ -362,3 +362,34 @@ for.body:
for.end:
ret void
}
+
+; Make sure we drop poison flags on the mul in the loop.
+define i32 @pr85457(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @pr85457
+; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FACTOR_OP_MUL:%.*]] = mul i32 [[X]], [[Y]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[MUL0:%.*]] = mul i32 [[FACTOR_OP_MUL]], [[IV]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MUL0]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+ %iv.next = add nuw nsw i32 %iv, 1
+ %mul0 = mul nuw nsw i32 %x, %iv
+ %mul1 = mul nuw i32 %mul0, %y
+ %cmp = icmp slt i32 %mul1, 1
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret i32 0
+}
|
// Drop the poison flags for any adds we looked through. | ||
if (I.getType()->isIntOrIntVectorTy()) { | ||
for (auto *Add : Adds) | ||
Add->dropPoisonGeneratingFlags(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the flag drop on the adds does not have test coverage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aren't they covered by the NOT_CONSTRAINED-NEXT lines? On the test case where I added nsw nuw
to adds?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I got confused by the two sets of check lines.
This should also fix #85448 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…ciation. (llvm#85486) Since we are introducing new multiplies earlier in the arithmetic, the nsw/nuw flags on later instructions are no longer accurate. Fixes llvm#85457.
Since we are introducing new multiplies earlier in the arithmetic, the nsw/nuw flags on later instructions are no longer accurate.
Fixes #85457.