Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LICM] Drop nsw/nuw flags on affected instructions in hoistMulAddAssociation. #85486

Merged
merged 1 commit into from
Mar 18, 2024

Conversation

topperc
Copy link
Collaborator

@topperc topperc commented Mar 15, 2024

Since we are introducing new multiplies earlier in the arithmetic, the nsw/nuw flags on later instructions are no longer accurate.

Fixes #85457.

…ciation.

Since we are introducing new multiplies earlier in the arithmetic,
the nsw/nuw flags on later instructions are no longer accurate.

Fixes llvm#85457.
@llvmbot
Copy link
Collaborator

llvmbot commented Mar 15, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Craig Topper (topperc)

Changes

Since we are introducing new multiplies earlier in the arithmetic, the nsw/nuw flags on later instructions are no longer accurate.

Fixes #85457.


Full diff: https://github.com/llvm/llvm-project/pull/85486.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/LICM.cpp (+12-2)
  • (modified) llvm/test/Transforms/LICM/expr-reassociate-int.ll (+45-14)
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 40bc16f9b57516..e50413de46b1b4 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2701,6 +2701,7 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
 
   // First, we need to make sure we should do the transformation.
   SmallVector<Use *> Changes;
+  SmallVector<BinaryOperator *> Adds;
   SmallVector<BinaryOperator *> Worklist;
   if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
     Worklist.push_back(VariantBinOp);
@@ -2713,6 +2714,7 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
         isa<BinaryOperator>(BO->getOperand(1))) {
       Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0)));
       Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1)));
+      Adds.push_back(BO);
       continue;
     }
     if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) ||
@@ -2735,6 +2737,12 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
   if (Changes.empty())
     return false;
 
+  // Drop the poison flags for any adds we looked through.
+  if (I.getType()->isIntOrIntVectorTy()) {
+    for (auto *Add : Adds)
+      Add->dropPoisonGeneratingFlags();
+  }
+
   // We know we should do it so let's do the transformation.
   auto *Preheader = L.getLoopPreheader();
   assert(Preheader && "Loop is not in simplify form?");
@@ -2743,9 +2751,11 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
     assert(L.isLoopInvariant(U->get()));
     Instruction *Ins = cast<Instruction>(U->getUser());
     Value *Mul;
-    if (I.getType()->isIntOrIntVectorTy())
+    if (I.getType()->isIntOrIntVectorTy()) {
       Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul");
-    else
+      // Drop the poison flags on the original multiply.
+      Ins->dropPoisonGeneratingFlags();
+    } else
       Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul");
     U->set(Mul);
   }
diff --git a/llvm/test/Transforms/LICM/expr-reassociate-int.ll b/llvm/test/Transforms/LICM/expr-reassociate-int.ll
index 63548974fb3185..70288731e31c6d 100644
--- a/llvm/test/Transforms/LICM/expr-reassociate-int.ll
+++ b/llvm/test/Transforms/LICM/expr-reassociate-int.ll
@@ -23,7 +23,7 @@ define void @innermost_loop_1d_shouldhoist(i32 %i, i64 %d1, i64 %delta, ptr %cel
 ; CHECK-LABEL: define void @innermost_loop_1d_shouldhoist
 ; CHECK-SAME: (i32 [[I:%.*]], i64 [[D1:%.*]], i64 [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL_1:%.*]] = mul i64 [[DELTA]], [[D1]]
+; CHECK-NEXT:    [[MUL_1:%.*]] = mul nuw nsw i64 [[DELTA]], [[D1]]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -55,7 +55,7 @@ for.body:
   %idxprom.j.1 = zext i32 %add.j.1 to i64
   %arrayidx.j.1 = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j.1
   %cell.1 = load i64, ptr %arrayidx.j.1, align 8
-  %mul.1 = mul i64 %delta, %d1
+  %mul.1 = mul nsw nuw i64 %delta, %d1
   %mul.2 = mul i64 %mul.1, %cell.1
   %idxprom.j = zext i32 %j to i64
   %arrayidx.j = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j
@@ -130,8 +130,8 @@ define void @innermost_loop_2d(i32 %i, i64 %d1, i64 %d2, i64 %delta, ptr %cells)
 ; CONSTRAINED-NEXT:    [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
 ; CONSTRAINED-NEXT:    [[ARRAYIDX_J:%.*]] = getelementptr inbounds i64, ptr [[CELLS]], i64 [[IDXPROM_J]]
 ; CONSTRAINED-NEXT:    [[CELL_2:%.*]] = load i64, ptr [[ARRAYIDX_J]], align 8
-; CONSTRAINED-NEXT:    [[MUL_2:%.*]] = mul i64 [[CELL_2]], [[D2]]
-; CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = add i64 [[MUL_2]], [[MUL_1]]
+; CONSTRAINED-NEXT:    [[MUL_2:%.*]] = mul nuw nsw i64 [[CELL_2]], [[D2]]
+; CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = add nuw nsw i64 [[MUL_2]], [[MUL_1]]
 ; CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = mul i64 [[REASS_ADD]], [[DELTA]]
 ; CONSTRAINED-NEXT:    store i64 [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
 ; CONSTRAINED-NEXT:    br label [[FOR_COND]]
@@ -155,8 +155,8 @@ for.body:
   %idxprom.j = zext i32 %j to i64
   %arrayidx.j = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j
   %cell.2 = load i64, ptr %arrayidx.j, align 8
-  %mul.2 = mul i64 %cell.2, %d2
-  %reass.add = add i64 %mul.2, %mul.1
+  %mul.2 = mul nsw nuw i64 %cell.2, %d2
+  %reass.add = add nsw nuw i64 %mul.2, %mul.1
   %reass.mul = mul i64 %reass.add, %delta
   store i64 %reass.mul, ptr %arrayidx.j, align 8
   br label %for.cond
@@ -243,10 +243,10 @@ define void @innermost_loop_3d(i32 %i, i64 %d1, i64 %d2, i64 %d3, i64 %delta, pt
 ; CONSTRAINED-NEXT:    [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64
 ; CONSTRAINED-NEXT:    [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds i64, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
 ; CONSTRAINED-NEXT:    [[CELL_3:%.*]] = load i64, ptr [[ARRAYIDX_J_2]], align 8
-; CONSTRAINED-NEXT:    [[MUL_3:%.*]] = mul i64 [[CELL_3]], [[D3]]
-; CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = add i64 [[MUL_2]], [[MUL_1]]
-; CONSTRAINED-NEXT:    [[REASS_ADD1:%.*]] = add i64 [[REASS_ADD]], [[MUL_3]]
-; CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = mul i64 [[REASS_ADD1]], [[DELTA]]
+; CONSTRAINED-NEXT:    [[MUL_3:%.*]] = mul nuw nsw i64 [[CELL_3]], [[D3]]
+; CONSTRAINED-NEXT:    [[REASS_ADD:%.*]] = add nuw nsw i64 [[MUL_2]], [[MUL_1]]
+; CONSTRAINED-NEXT:    [[REASS_ADD1:%.*]] = add nuw nsw i64 [[REASS_ADD]], [[MUL_3]]
+; CONSTRAINED-NEXT:    [[REASS_MUL:%.*]] = mul nuw nsw i64 [[REASS_ADD1]], [[DELTA]]
 ; CONSTRAINED-NEXT:    store i64 [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8
 ; CONSTRAINED-NEXT:    br label [[FOR_COND]]
 ; CONSTRAINED:       for.end:
@@ -274,10 +274,10 @@ for.body:
   %idxprom.j.2 = zext i32 %add.j.2 to i64
   %arrayidx.j.2 = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j.2
   %cell.3 = load i64, ptr %arrayidx.j.2, align 8
-  %mul.3 = mul i64 %cell.3, %d3
-  %reass.add = add i64 %mul.2, %mul.1
-  %reass.add1 = add i64 %reass.add, %mul.3
-  %reass.mul = mul i64 %reass.add1, %delta
+  %mul.3 = mul nsw nuw i64 %cell.3, %d3
+  %reass.add = add nsw nuw i64 %mul.2, %mul.1
+  %reass.add1 = add nsw nuw i64 %reass.add, %mul.3
+  %reass.mul = mul nsw nuw i64 %reass.add1, %delta
   store i64 %reass.mul, ptr %arrayidx.j.2, align 8
   br label %for.cond
 
@@ -362,3 +362,34 @@ for.body:
 for.end:
   ret void
 }
+
+; Make sure we drop poison flags on the mul in the loop.
+define i32 @pr85457(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @pr85457
+; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FACTOR_OP_MUL:%.*]] = mul i32 [[X]], [[Y]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[MUL0:%.*]] = mul i32 [[FACTOR_OP_MUL]], [[IV]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[MUL0]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %iv.next = add nuw nsw i32 %iv, 1
+  %mul0 = mul nuw nsw i32 %x, %iv
+  %mul1 = mul nuw i32 %mul0, %y
+  %cmp = icmp slt i32 %mul1, 1
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret i32 0
+}

// Drop the poison flags for any adds we looked through.
if (I.getType()->isIntOrIntVectorTy()) {
for (auto *Add : Adds)
Add->dropPoisonGeneratingFlags();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the flag drop on the adds does not have test coverage?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't they covered by the NOT_CONSTRAINED-NEXT lines? On the test case where I added nsw nuw to adds?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I got confused by the two sets of check lines.

@danilaml
Copy link
Collaborator

This should also fix #85448

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@topperc topperc merged commit 1261c02 into llvm:main Mar 18, 2024
6 checks passed
@topperc topperc deleted the pr/licm-bug branch March 18, 2024 18:46
chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024
…ciation. (llvm#85486)

Since we are introducing new multiplies earlier in the arithmetic, the
nsw/nuw flags on later instructions are no longer accurate.

Fixes llvm#85457.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[LICM] Mul association incorrectly combines no-wrap flags
4 participants