[InstSimplify] Remove the remainder loop if we know the mask is alway…

…s true We check whether the loop trip count is known to be a power of 2 to determine whether the tail loop can be eliminated in D146199. However, the remainder loop of a masked scalable loop can also be removed if we know the mask is always going to be true for every vector iteration. Depends on the assumption of a power-of-two vscale in D155350. Proofs: https://alive2.llvm.org/ce/z/FkTMoy Fix #63616. Reviewed By: goldstein.w.n, nikic, david-arm, paulwalker-arm Differential Revision: https://reviews.llvm.org/D154953
llvm · Aug 1, 2023 · 3e386b2 · 3e386b2
1 parent 44d14a1
commit 3e386b2
Show file tree

Hide file tree

Showing 5 changed files with 265 additions and 429 deletions.
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -78,6 +78,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
                                               ArrayRef<Value *> NewOps,
                                               const SimplifyQuery &SQ,
                                               unsigned MaxRecurse);
+static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate,
+                                               Value *LHS, Value *RHS,
+                                               const SimplifyQuery &Q);
 
 static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
                                      Value *FalseVal) {
@@ -2116,6 +2119,32 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
       return Op0;
   }
 
+  // and 2^x-1, 2^C --> 0 where x <= C.
+  // Op0 is matched as (Shift + -1) with Shift a power of two 2^x, i.e. the
+  // low-bit mask 2^x-1. Shift must be known to be a *non-zero* power of two
+  // (OrZero = false): if Shift could be 0, Shift + -1 would be all-ones and
+  // the 'and' with 2^C would produce 2^C instead of 0.
+  const APInt *PowerC;
+  Value *Shift;
+  if (match(Op1, m_Power2(PowerC)) &&
+      match(Op0, m_Add(m_Value(Shift), m_AllOnes())) &&
+      isKnownToBeAPowerOfTwo(Shift, Q.DL, /*OrZero*/ false, 0, Q.AC, Q.CxtI,
+                             Q.DT)) {
+    KnownBits Known = computeKnownBits(Shift, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+    // Use getActiveBits() to make use of the additional power of two knowledge:
+    // since Shift has exactly one bit set, comparing the active-bit counts of
+    // 2^C and of Shift's maximum possible value establishes x <= C.
+    if (PowerC->getActiveBits() >= Known.getMaxValue().getActiveBits())
+      return ConstantInt::getNullValue(Op1->getType());
+  }
+
+  // not (-1 << x), 1 << C --> 0 where x <= C.
+  // ~(-1 << x) is the low-bit mask 2^x-1, which shares no set bit with 2^C
+  // when x <= C.
+  // InstCombine's canonicalizeLowbitMask rewrites (1 << x) - 1 into
+  // ~(-1 << x), so that canonical form is matched here as well.
+  if (match(Op1, m_Power2(PowerC)) && match(Op0, m_Not(m_Value(Shift))) &&
+      match(Shift, m_Shl(m_AllOnes(), m_Value(X)))) {
+    // Materialize C = log2(2^C) as a constant of the operand type so that
+    // "x u<= C" can be queried below.
+    Value *ShiftC = ConstantInt::get(Op1->getType(), PowerC->logBase2());
+    // NOTE(review): presumably this returns a constant only when an assume
+    // valid at the query context decides the comparison - confirm against the
+    // helper's definition, which is not visible in this hunk.
+    if (auto *V =
+            simplifyICmpWithDominatingAssume(CmpInst::ICMP_ULE, X, ShiftC, Q)) {
+      auto *CV = cast<ConstantInt>(V);
+      // Fold only when the comparison is known true (x u<= C).
+      if (CV->isOne())
+        return ConstantInt::getNullValue(Op1->getType());
+    }
+  }
+
   // If we have a multiplication overflow check that is being 'and'ed with a
   // check that one of the multipliers is not zero, we can omit the 'and', and
   // only keep the overflow check.

diff --git a/llvm/test/Transforms/InstCombine/and-add-shl.ll b/llvm/test/Transforms/InstCombine/and-add-shl.ll
@@ -10,10 +10,7 @@ define i8 @and_add_shl(i8 %x) {
 ; CHECK-SAME: (i8 [[X:%.*]]) {
 ; CHECK-NEXT:    [[OP1_P2:%.*]] = icmp ult i8 [[X]], 6
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[OP1_P2]])
-; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw i8 -1, [[X]]
-; CHECK-NEXT:    [[SUB:%.*]] = and i8 [[NOTMASK]], 32
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[SUB]], 32
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %op1_p2 = icmp ule i8 %x, 5
   call void @llvm.assume(i1 %op1_p2)
@@ -29,10 +26,7 @@ define i8 @and_not_shl(i8 %x) {
 ; CHECK-SAME: (i8 [[X:%.*]]) {
 ; CHECK-NEXT:    [[OP1_P2:%.*]] = icmp ult i8 [[X]], 6
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[OP1_P2]])
-; CHECK-NEXT:    [[SHIFT:%.*]] = shl i8 -1, [[X]]
-; CHECK-NEXT:    [[NOT:%.*]] = and i8 [[SHIFT]], 32
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[NOT]], 32
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %op1_p2 = icmp ule i8 %x, 5
   call void @llvm.assume(i1 %op1_p2)

diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
@@ -843,11 +843,7 @@ define i64 @urem_shl_vscale() {
 
 define i64 @urem_shl_vscale_range() vscale_range(1,16) {
 ; CHECK-LABEL: @urem_shl_vscale_range(
-; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 2
-; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
-; CHECK-NEXT:    [[REM:%.*]] = and i64 [[TMP1]], 1024
-; CHECK-NEXT:    ret i64 [[REM]]
+; CHECK-NEXT:    ret i64 0
 ;
   %vscale = call i64 @llvm.vscale.i64()
   %shift = shl nuw nsw i64 %vscale, 2
@@ -857,11 +853,7 @@ define i64 @urem_shl_vscale_range() vscale_range(1,16) {
 
 define i64 @urem_vscale_range() vscale_range(1,16) {
 ; CHECK-LABEL: @urem_vscale_range(
-; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 6
-; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
-; CHECK-NEXT:    [[REM:%.*]] = and i64 [[TMP1]], 1024
-; CHECK-NEXT:    ret i64 [[REM]]
+; CHECK-NEXT:    ret i64 0
 ;
   %vscale = call i64 @llvm.vscale.i64()
   %shift = shl nuw nsw i64 %vscale, 6