[ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`/`computeKnownBits`. #88169

goldsteinn · 2024-04-09T17:56:41Z

[InstCombine] Add tests for non-zero/knownbits of vector_reduce_{s,u}{min,max}; NFC
[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in isKnownNonZero
[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in computeKnownBits

llvmbot · 2024-04-09T17:57:12Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: None (goldsteinn)

Changes

[ValueTracking] Expand isKnown{Negative,Positive} APIs; NFC
[InstCombine] Add tests for non-zero/knownbits of vector_reduce_{s,u}{min,max}; NFC
[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in isKnownNonZero
[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in computeKnownBits

Full diff: https://github.com/llvm/llvm-project/pull/88169.diff

3 Files Affected:

(modified) llvm/include/llvm/Analysis/ValueTracking.h (+10)
(modified) llvm/lib/Analysis/ValueTracking.cpp (+113-5)
(added) llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll (+258)

diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 3970efba18cc8c..7287a8fb122bbb 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -145,11 +145,21 @@ bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
 bool isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                      unsigned Depth = 0);
 
+/// Returns true if the given value is known be positive (i.e. non-negative
+/// and non-zero) for DemandedElts.
+bool isKnownPositive(const Value *V, const APInt &DemandedElts,
+                     const SimplifyQuery &SQ, unsigned Depth = 0);
+
 /// Returns true if the given value is known be negative (i.e. non-positive
 /// and non-zero).
 bool isKnownNegative(const Value *V, const SimplifyQuery &DL,
                      unsigned Depth = 0);
 
+/// Returns true if the given value is known be negative (i.e. non-positive
+/// and non-zero) for DemandedElts.
+bool isKnownNegative(const Value *V, const APInt &DemandedElts,
+                     const SimplifyQuery &DL, unsigned Depth = 0);
+
 /// Return true if the given values are known to be non-equal when defined.
 /// Supports scalar integer types only.
 bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL,
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ca48cfe7738154..922fd65374e1c7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -289,21 +289,52 @@ bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
   return computeKnownBits(V, Depth, SQ).isNonNegative();
 }
 
-bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
-                           unsigned Depth) {
+static bool isKnownPositive(const Value *V, const APInt &DemandedElts,
+                            KnownBits &Known, const SimplifyQuery &SQ,
+                            unsigned Depth) {
   if (auto *CI = dyn_cast<ConstantInt>(V))
     return CI->getValue().isStrictlyPositive();
 
   // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
   // this updated.
-  KnownBits Known = computeKnownBits(V, Depth, SQ);
+  Known = computeKnownBits(V, DemandedElts, Depth, SQ);
   return Known.isNonNegative() &&
-         (Known.isNonZero() || ::isKnownNonZero(V, Depth, SQ));
+         (Known.isNonZero() || ::isKnownNonZero(V, DemandedElts, Depth, SQ));
+}
+
+bool llvm::isKnownPositive(const Value *V, const APInt &DemandedElts,
+                           const SimplifyQuery &SQ, unsigned Depth) {
+  KnownBits Known(getBitWidth(V->getType(), SQ.DL));
+  return ::isKnownPositive(V, DemandedElts, Known, SQ, Depth);
+}
+
+bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
+                           unsigned Depth) {
+  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+  APInt DemandedElts =
+      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+  return isKnownPositive(V, DemandedElts, SQ, Depth);
+}
+
+static bool isKnownNegative(const Value *V, const APInt &DemandedElts,
+                            KnownBits &Known, const SimplifyQuery &SQ,
+                            unsigned Depth) {
+  Known = computeKnownBits(V, DemandedElts, Depth, SQ);
+  return Known.isNegative();
+}
+
+bool llvm::isKnownNegative(const Value *V, const APInt &DemandedElts,
+                           const SimplifyQuery &SQ, unsigned Depth) {
+  KnownBits Known(getBitWidth(V->getType(), SQ.DL));
+  return ::isKnownNegative(V, DemandedElts, Known, SQ, Depth);
 }
 
 bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                            unsigned Depth) {
-  return computeKnownBits(V, Depth, SQ).isNegative();
+  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+  APInt DemandedElts =
+      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+  return isKnownNegative(V, DemandedElts, SQ, Depth);
 }
 
 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
@@ -1621,6 +1652,40 @@ static void computeKnownBitsFromOperator(const Operator *I,
         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
         Known = KnownBits::ssub_sat(Known, Known2);
         break;
+      case Intrinsic::vector_reduce_umax:
+      case Intrinsic::vector_reduce_umin:
+      case Intrinsic::vector_reduce_smax:
+      case Intrinsic::vector_reduce_smin:
+        if (auto *VecTy =
+                dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+          unsigned NumEle = VecTy->getNumElements();
+          computeKnownBits(II->getArgOperand(0), APInt::getOneBitSet(NumEle, 0),
+                           Known, Depth + 1, Q);
+          for (unsigned Idx = 1; Idx < NumEle; ++Idx) {
+            computeKnownBits(II->getArgOperand(0),
+                             APInt::getOneBitSet(NumEle, Idx), Known2,
+                             Depth + 1, Q);
+            switch (II->getIntrinsicID()) {
+            case Intrinsic::vector_reduce_umax:
+              Known = KnownBits::umax(Known, Known2);
+              break;
+            case Intrinsic::vector_reduce_umin:
+              Known = KnownBits::umin(Known, Known2);
+              break;
+            case Intrinsic::vector_reduce_smax:
+              Known = KnownBits::smax(Known, Known2);
+              break;
+            case Intrinsic::vector_reduce_smin:
+              Known = KnownBits::smin(Known, Known2);
+              break;
+            default:
+              llvm_unreachable("Invalid Intrinsinc in vec reduce min/max case");
+            }
+          }
+        } else {
+          computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+        }
+        break;
       case Intrinsic::umin:
         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
@@ -2824,6 +2889,49 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
         return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
                             II->getArgOperand(0), II->getArgOperand(1),
                             /*NSW=*/true, /* NUW=*/false);
+      case Intrinsic::vector_reduce_umax:
+        if (auto *VecTy =
+                dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+          unsigned NumEle = VecTy->getNumElements();
+          // If any element is non-zero the reduce is non-zero.
+          for (unsigned Idx = 0; Idx < NumEle; ++Idx) {
+            if (isKnownNonZero(II->getArgOperand(0),
+                               APInt::getOneBitSet(NumEle, Idx), Depth, Q))
+              return true;
+          }
+          return false;
+        }
+        [[fallthrough]];
+      case Intrinsic::vector_reduce_umin:
+        return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+      case Intrinsic::vector_reduce_smax:
+      case Intrinsic::vector_reduce_smin:
+        if (auto *VecTy =
+                dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+          bool AllNonZero = true;
+          auto EleImpliesNonZero = [&](const APInt &DemandedEle) {
+            KnownBits TmpKnown(
+                getBitWidth(II->getArgOperand(0)->getType(), Q.DL));
+            bool Ret = II->getIntrinsicID() == Intrinsic::vector_reduce_smin
+                           ? ::isKnownNegative(II->getArgOperand(0),
+                                               DemandedEle, TmpKnown, Q, Depth)
+                           : ::isKnownPositive(II->getArgOperand(0),
+                                               DemandedEle, TmpKnown, Q, Depth);
+            AllNonZero &= TmpKnown.isNonZero();
+            return Ret;
+          };
+          unsigned NumEle = VecTy->getNumElements();
+          // If any element is negative/strictly-positive (for smin/smax
+          // respectively) the reduce is non-zero.
+          for (unsigned Idx = 0; Idx < NumEle; ++Idx) {
+            if (EleImpliesNonZero(APInt::getOneBitSet(NumEle, Idx)))
+              return true;
+          }
+          if (AllNonZero)
+            return true;
+        }
+        // Otherwise, if all elements are non-zero, result is non-zero
+        return isKnownNonZero(II->getArgOperand(0), Depth, Q);
       case Intrinsic::umax:
       case Intrinsic::uadd_sat:
         return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
new file mode 100644
index 00000000000000..bee5124404a943
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero(
+; CHECK-NEXT:    ret i1 false
+;
+  %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero_fail(
+; CHECK-NEXT:    [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nsw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero(
+; CHECK-NEXT:    ret i1 false
+;
+  %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero_fail(
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 0, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero0(
+; CHECK-NEXT:    ret i1 false
+;
+  %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero1(
+; CHECK-NEXT:    ret i1 false
+;
+  %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %x = or <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero_fail(
+; CHECK-NEXT:    [[X0:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[X0]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %x = add nuw <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero0(
+; CHECK-NEXT:    ret i1 false
+;
+  %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero1(
+; CHECK-NEXT:    ret i1 false
+;
+  %x = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero_fail(
+; CHECK-NEXT:    [[X0:%.*]] = or <4 x i8> [[XX:%.*]], <i8 0, i8 0, i8 0, i8 -128>
+; CHECK-NEXT:    [[X:%.*]] = add <4 x i8> [[X0]], <i8 0, i8 0, i8 0, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x0 = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+  %x = add <4 x i8> %x0, <i8 0, i8 0, i8 0, i8 1>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known0(
+; CHECK-NEXT:    ret i8 1
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known1(
+; CHECK-NEXT:    ret i8 -128
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 128
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail0(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 -128>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail1(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known0(
+; CHECK-NEXT:    ret i8 1
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known1(
+; CHECK-NEXT:    ret i8 0
+;
+  %x = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 128
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail0(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %x = or <4 x i8> %xx, <i8 1, i8 0, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail1(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known(
+; CHECK-NEXT:    ret i8 4
+;
+  %x = or <4 x i8> %xx, <i8 4, i8 4, i8 4, i8 5>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = and i8 %v, 4
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known_fail(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 8, i8 5>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 4, i8 4, i8 8, i8 5>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 4
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known(
+; CHECK-NEXT:    ret i8 8
+;
+  %x = or <4 x i8> %xx, <i8 8, i8 24, i8 56, i8 9>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = and i8 %v, 8
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known_fail(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 23, i8 56, i8 9>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 8, i8 23, i8 56, i8 9>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = and i8 %v, 8
+  ret i8 %r
+}

goldsteinn · 2024-04-09T17:58:07Z

If these changes are desirable, I'll add support for the rest of the reduce intrinsics.

nikic

I think some basic support here could be reasonable, but this implementation looks too precise. In particular, I think the approach of doing a per-element evaluation of the vector is unprecedented -- we generally only look at all (demanded) elements as a unit.

So the implementation I'd expect would be more something along the lines of taking the known bits of the input vector and using those as the result known bits as well. (For min/max reductions that is, not quite that simple for some of the other ones.)

…}{min,max}`; NFC

goldsteinn · 2024-04-10T04:29:52Z

> I think some basic support here could be reasonable, but this implementation looks too precise. In particular, I think the approach of doing a per-element evaluation of the vector is unprecedented -- we generally only look at all (demanded) elements as a unit.
>
> So the implementation I'd expect would be more something along the lines of taking the known bits of the input vector and using those as the result known bits as well. (For min/max reductions that is, not quite that simple for some of the other ones.)

For min/max/or it's pretty easy to do a reasonable job without per-element evaluation, but that kind of prohibits things like xor/add/etc. Are you opposed to those being added in general?

…KnownNonZero` Previously missing, proofs for all implementations: https://alive2.llvm.org/ce/z/G8wpmG

…mputeKnownBits` Previously missing. We compute by just applying the reduce function on the knownbits of each element.

goldsteinn · 2024-04-10T04:34:40Z

> > I think some basic support here could be reasonable, but this implementation looks too precise. In particular, I think the approach of doing a per-element evaluation of the vector is unprecedented -- we generally only look at all (demanded) elements as a unit.
> >
> > So the implementation I'd expect would be more something along the lines of taking the known bits of the input vector and using those as the result known bits as well. (For min/max reductions that is, not quite that simple for some of the other ones.)
>
> For min/max/or it's pretty easy to do a reasonable job without per-element evaluation, but that kind of prohibits things like xor/add/etc. Are you opposed to those being added in general?

Either way, updated this so it's quite simple.

nikic

LG

nikic · 2024-04-10T05:22:02Z

> > I think some basic support here could be reasonable, but this implementation looks too precise. In particular, I think the approach of doing a per-element evaluation of the vector is unprecedented -- we generally only look at all (demanded) elements as a unit.
> >
> > So the implementation I'd expect would be more something along the lines of taking the known bits of the input vector and using those as the result known bits as well. (For min/max reductions that is, not quite that simple for some of the other ones.)
>
> For min/max/or it's pretty easy to do a reasonable job without per-element evaluation, but that kind of prohibits things like xor/add/etc. Are you opposed to those being added in general?

Yeah, I don't think we should do them if they require per-element reasoning to do something worthwhile. We can at least handle and/or though (and only in KnownBits).

goldsteinn · 2024-04-10T06:03:07Z

> Yeah, I don't think we should do them if they require per-element reasoning to do something worthwhile. We can at least handle and/or though (and only in KnownBits).

Kk, I'll post patches for the rest tomorrow.
Thinking about it a bit more, we will be able to do a bit for most operators.

RKSimon

LGTM

goldsteinn requested a review from nikic as a code owner April 9, 2024 17:56

llvmbot added llvm:analysis llvm:transforms labels Apr 9, 2024

goldsteinn changed the title ~~perf/goldsteinn/support reduce minmax~~ [ValueTracking] Add support for vector_reduce_{s,u}{min,max} in isKnownNonZero/computeKnownBits. Apr 9, 2024

goldsteinn requested a review from dtcxzyw April 9, 2024 17:57

nikic requested changes Apr 10, 2024

View reviewed changes

[InstCombine] Add tests for non-zero/knownbits of `vector_reduce_{s,u…

2ee1477

…}{min,max}`; NFC

goldsteinn added 2 commits April 9, 2024 23:34

[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in `is…

6a359c1

…KnownNonZero` Previously missing, proofs for all implementations: https://alive2.llvm.org/ce/z/G8wpmG

[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in `co…

ed78299

…mputeKnownBits` Previously missing. We compute by just applying the reduce function on the knownbits of each element.

goldsteinn force-pushed the perf/goldsteinn/support-reduce-minmax branch from 16a97a3 to ed78299 Compare April 10, 2024 04:34

nikic approved these changes Apr 10, 2024

View reviewed changes

dtcxzyw requested a review from RKSimon April 10, 2024 15:13

RKSimon approved these changes Apr 10, 2024

View reviewed changes

goldsteinn closed this in 41c5221 Apr 10, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`/`computeKnownBits`. #88169

[ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`/`computeKnownBits`. #88169

goldsteinn commented Apr 9, 2024 •

edited

llvmbot commented Apr 9, 2024 •

edited

goldsteinn commented Apr 9, 2024

nikic left a comment •

edited

goldsteinn commented Apr 10, 2024

goldsteinn commented Apr 10, 2024

nikic left a comment

nikic commented Apr 10, 2024

goldsteinn commented Apr 10, 2024

RKSimon left a comment

[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in isKnownNonZero/computeKnownBits. #88169

[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in isKnownNonZero/computeKnownBits. #88169

Conversation

goldsteinn commented Apr 9, 2024 • edited

llvmbot commented Apr 9, 2024 • edited

goldsteinn commented Apr 9, 2024

nikic left a comment • edited

Choose a reason for hiding this comment

goldsteinn commented Apr 10, 2024

goldsteinn commented Apr 10, 2024

nikic left a comment

Choose a reason for hiding this comment

nikic commented Apr 10, 2024

goldsteinn commented Apr 10, 2024

RKSimon left a comment

Choose a reason for hiding this comment

[ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`/`computeKnownBits`. #88169

[ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`/`computeKnownBits`. #88169

goldsteinn commented Apr 9, 2024 •

edited

llvmbot commented Apr 9, 2024 •

edited

nikic left a comment •

edited