-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in isKnownNonZero/computeKnownBits. #88169
[ValueTracking] Add support for vector_reduce_{s,u}{min,max} in isKnownNonZero/computeKnownBits. #88169
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/88169.diff 3 Files Affected:
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 3970efba18cc8c..7287a8fb122bbb 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -145,11 +145,21 @@ bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
bool isKnownPositive(const Value *V, const SimplifyQuery &SQ,
unsigned Depth = 0);
+/// Returns true if the given value is known be positive (i.e. non-negative
+/// and non-zero) for DemandedElts.
+bool isKnownPositive(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &SQ, unsigned Depth = 0);
+
/// Returns true if the given value is known be negative (i.e. non-positive
/// and non-zero).
bool isKnownNegative(const Value *V, const SimplifyQuery &DL,
unsigned Depth = 0);
+/// Returns true if the given value is known be negative (i.e. non-positive
+/// and non-zero) for DemandedElts.
+bool isKnownNegative(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &DL, unsigned Depth = 0);
+
/// Return true if the given values are known to be non-equal when defined.
/// Supports scalar integer types only.
bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL,
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ca48cfe7738154..922fd65374e1c7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -289,21 +289,52 @@ bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
return computeKnownBits(V, Depth, SQ).isNonNegative();
}
-bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
- unsigned Depth) {
+static bool isKnownPositive(const Value *V, const APInt &DemandedElts,
+ KnownBits &Known, const SimplifyQuery &SQ,
+ unsigned Depth) {
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isStrictlyPositive();
// If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
// this updated.
- KnownBits Known = computeKnownBits(V, Depth, SQ);
+ Known = computeKnownBits(V, DemandedElts, Depth, SQ);
return Known.isNonNegative() &&
- (Known.isNonZero() || ::isKnownNonZero(V, Depth, SQ));
+ (Known.isNonZero() || ::isKnownNonZero(V, DemandedElts, Depth, SQ));
+}
+
+bool llvm::isKnownPositive(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &SQ, unsigned Depth) {
+ KnownBits Known(getBitWidth(V->getType(), SQ.DL));
+ return ::isKnownPositive(V, DemandedElts, Known, SQ, Depth);
+}
+
+bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
+ unsigned Depth) {
+ auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+ APInt DemandedElts =
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+ return isKnownPositive(V, DemandedElts, SQ, Depth);
+}
+
+static bool isKnownNegative(const Value *V, const APInt &DemandedElts,
+ KnownBits &Known, const SimplifyQuery &SQ,
+ unsigned Depth) {
+ Known = computeKnownBits(V, DemandedElts, Depth, SQ);
+ return Known.isNegative();
+}
+
+bool llvm::isKnownNegative(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &SQ, unsigned Depth) {
+ KnownBits Known(getBitWidth(V->getType(), SQ.DL));
+ return ::isKnownNegative(V, DemandedElts, Known, SQ, Depth);
}
bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
unsigned Depth) {
- return computeKnownBits(V, Depth, SQ).isNegative();
+ auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+ APInt DemandedElts =
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+ return isKnownNegative(V, DemandedElts, SQ, Depth);
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
@@ -1621,6 +1652,40 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
Known = KnownBits::ssub_sat(Known, Known2);
break;
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ unsigned NumEle = VecTy->getNumElements();
+ computeKnownBits(II->getArgOperand(0), APInt::getOneBitSet(NumEle, 0),
+ Known, Depth + 1, Q);
+ for (unsigned Idx = 1; Idx < NumEle; ++Idx) {
+ computeKnownBits(II->getArgOperand(0),
+ APInt::getOneBitSet(NumEle, Idx), Known2,
+ Depth + 1, Q);
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::vector_reduce_umax:
+ Known = KnownBits::umax(Known, Known2);
+ break;
+ case Intrinsic::vector_reduce_umin:
+ Known = KnownBits::umin(Known, Known2);
+ break;
+ case Intrinsic::vector_reduce_smax:
+ Known = KnownBits::smax(Known, Known2);
+ break;
+ case Intrinsic::vector_reduce_smin:
+ Known = KnownBits::smin(Known, Known2);
+ break;
+ default:
+ llvm_unreachable("Invalid Intrinsinc in vec reduce min/max case");
+ }
+ }
+ } else {
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ }
+ break;
case Intrinsic::umin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
@@ -2824,6 +2889,49 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
II->getArgOperand(0), II->getArgOperand(1),
/*NSW=*/true, /* NUW=*/false);
+ case Intrinsic::vector_reduce_umax:
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ unsigned NumEle = VecTy->getNumElements();
+ // If any element is non-zero the reduce is non-zero.
+ for (unsigned Idx = 0; Idx < NumEle; ++Idx) {
+ if (isKnownNonZero(II->getArgOperand(0),
+ APInt::getOneBitSet(NumEle, Idx), Depth, Q))
+ return true;
+ }
+ return false;
+ }
+ [[fallthrough]];
+ case Intrinsic::vector_reduce_umin:
+ return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ bool AllNonZero = true;
+ auto EleImpliesNonZero = [&](const APInt &DemandedEle) {
+ KnownBits TmpKnown(
+ getBitWidth(II->getArgOperand(0)->getType(), Q.DL));
+ bool Ret = II->getIntrinsicID() == Intrinsic::vector_reduce_smin
+ ? ::isKnownNegative(II->getArgOperand(0),
+ DemandedEle, TmpKnown, Q, Depth)
+ : ::isKnownPositive(II->getArgOperand(0),
+ DemandedEle, TmpKnown, Q, Depth);
+ AllNonZero &= TmpKnown.isNonZero();
+ return Ret;
+ };
+ unsigned NumEle = VecTy->getNumElements();
+ // If any element is negative/strictly-positive (for smin/smax
+ // respectively) the reduce is non-zero.
+ for (unsigned Idx = 0; Idx < NumEle; ++Idx) {
+ if (EleImpliesNonZero(APInt::getOneBitSet(NumEle, Idx)))
+ return true;
+ }
+ if (AllNonZero)
+ return true;
+ }
+ // Otherwise, if all elements are non-zero, result is non-zero
+ return isKnownNonZero(II->getArgOperand(0), Depth, Q);
case Intrinsic::umax:
case Intrinsic::uadd_sat:
return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
new file mode 100644
index 00000000000000..bee5124404a943
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero_fail(
+; CHECK-NEXT: [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nsw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero_fail(
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 0, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero0(
+; CHECK-NEXT: ret i1 false
+;
+ %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero1(
+; CHECK-NEXT: ret i1 false
+;
+ %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %x = or <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero_fail(
+; CHECK-NEXT: [[X0:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[X0]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %x = add nuw <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero0(
+; CHECK-NEXT: ret i1 false
+;
+ %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero1(
+; CHECK-NEXT: ret i1 false
+;
+ %x = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero_fail(
+; CHECK-NEXT: [[X0:%.*]] = or <4 x i8> [[XX:%.*]], <i8 0, i8 0, i8 0, i8 -128>
+; CHECK-NEXT: [[X:%.*]] = add <4 x i8> [[X0]], <i8 0, i8 0, i8 0, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+ %x = add <4 x i8> %x0, <i8 0, i8 0, i8 0, i8 1>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known0(
+; CHECK-NEXT: ret i8 1
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known1(
+; CHECK-NEXT: ret i8 -128
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 128
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail0(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 -128>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail1(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known0(
+; CHECK-NEXT: ret i8 1
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known1(
+; CHECK-NEXT: ret i8 0
+;
+ %x = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 128
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail0(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %x = or <4 x i8> %xx, <i8 1, i8 0, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail1(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known(
+; CHECK-NEXT: ret i8 4
+;
+ %x = or <4 x i8> %xx, <i8 4, i8 4, i8 4, i8 5>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = and i8 %v, 4
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known_fail(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 8, i8 5>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 4, i8 4, i8 8, i8 5>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 4
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known(
+; CHECK-NEXT: ret i8 8
+;
+ %x = or <4 x i8> %xx, <i8 8, i8 24, i8 56, i8 9>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known_fail(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 23, i8 56, i8 9>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 8, i8 23, i8 56, i8 9>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
|
vector_reduce_{s,u}{min,max} in isKnownNonZero/computeKnownBits.
If these changes are desirable, I'll add support for the rest of the reduce intrinsics. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think some basic support here could be reasonable, but this implementation looks too precise. In particular, I think the approach of doing a per-element evaluation of the vector is unprecedented -- we generally only look at all (demanded) elements as a unit.
So the implementation I'd expect would be more something along the lines of taking the known bits of the input vector and using those as the result known bits as well. (For min/max reductions that is, not quite that simple for some of the other ones.)
for |
…KnownNonZero` Previously missing, proofs for all implementations: https://alive2.llvm.org/ce/z/G8wpmG
…mputeKnownBits` Previously missing. We compute by just applying the reduce function on the knownbits of each element.
16a97a3
to
ed78299
Compare
Either way, updated this so it's quite simple. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
Yeah, I don't think we should do them if they require per-element reasoning to do something worthwhile. We can at least handle and/or though (and only in KnownBits). |
Kk, I'll post patches for the rest tomorrow. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
vector_reduce_{s,u}{min,max}; NFC
vector_reduce_{s,u}{min,max} in isKnownNonZero
vector_reduce_{s,u}{min,max} in computeKnownBits