diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 53f1c144c546a..88d9cc5403eb9 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1428,6 +1428,15 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::smul_fix_sat:
   case Intrinsic::bitreverse:
   case Intrinsic::is_constant:
+  case Intrinsic::experimental_vector_reduce_add:
+  case Intrinsic::experimental_vector_reduce_mul:
+  case Intrinsic::experimental_vector_reduce_and:
+  case Intrinsic::experimental_vector_reduce_or:
+  case Intrinsic::experimental_vector_reduce_xor:
+  case Intrinsic::experimental_vector_reduce_smin:
+  case Intrinsic::experimental_vector_reduce_smax:
+  case Intrinsic::experimental_vector_reduce_umin:
+  case Intrinsic::experimental_vector_reduce_umax:
     return true;
 
   // Floating point operations cannot be folded in strictfp functions in
@@ -1647,6 +1656,53 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
   return GetConstantFoldFPValue(V, Ty);
 }
 
+Constant *ConstantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
+  FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
+  if (!VT)
+    return nullptr;
+  ConstantInt *CI = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
+  if (!CI)
+    return nullptr;
+  APInt Acc = CI->getValue();
+
+  for (unsigned I = 1; I < VT->getNumElements(); I++) {
+    if (!(CI = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
+      return nullptr;
+    const APInt &X = CI->getValue();
+    switch (IID) {
+    case Intrinsic::experimental_vector_reduce_add:
+      Acc = Acc + X;
+      break;
+    case Intrinsic::experimental_vector_reduce_mul:
+      Acc = Acc * X;
+      break;
+    case Intrinsic::experimental_vector_reduce_and:
+      Acc = Acc & X;
+      break;
+    case Intrinsic::experimental_vector_reduce_or:
+      Acc = Acc | X;
+      break;
+    case Intrinsic::experimental_vector_reduce_xor:
+      Acc = Acc ^ X;
+      break;
+    case Intrinsic::experimental_vector_reduce_smin:
+      Acc = APIntOps::smin(Acc, X);
+      break;
+    case Intrinsic::experimental_vector_reduce_smax:
+      Acc = APIntOps::smax(Acc, X);
+      break;
+    case Intrinsic::experimental_vector_reduce_umin:
+      Acc = APIntOps::umin(Acc, X);
+      break;
+    case Intrinsic::experimental_vector_reduce_umax:
+      Acc = APIntOps::umax(Acc, X);
+      break;
+    }
+  }
+
+  return ConstantInt::get(Op->getContext(), Acc);
+}
+
 /// Attempt to fold an SSE floating point to integer conversion of a constant
 /// floating point. If roundTowardZero is false, the default IEEE rounding is
 /// used (toward nearest, ties to even). This matches the behavior of the
@@ -2086,12 +2142,40 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
     }
   }
 
+  if (isa<ConstantAggregateZero>(Operands[0])) {
+    switch (IntrinsicID) {
+    default: break;
+    case Intrinsic::experimental_vector_reduce_add:
+    case Intrinsic::experimental_vector_reduce_mul:
+    case Intrinsic::experimental_vector_reduce_and:
+    case Intrinsic::experimental_vector_reduce_or:
+    case Intrinsic::experimental_vector_reduce_xor:
+    case Intrinsic::experimental_vector_reduce_smin:
+    case Intrinsic::experimental_vector_reduce_smax:
+    case Intrinsic::experimental_vector_reduce_umin:
+    case Intrinsic::experimental_vector_reduce_umax:
+      return ConstantInt::get(Ty, 0);
+    }
+  }
+
+  // Support ConstantVector in case we have an Undef in the top.
   if (isa<ConstantVector>(Operands[0]) ||
       isa<ConstantDataVector>(Operands[0])) {
     auto *Op = cast<Constant>(Operands[0]);
     switch (IntrinsicID) {
     default: break;
+    case Intrinsic::experimental_vector_reduce_add:
+    case Intrinsic::experimental_vector_reduce_mul:
+    case Intrinsic::experimental_vector_reduce_and:
+    case Intrinsic::experimental_vector_reduce_or:
+    case Intrinsic::experimental_vector_reduce_xor:
+    case Intrinsic::experimental_vector_reduce_smin:
+    case Intrinsic::experimental_vector_reduce_smax:
+    case Intrinsic::experimental_vector_reduce_umin:
+    case Intrinsic::experimental_vector_reduce_umax:
+      if (Constant *C = ConstantFoldVectorReduce(IntrinsicID, Op))
+        return C;
+      break;
     case Intrinsic::x86_sse_cvtss2si:
     case Intrinsic::x86_sse_cvtss2si64:
     case Intrinsic::x86_sse2_cvtsd2si:
diff --git a/llvm/test/Analysis/ConstantFolding/vecreduce.ll b/llvm/test/Analysis/ConstantFolding/vecreduce.ll
new file mode 100644
index 0000000000000..4d6ba1b45db10
--- /dev/null
+++ b/llvm/test/Analysis/ConstantFolding/vecreduce.ll
@@ -0,0 +1,481 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+declare i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.mul.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.or.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.xor.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.smin.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.smax.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.umin.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
+declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %a)
+
+
+define i32 @add_0() {
+; CHECK-LABEL: @add_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @add_1() {
+; CHECK-LABEL: @add_1(
+; CHECK-NEXT:    ret i32 8
+;
+  %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @add_inc() {
+; CHECK-LABEL: @add_inc(
+; CHECK-NEXT:    ret i32 18
+;
+  %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @add_1v() {
+; CHECK-LABEL: @add_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @add_undef() {
+; CHECK-LABEL: @add_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @add_undef1() {
+; CHECK-LABEL: @add_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+
+define i32 @mul_0() {
+; CHECK-LABEL: @mul_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @mul_1() {
+; CHECK-LABEL: @mul_1(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @mul_inc() {
+; CHECK-LABEL: @mul_inc(
+; CHECK-NEXT:    ret i32 40320
+;
+  %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @mul_1v() {
+; CHECK-LABEL: @mul_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.mul.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @mul_undef() {
+; CHECK-LABEL: @mul_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @mul_undef1() {
+; CHECK-LABEL: @mul_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+define i32 @and_0() {
+; CHECK-LABEL: @and_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @and_1() {
+; CHECK-LABEL: @and_1(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @and_inc() {
+; CHECK-LABEL: @and_inc(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @and_1v() {
+; CHECK-LABEL: @and_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @and_undef() {
+; CHECK-LABEL: @and_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @and_undef1() {
+; CHECK-LABEL: @and_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+define i32 @or_0() {
+; CHECK-LABEL: @or_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @or_1() {
+; CHECK-LABEL: @or_1(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @or_inc() {
+; CHECK-LABEL: @or_inc(
+; CHECK-NEXT:    ret i32 -1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @or_1v() {
+; CHECK-LABEL: @or_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.or.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @or_undef() {
+; CHECK-LABEL: @or_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @or_undef1() {
+; CHECK-LABEL: @or_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+define i32 @xor_0() {
+; CHECK-LABEL: @xor_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @xor_1() {
+; CHECK-LABEL: @xor_1(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @xor_inc() {
+; CHECK-LABEL: @xor_inc(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @xor_1v() {
+; CHECK-LABEL: @xor_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.xor.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @xor_undef() {
+; CHECK-LABEL: @xor_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @xor_undef1() {
+; CHECK-LABEL: @xor_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+define i32 @smin_0() {
+; CHECK-LABEL: @smin_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @smin_1() {
+; CHECK-LABEL: @smin_1(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @smin_inc() {
+; CHECK-LABEL: @smin_inc(
+; CHECK-NEXT:    ret i32 -6
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @smin_1v() {
+; CHECK-LABEL: @smin_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smin.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @smin_undef() {
+; CHECK-LABEL: @smin_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @smin_undef1() {
+; CHECK-LABEL: @smin_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+define i32 @smax_0() {
+; CHECK-LABEL: @smax_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @smax_1() {
+; CHECK-LABEL: @smax_1(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @smax_inc() {
+; CHECK-LABEL: @smax_inc(
+; CHECK-NEXT:    ret i32 8
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @smax_1v() {
+; CHECK-LABEL: @smax_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smax.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @smax_undef() {
+; CHECK-LABEL: @smax_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @smax_undef1() {
+; CHECK-LABEL: @smax_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+define i32 @umin_0() {
+; CHECK-LABEL: @umin_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @umin_1() {
+; CHECK-LABEL: @umin_1(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @umin_inc() {
+; CHECK-LABEL: @umin_inc(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @umin_1v() {
+; CHECK-LABEL: @umin_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umin.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @umin_undef() {
+; CHECK-LABEL: @umin_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @umin_undef1() {
+; CHECK-LABEL: @umin_undef1(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+
+define i32 @umax_0() {
+; CHECK-LABEL: @umax_0(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> zeroinitializer)
+  ret i32 %x
+}
+
+define i32 @umax_1() {
+; CHECK-LABEL: @umax_1(
+; CHECK-NEXT:    ret i32 1
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @umax_inc() {
+; CHECK-LABEL: @umax_inc(
+; CHECK-NEXT:    ret i32 -3
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> )
+  ret i32 %x
+}
+
+define i32 @umax_1v() {
+; CHECK-LABEL: @umax_1v(
+; CHECK-NEXT:    ret i32 10
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> )
+  ret i32 %x
+}
+
+define i32 @umax_undef() {
+; CHECK-LABEL: @umax_undef(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+  ret i32 %x
+}
+
+define i32 @umax_undef1d() {
+; CHECK-LABEL: @umax_undef1d(
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> )
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> )
+  ret i32 %x
+}
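
Note (not part of the patch): a minimal usage sketch of the new fold, run through the same `opt -constprop -S` pipeline as the RUN line above. The function name @example, the v4i32 overload, and the element values below are illustrative assumptions, not taken from the tests.

declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a)

define i32 @example() {
  ; 1 + 2 + 3 + 4 = 10, so ConstantFoldVectorReduce should replace the call,
  ; leaving a plain "ret i32 10" after constant propagation.
  %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
  ret i32 %r
}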