diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0ca56f08c333d8..467b291f9a4c3a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1456,6 +1456,55 @@ static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
   return UsedIndices.all() ? V : nullptr;
 }
 
+/// Fold an unsigned minimum of a trailing or leading zero-bit count with a
+/// constant into a single count over an operand that has a guard bit set:
+///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
+///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
+///                                              >> ConstOp))
+///
+/// \tparam IntrID Either Intrinsic::cttz or Intrinsic::ctlz.
+/// \param I0 Candidate count intrinsic call; it must have a single use.
+/// \param I1 Candidate constant; it must be less than the scalar bit width.
+/// \param DL Data layout forwarded to the constant folder.
+/// \param Builder Builder used to emit the replacement `or` and intrinsic.
+/// \returns The replacement value, or nullptr if the pattern does not match.
+template <Intrinsic::ID IntrID>
+static Value *
+foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
+                                          const DataLayout &DL,
+                                          InstCombiner::BuilderTy &Builder) {
+  static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
+                "This helper only supports cttz and ctlz intrinsics");
+
+  Value *CtOp;
+  Value *ZeroUndef;
+  if (!match(I0,
+             m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
+    return nullptr;
+
+  unsigned BitWidth = I1->getType()->getScalarSizeInBits();
+  auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
+  if (!match(I1, m_CheckedInt(LessBitWidth)))
+    // We have a constant >= BitWidth (which can be handled by CVP)
+    // or a non-splat vector with elements < and >= BitWidth
+    return nullptr;
+
+  // Build the guard bit: bit ConstOp counted from the LSB for cttz, or from
+  // the MSB for ctlz.
+  Type *Ty = I1->getType();
+  Constant *NewConst = ConstantFoldBinaryOpOperands(
+      IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
+      IntrID == Intrinsic::cttz
+          ? ConstantInt::get(Ty, 1)
+          : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
+      cast<Constant>(I1), DL);
+  // The or'ed operand always has the guard bit set, so it is never zero and
+  // the new intrinsic is emitted with its zero-is-undefined flag set to true.
+  return Builder.CreateBinaryIntrinsic(
+      IntrID, Builder.CreateOr(CtOp, NewConst),
+      ConstantInt::getTrue(ZeroUndef->getType()));
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -1661,6 +1710,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       Value *Cmp = Builder.CreateICmpNE(I0, Zero);
       return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
     }
+    // umin(cttz(x), const) --> cttz(x | (1 << const))
+    if (Value *FoldedCttz =
+            foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
+                I0, I1, DL, Builder))
+      return replaceInstUsesWith(*II, FoldedCttz);
+    // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
+    if (Value *FoldedCtlz =
+            foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
+                I0, I1, DL, Builder))
+      return replaceInstUsesWith(*II, FoldedCtlz);
     [[fallthrough]];
   }
   case Intrinsic::umax: {
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
new file mode 100644
index 00000000000000..0d87122660cfa1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -0,0 +1,382 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
+  %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 6)
+  ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_zero_defined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
+  %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 6)
+  ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8
@umin_cttz_i8_commuted_zero_undefined( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true) +; CHECK-NEXT: ret i8 [[RET]] +; + %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true) + %ret = call i8 @llvm.umin.i8(i8 6, i8 %cttz) + ret i8 %ret +} + +define i8 @umin_cttz_i8_negative_ge_bitwidth_zero_undefined(i8 %X) { +; CHECK-LABEL: define i8 @umin_cttz_i8_negative_ge_bitwidth_zero_undefined( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true) +; CHECK-NEXT: ret i8 [[CTTZ]] +; + %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true) + %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 10) + ret i8 %ret +} + +define i16 @umin_cttz_i16_zero_undefined(i16 %X) { +; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined( +; CHECK-SAME: i16 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true) +; CHECK-NEXT: ret i16 [[RET]] +; + %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true) + %ret = call i16 @llvm.umin.i16(i16 %cttz, i16 6) + ret i16 %ret +} + +define i32 @umin_cttz_i32_zero_undefined(i32 %X) { +; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true) +; CHECK-NEXT: ret i32 [[RET]] +; + %cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true) + %ret = call i32 @llvm.umin.i32(i32 %cttz, i32 6) + ret i32 %ret +} + +define i64 @umin_cttz_i64_zero_undefined(i64 %X) { +; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64 +; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true) +; CHECK-NEXT: ret i64 [[RET]] +; + %cttz = call i64 @llvm.cttz.i64(i64 %X, i1 
true) + %ret = call i64 @llvm.umin.i64(i64 %cttz, i64 6) + ret i64 %ret +} + +define i1 @umin_cttz_i1_zero_undefined(i1 %X) { +; CHECK-LABEL: define i1 @umin_cttz_i1_zero_undefined( +; CHECK-SAME: i1 [[X:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %cttz = call i1 @llvm.cttz.i1(i1 %X, i1 true) + %ret = call i1 @llvm.umin.i1(i1 %cttz, i1 1) + ret i1 %ret +} + +define i1 @umin_cttz_i1_zero_defined(i1 %X) { +; CHECK-LABEL: define i1 @umin_cttz_i1_zero_defined( +; CHECK-SAME: i1 [[X:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = xor i1 [[X]], true +; CHECK-NEXT: ret i1 [[CTTZ]] +; + %cttz = call i1 @llvm.cttz.i1(i1 %X, i1 false) + %ret = call i1 @llvm.umin.i1(i1 %cttz, i1 1) + ret i1 %ret +} + +define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) +; CHECK-NEXT: ret <2 x i32> [[RET]] +; + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> ) + ret <2 x i32> %ret +} + +define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) +; CHECK-NEXT: ret <2 x i32> [[RET]] +; + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> ) + ret <2 x i32> %ret +} + +define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { 
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) +; CHECK-NEXT: ret <2 x i32> [[RET]] +; + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> ) + ret <2 x i32> %ret +} + +define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] +; + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> ) + ret <2 x i32> %ret +} + +define <2 x i32> @umin_cttz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] +; + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> ) + ret <2 x i32> %ret +} + +define i16 @umin_cttz_i16_negative_non_constant(i16 %X, i16 %Y) { +; CHECK-LABEL: define i16 @umin_cttz_i16_negative_non_constant( +; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true) +; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 [[Y]]) +; CHECK-NEXT: 
ret i16 [[RET]] +; + %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true) + %ret = call i16 @llvm.umin.i16(i16 %cttz, i16 %Y) + ret i16 %ret +} + +define i16 @umin_cttz_i16_negative_two_uses(i16 %X) { +; CHECK-LABEL: define i16 @umin_cttz_i16_negative_two_uses( +; CHECK-SAME: i16 [[X:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true) +; CHECK-NEXT: [[OP0:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6) +; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i16 [[CTTZ]], [[OP0]] +; CHECK-NEXT: ret i16 [[RET]] +; + %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true) + %op0 = call i16 @llvm.umin.i16(i16 %cttz, i16 6) + %ret = add i16 %cttz, %op0 + ret i16 %ret +} + +define i8 @umin_ctlz_i8_zero_undefined(i8 %X) { +; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true) +; CHECK-NEXT: ret i8 [[RET]] +; + %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true) + %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 6) + ret i8 %ret +} + +define i8 @umin_ctlz_i8_zero_defined(i8 %X) { +; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true) +; CHECK-NEXT: ret i8 [[RET]] +; + %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false) + %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 6) + ret i8 %ret +} + +define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) { +; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2 +; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true) +; CHECK-NEXT: ret i8 [[RET]] +; + %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true) + %ret = call i8 @llvm.umin.i8(i8 6, i8 %ctlz) + ret i8 %ret +} + +define i8 
@umin_ctlz_i8_negative_ge_bitwidth_zero_undefined(i8 %X) { +; CHECK-LABEL: define i8 @umin_ctlz_i8_negative_ge_bitwidth_zero_undefined( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true) +; CHECK-NEXT: ret i8 [[CTLZ]] +; + %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true) + %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 10) + ret i8 %ret +} + +define i16 @umin_ctlz_i16_zero_undefined(i16 %X) { +; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined( +; CHECK-SAME: i16 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512 +; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true) +; CHECK-NEXT: ret i16 [[RET]] +; + %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true) + %ret = call i16 @llvm.umin.i16(i16 %ctlz, i16 6) + ret i16 %ret +} + +define i32 @umin_ctlz_i32_zero_undefined(i32 %X) { +; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432 +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true) +; CHECK-NEXT: ret i32 [[RET]] +; + %ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true) + %ret = call i32 @llvm.umin.i32(i32 %ctlz, i32 6) + ret i32 %ret +} + +define i64 @umin_ctlz_i64_zero_undefined(i64 %X) { +; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872 +; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true) +; CHECK-NEXT: ret i64 [[RET]] +; + %ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true) + %ret = call i64 @llvm.umin.i64(i64 %ctlz, i64 6) + ret i64 %ret +} + +define i1 @umin_ctlz_i1_zero_undefined(i1 %X) { +; CHECK-LABEL: define i1 @umin_ctlz_i1_zero_undefined( +; CHECK-SAME: i1 [[X:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %ctlz = call i1 @llvm.ctlz.i1(i1 %X, i1 true) + %ret = call i1 @llvm.umin.i1(i1 %ctlz, i1 1) + 
ret i1 %ret +} + +define i1 @umin_ctlz_i1_zero_defined(i1 %X) { +; CHECK-LABEL: define i1 @umin_ctlz_i1_zero_defined( +; CHECK-SAME: i1 [[X:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = xor i1 [[X]], true +; CHECK-NEXT: ret i1 [[CTLZ]] +; + %ctlz = call i1 @llvm.ctlz.i1(i1 %X, i1 false) + %ret = call i1 @llvm.umin.i1(i1 %ctlz, i1 1) + ret i1 %ret +} + +define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) +; CHECK-NEXT: ret <2 x i32> [[RET]] +; + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> ) + ret <2 x i32> %ret +} + +define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) +; CHECK-NEXT: ret <2 x i32> [[RET]] +; + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> ) + ret <2 x i32> %ret +} + +define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) +; CHECK-NEXT: ret <2 x i32> [[RET]] +; + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> ) + ret <2 
x i32> %ret +} + +define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] +; + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> ) + ret <2 x i32> %ret +} + +define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) { +; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true) +; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[RET1]] +; + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true) + %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> ) + ret <2 x i32> %ret +} + +define i16 @umin_ctlz_i16_negative_non_constant(i16 %X, i16 %Y) { +; CHECK-LABEL: define i16 @umin_ctlz_i16_negative_non_constant( +; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) { +; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true) +; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 [[Y]]) +; CHECK-NEXT: ret i16 [[RET]] +; + %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true) + %ret = call i16 @llvm.umin.i16(i16 %ctlz, i16 %Y) + ret i16 %ret +} + +define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) { +; CHECK-LABEL: define i16 @umin_ctlz_i16_negative_two_uses( +; CHECK-SAME: i16 [[X:%.*]]) { +; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) 
i16 @llvm.ctlz.i16(i16 [[X]], i1 true) +; CHECK-NEXT: [[OP0:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6) +; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i16 [[CTTZ]], [[OP0]] +; CHECK-NEXT: ret i16 [[RET]] +; + %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true) + %op0 = call i16 @llvm.umin.i16(i16 %ctlz, i16 6) + %ret = add i16 %ctlz, %op0 + ret i16 %ret +}