diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index a3186e61b94ad..baa16306ebf5d 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -863,6 +863,11 @@ ConstantRange computeConstantRange(const Value *V, bool ForSigned,
                                    const DominatorTree *DT = nullptr,
                                    unsigned Depth = 0);
 
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+ConstantRange
+computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+                                       bool ForSigned, const SimplifyQuery &SQ);
+
 /// Return true if this function can prove that the instruction I will
 /// always transfer execution to one of its successors (including the next
 /// instruction that follows within a basic block). E.g. this is not
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 769d921eb1e8d..cac2602d455f9 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6289,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
 }
 
 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange
-computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
-                                       bool ForSigned,
-                                       const SimplifyQuery &SQ) {
+ConstantRange
+llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
+                                             bool ForSigned,
+                                             const SimplifyQuery &SQ) {
   ConstantRange CR1 =
       ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
   ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a272357fa04a4..3b7fe7fa22660 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1796,6 +1796,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (Instruction *NewMinMax = factorizeMinMaxTree(II))
       return NewMinMax;
 
+    // Try to fold minmax with constant RHS based on range information
+    const APInt *RHSC;
+    if (match(I1, m_APIntAllowUndef(RHSC))) {
+      ICmpInst::Predicate Pred =
+          ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
+      bool IsSigned = MinMaxIntrinsic::isSigned(IID);
+      ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
+          I0, IsSigned, SQ.getWithInstruction(II));
+      if (!LHS_CR.isFullSet()) {
+        if (LHS_CR.icmp(Pred, *RHSC))
+          return replaceInstUsesWith(*II, I0);
+        if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
+          return replaceInstUsesWith(*II,
+                                     ConstantInt::get(II->getType(), *RHSC));
+      }
+    }
+
     break;
   }
   case Intrinsic::bitreverse: {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index f3833a420ee83..ae2e115b1dd9a 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2489,3 +2489,95 @@ define i1 @PR57986() {
   %umin = call i1 @llvm.umin.i1(i1 ptrtoint (ptr @g to i1), i1 true)
   ret i1 %umin
 }
+
+define i8 @fold_umax_with_knownbits_info(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umax_with_knownbits_info(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A1:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT:    [[A2:%.*]] = shl i8 [[B:%.*]], 1
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT:    ret i8 [[SUB]]
+;
+entry:
+  %a1 = or i8 %a, 1
+  %a2 = shl i8 %b, 1
+  %sub = sub i8 %a1, %a2
+  %val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
+  ret i8 %val
+}
+
+define <3 x i8> @fold_umax_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) {
+; CHECK-LABEL: @fold_umax_with_knownbits_info_undef_in_splat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A1:%.*]] = or <3 x i8> [[A:%.*]], <i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[A2:%.*]] = shl <3 x i8> [[B:%.*]], <i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i8> [[A1]], [[A2]]
+; CHECK-NEXT:    ret <3 x i8> [[SUB]]
+;
+entry:
+  %a1 = or <3 x i8> %a, <i8 1, i8 1, i8 1>
+  %a2 = shl <3 x i8> %b, <i8 1, i8 1, i8 1>
+  %sub = sub <3 x i8> %a1, %a2
+  %val = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %sub, <3 x i8> <i8 1, i8 undef, i8 1>)
+  ret <3 x i8> %val
+}
+
+define i8 @fold_umin_with_knownbits_info(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umin_with_knownbits_info(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i8 3
+;
+entry:
+  %a1 = or i8 %a, 3
+  %a2 = shl i8 %b, 2
+  %sub = sub i8 %a1, %a2
+  %val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
+  ret i8 %val
+}
+
+define <3 x i8> @fold_umin_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) {
+; CHECK-LABEL: @fold_umin_with_knownbits_info_undef_in_splat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <3 x i8> <i8 3, i8 3, i8 3>
+;
+entry:
+  %a1 = or <3 x i8> %a, <i8 3, i8 3, i8 3>
+  %a2 = shl <3 x i8> %b, <i8 2, i8 2, i8 2>
+  %sub = sub <3 x i8> %a1, %a2
+  %val = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %sub, <3 x i8> <i8 3, i8 undef, i8 3>)
+  ret <3 x i8> %val
+}
+
+define i8 @fold_umax_with_knownbits_info_fail(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umax_with_knownbits_info_fail(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A1:%.*]] = or i8 [[A:%.*]], 2
+; CHECK-NEXT:    [[A2:%.*]] = shl i8 [[B:%.*]], 1
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT:    [[VAL:%.*]] = call i8 @llvm.umax.i8(i8 [[SUB]], i8 1)
+; CHECK-NEXT:    ret i8 [[VAL]]
+;
+entry:
+  %a1 = or i8 %a, 2
+  %a2 = shl i8 %b, 1
+  %sub = sub i8 %a1, %a2
+  %val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
+  ret i8 %val
+}
+
+define i8 @fold_umin_with_knownbits_info_fail(i8 %a, i8 %b) {
+; CHECK-LABEL: @fold_umin_with_knownbits_info_fail(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A1:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT:    [[A2:%.*]] = shl i8 [[B:%.*]], 2
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
+; CHECK-NEXT:    [[VAL:%.*]] = call i8 @llvm.umin.i8(i8 [[SUB]], i8 3)
+; CHECK-NEXT:    ret i8 [[VAL]]
+;
+entry:
+  %a1 = or i8 %a, 1
+  %a2 = shl i8 %b, 2
+  %sub = sub i8 %a1, %a2
+  %val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
+  ret i8 %val
+}
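
Note (not part of the patch): a minimal standalone C++ sketch of the range reasoning the new fold applies to the fold_umax_with_knownbits_info test. The function name umaxFoldsToOperand is invented for illustration; the ConstantRange/KnownBits calls are existing LLVM APIs, and the patch itself obtains the range via computeConstantRangeIncludingKnownBits, which additionally merges in computeConstantRange() facts.

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/InstrTypes.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // In the test, %sub = (%a | 1) - (%b << 1). Bit 0 of the LHS is known one
  // and bit 0 of the RHS is known zero, so bit 0 of %sub is known one.
  bool umaxFoldsToOperand() {
    KnownBits Known(8);
    Known.One.setBit(0); // bit 0 of %sub is known one
    // "Bit 0 is one" yields the unsigned range [1, 255].
    ConstantRange LHS_CR =
        ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
    // umax(%sub, 1) == %sub iff %sub uge 1 for every value in the range;
    // this is the LHS_CR.icmp(Pred, *RHSC) check in the patch, and it
    // returns true here.
    return LHS_CR.icmp(CmpInst::ICMP_UGE, ConstantRange(APInt(8, 1)));
  }

The two _fail tests break exactly this reasoning: or i8 %a, 2 fixes bit 1 instead of bit 0, so the range still contains 0, and or i8 %a, 1 with umin 3 only pins bit 0, so the range straddles 3; in both cases neither icmp query holds and the call survives.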