Skip to content

Commit

Permalink
[InstCombine] Fold minmax intrinsic using KnownBits information (#76242)
Browse files Browse the repository at this point in the history
This patch tries to fold min/max intrinsics that have a constant RHS by using
the range computed by `computeConstantRangeIncludingKnownBits`.
Fixes a regression in
[_karatsuba_rec:cpython/Modules/_decimal/libmpdec/mpdecimal.c](https://github.com/python/cpython/blob/c31943af16f885c8cf5d5a690c25c366afdb2862/Modules/_decimal/libmpdec/mpdecimal.c#L5460-L5462),
which was introduced by #71396.
See also
dtcxzyw/llvm-opt-benchmark#16 (comment).

Alive2 for splat vectors with undef: https://alive2.llvm.org/ce/z/J8hKWd
  • Loading branch information
dtcxzyw committed Dec 22, 2023
1 parent 9b6ea5e commit 345d7b1
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 4 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/Analysis/ValueTracking.h
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,11 @@ ConstantRange computeConstantRange(const Value *V, bool ForSigned,
const DominatorTree *DT = nullptr,
unsigned Depth = 0);

/// Combine constant ranges from computeConstantRange() and computeKnownBits().
///
/// \param V         The value to compute a range for; its known bits are
///                  obtained through the WithCache wrapper.
/// \param ForSigned Forwarded both to the known-bits-to-range conversion and
///                  to computeConstantRange().
/// \param SQ        Query context used for the known-bits lookup; its
///                  IIQ.UseInstrInfo flag is passed to computeConstantRange().
/// \returns A single ConstantRange built from the two ranges.
/// NOTE(review): the combining step is outside the visible hunk; presumably
/// the two ranges are intersected -- confirm against ValueTracking.cpp.
ConstantRange
computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
bool ForSigned, const SimplifyQuery &SQ);

/// Return true if this function can prove that the instruction I will
/// always transfer execution to one of its successors (including the next
/// instruction that follows within a basic block). E.g. this is not
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6289,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
}

/// Combine constant ranges from computeConstantRange() and computeKnownBits().
static ConstantRange
computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
bool ForSigned,
const SimplifyQuery &SQ) {
ConstantRange
llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
bool ForSigned,
const SimplifyQuery &SQ) {
ConstantRange CR1 =
ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1796,6 +1796,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
return NewMinMax;

// Try to fold minmax with constant RHS based on range information.
// If the range known for the LHS already decides the comparison against the
// constant, the call is replaced by the LHS itself or by the constant.
const APInt *RHSC;
// The AllowUndef matcher is used so that splat constants with undef lanes
// (see the *_undef_in_splat tests) are handled as well.
if (match(I1, m_APIntAllowUndef(RHSC))) {
// Use the non-strict form of the intrinsic's predicate so that the case
// where the LHS may equal the constant still selects the LHS.
ICmpInst::Predicate Pred =
ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
bool IsSigned = MinMaxIntrinsic::isSigned(IID);
// Range of the LHS from both range analysis and known bits, queried with
// the intrinsic call as the context instruction.
ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
I0, IsSigned, SQ.getWithInstruction(II));
// A full-set range carries no information, so skip the comparisons.
if (!LHS_CR.isFullSet()) {
// The predicate holds for the LHS range against the constant: the
// intrinsic always yields the LHS.
if (LHS_CR.icmp(Pred, *RHSC))
return replaceInstUsesWith(*II, I0);
// The swapped predicate holds instead: the intrinsic always yields the
// constant, materialized in the call's own type.
if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
return replaceInstUsesWith(*II,
ConstantInt::get(II->getType(), *RHSC));
}
}

break;
}
case Intrinsic::bitreverse: {
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2489,3 +2489,95 @@ define i1 @PR57986() {
%umin = call i1 @llvm.umin.i1(i1 ptrtoint (ptr @g to i1), i1 true)
ret i1 %umin
}

; Positive test: %a1 is odd (bit 0 forced to 1) and %a2 is even (shifted left
; by one), so %sub is always odd and can never be 0. As an unsigned value it is
; therefore at least 1, and umax(%sub, 1) is expected to fold to %sub.
define i8 @fold_umax_with_knownbits_info(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_umax_with_knownbits_info(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 1
; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 1
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
; CHECK-NEXT: ret i8 [[SUB]]
;
entry:
%a1 = or i8 %a, 1
%a2 = shl i8 %b, 1
%sub = sub i8 %a1, %a2
%val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
ret i8 %val
}

; Vector variant of the umax positive test. The constant operand is a splat of
; 1 with an undef middle lane; the fold is still expected to apply and the
; whole call is replaced by %sub.
define <3 x i8> @fold_umax_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) {
; CHECK-LABEL: @fold_umax_with_knownbits_info_undef_in_splat(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = or <3 x i8> [[A:%.*]], <i8 1, i8 1, i8 1>
; CHECK-NEXT: [[A2:%.*]] = shl <3 x i8> [[B:%.*]], <i8 1, i8 1, i8 1>
; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i8> [[A1]], [[A2]]
; CHECK-NEXT: ret <3 x i8> [[SUB]]
;
entry:
%a1 = or <3 x i8> %a, <i8 1, i8 1, i8 1>
%a2 = shl <3 x i8> %b, <i8 1, i8 1, i8 1>
%sub = sub <3 x i8> %a1, %a2
%val = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %sub, <3 x i8> <i8 1, i8 undef, i8 1>)
ret <3 x i8> %val
}

; Positive test: the low two bits of %a1 are 11 and the low two bits of %a2 are
; 00, so the low two bits of %sub are 11 and %sub is at least 3 as an unsigned
; value. umin(%sub, 3) is therefore expected to fold to the constant 3.
define i8 @fold_umin_with_knownbits_info(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_umin_with_knownbits_info(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i8 3
;
entry:
%a1 = or i8 %a, 3
%a2 = shl i8 %b, 2
%sub = sub i8 %a1, %a2
%val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
ret i8 %val
}

; Vector variant of the umin positive test. The constant operand is a splat of
; 3 with an undef middle lane; the expected result is a full splat of 3, i.e.
; the undef lane is also replaced by the constant.
define <3 x i8> @fold_umin_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) {
; CHECK-LABEL: @fold_umin_with_knownbits_info_undef_in_splat(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <3 x i8> <i8 3, i8 3, i8 3>
;
entry:
%a1 = or <3 x i8> %a, <i8 3, i8 3, i8 3>
%a2 = shl <3 x i8> %b, <i8 2, i8 2, i8 2>
%sub = sub <3 x i8> %a1, %a2
%val = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %sub, <3 x i8> <i8 3, i8 undef, i8 3>)
ret <3 x i8> %val
}

; Negative test: only bit 1 of %a1 is forced, so bit 0 of %sub is unknown and
; %sub may be 0 (e.g. %a1 == %a2 == 2). umax(%sub, 1) cannot be folded and the
; call is expected to remain.
define i8 @fold_umax_with_knownbits_info_fail(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_umax_with_knownbits_info_fail(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 2
; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 1
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
; CHECK-NEXT: [[VAL:%.*]] = call i8 @llvm.umax.i8(i8 [[SUB]], i8 1)
; CHECK-NEXT: ret i8 [[VAL]]
;
entry:
%a1 = or i8 %a, 2
%a2 = shl i8 %b, 1
%sub = sub i8 %a1, %a2
%val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
ret i8 %val
}

; Negative test: only bit 0 of %sub is known (set), while bit 1 is unknown, so
; %sub may be 1 (below 3) or a value of at least 3. umin(%sub, 3) cannot be
; folded and the call is expected to remain.
define i8 @fold_umin_with_knownbits_info_fail(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_umin_with_knownbits_info_fail(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 1
; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 2
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
; CHECK-NEXT: [[VAL:%.*]] = call i8 @llvm.umin.i8(i8 [[SUB]], i8 3)
; CHECK-NEXT: ret i8 [[VAL]]
;
entry:
%a1 = or i8 %a, 1
%a2 = shl i8 %b, 2
%sub = sub i8 %a1, %a2
%val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
ret i8 %val
}

0 comments on commit 345d7b1

Please sign in to comment.