[InstCombine] Optimize overflow check based on uadd.with.overflow result
Fix for https://bugs.llvm.org/show_bug.cgi?id=40846.

This adds a combine for cases where an (a + b) < a style overflow
check is performed, but with a + b being the result of
uadd.with.overflow, so the overflow result is already available and
we can use it directly. GVN/CSE will subsequently deduplicate the extracts.
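
For illustration, here is a minimal IR sketch of the fold (function and
value names are invented for this example, not taken from the patch):

  declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

  define i1 @fold_example(i32 %a, i32 %b) {
    %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
    %val = extractvalue { i32, i1 } %res, 0
    ; "(a + b) < a" overflow check performed on the math result:
    %cmp = icmp ult i32 %val, %a
    ret i1 %cmp
  }

  ; After the combine, %cmp becomes the overflow bit the intrinsic already
  ; computes:
  ;   %cmp = extractvalue { i32, i1 } %res, 1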

We can run into this situation when a function contains both a
uadd.with.overflow and a manual add + overflow check on the same
operands, in which case GVN will rewrite the add to the with.overflow
result and leave us with this pattern.
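
A sketch of that scenario before GVN runs (names invented; the intrinsic
call stands in for whatever produced the checked add in the source, e.g. a
__builtin_add_overflow on unsigned operands):

  declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

  define i1 @gvn_example(i32 %a, i32 %b, i32* %out) {
    ; Checked add through the intrinsic.
    %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
    %sum = extractvalue { i32, i1 } %res, 0
    store i32 %sum, i32* %out
    ; Manual add plus "(a + b) < a" overflow check on the same operands.
    %add = add i32 %a, %b
    %cmp = icmp ult i32 %add, %a
    ret i1 %cmp
  }

GVN replaces %add with %sum, i.e. with extractvalue { i32, i1 } %res, 0,
which is exactly the pattern the new fold targets.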

The implementation is a bit ugly because I'm handling the various
canonicalization edge cases.
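
Concretely, when the constant operand is 1 or -1 the compare typically
reaches this code already canonicalized away from ult into an equality
check, so the fold has to recognize those shapes as well. Roughly
(invented names; the canonical forms are the ones listed in the code
comments and tests below):

  declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

  define i1 @b_is_one(i32 %a) {
    %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 1)
    %val = extractvalue { i32, i1 } %res, 0
    ; canonical form of the overflow check when B == 1
    %cmp = icmp eq i32 %val, 0
    ret i1 %cmp
  }

  define i1 @b_is_minus_one(i32 %a) {
    %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 -1)
    %val = extractvalue { i32, i1 } %res, 0
    ; canonical form of the overflow check when B == -1
    %cmp = icmp ne i32 %val, -1
    ret i1 %cmp
  }

Both still fold to an extractvalue of the overflow bit (field 1).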

This does not yet handle the negated version of this pattern (i.e. an
(a + b) >= a style non-overflow check, which would fold to the negation
of the overflow result).

Differential Revision: https://reviews.llvm.org/D58644
nikic committed Dec 11, 2019
1 parent: 5882e6f, commit: 8db5143
Showing 2 changed files with 40 additions and 14 deletions.
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (33 additions, 0 deletions)
@@ -5386,6 +5386,36 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
  return nullptr;
}

// extract(uadd.with.overflow(A, B), 0) ult A
// -> extract(uadd.with.overflow(A, B), 1)
static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
  CmpInst::Predicate Pred = I.getPredicate();
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  Value *UAddOv;
  Value *A, *B;
  auto UAddOvResultPat = m_ExtractValue<0>(
      m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B)));
  if (match(Op0, UAddOvResultPat) &&
      ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) ||
       (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) &&
        (match(A, m_One()) || match(B, m_One()))) ||
       (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) &&
        (match(A, m_AllOnes()) || match(B, m_AllOnes())))))
    // extract(uadd.with.overflow(A, B), 0) < A
    // extract(uadd.with.overflow(A, 1), 0) == 0
    // extract(uadd.with.overflow(A, -1), 0) != -1
    UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand();
  else if (match(Op1, UAddOvResultPat) &&
           Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B))
    // A > extract(uadd.with.overflow(A, B), 0)
    UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand();
  else
    return nullptr;

  return ExtractValueInst::Create(UAddOv, 1);
}

Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
  bool Changed = false;
  const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -5574,6 +5604,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
  if (Instruction *Res = foldICmpEquality(I))
    return Res;

  if (Instruction *Res = foldICmpOfUAddOv(I))
    return Res;

  // The 'cmpxchg' instruction returns an aggregate containing the old value and
  // an i1 which indicates whether or not we successfully did the swap.
  //
llvm/test/Transforms/InstCombine/with_overflow.ll (7 additions, 14 deletions)
@@ -356,8 +356,7 @@ define i1 @uadd_res_ult_x(i32 %x, i32 %y, i1* %p) nounwind {
; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[X]]
; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: ret i1 [[D]]
;
%a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
@@ -373,8 +372,7 @@ define i1 @uadd_res_ult_y(i32 %x, i32 %y, i1* %p) nounwind {
; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[Y]]
; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: ret i1 [[D]]
;
%a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
@@ -391,8 +389,7 @@ define i1 @uadd_res_ugt_x(i32 %xx, i32 %y, i1* %p) nounwind {
; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X]], i32 [[Y:%.*]])
; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[X]], [[C]]
; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: ret i1 [[D]]
;
%x = urem i32 42, %xx ; Thwart complexity-based canonicalization
@@ -410,8 +407,7 @@ define i1 @uadd_res_ugt_y(i32 %x, i32 %yy, i1* %p) nounwind {
; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y]])
; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[Y]], [[C]]
; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: ret i1 [[D]]
;
%y = urem i32 42, %yy ; Thwart complexity-based canonicalization
@@ -428,8 +424,7 @@ define i1 @uadd_res_ult_const(i32 %x, i1* %p) nounwind {
; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], 42
; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: ret i1 [[D]]
;
%a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 42)
@@ -445,8 +440,7 @@ define i1 @uadd_res_ult_const_one(i32 %x, i1* %p) nounwind {
; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1)
; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
; CHECK-NEXT: [[D:%.*]] = icmp eq i32 [[C]], 0
; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: ret i1 [[D]]
;
%a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1)
@@ -462,8 +456,7 @@ define i1 @uadd_res_ult_const_minus_one(i32 %x, i1* %p) nounwind {
; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 -1)
; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[C]], -1
; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1
; CHECK-NEXT: ret i1 [[D]]
;
%a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 -1)
