diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 50e337413a898..04db59eb0f7b9 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1674,7 +1675,8 @@ m_UnordFMin(const LHS &L, const RHS &R) {
 }
 
 //===----------------------------------------------------------------------===//
-// Matchers for overflow check patterns: e.g. (a + b) u< a
+// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b
 //===----------------------------------------------------------------------===//
 
 template <typename LHS_t, typename RHS_t, typename Sum_t>
@@ -1705,6 +1706,19 @@ struct UAddWithOverflow_match {
     if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
       return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
 
+    Value *Op1;
+    auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
+    // (a ^ -1) <u b
+    if (Pred == ICmpInst::ICMP_ULT) {
+      if (XorExpr.match(ICmpLHS))
+        return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
+    }
+    //  b >u (a ^ -1)
+    if (Pred == ICmpInst::ICMP_UGT) {
+      if (XorExpr.match(ICmpRHS))
+        return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
+    }
+
     // Match special-case for increment-by-1.
     if (Pred == ICmpInst::ICMP_EQ) {
       // (a + 1) == 0
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e613cb72a9b63..1ac459d0aa423 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -399,7 +399,8 @@ class TypePromotionTransaction;
 
     bool simplifyOffsetableRelocate(Instruction &I);
     bool tryToSinkFreeOperands(Instruction *I);
-    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
+                                     Value *Arg1, CmpInst *Cmp,
                                      Intrinsic::ID IID);
     bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
     bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
@@ -1185,6 +1186,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
 }
 
 bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+                                                 Value *Arg0, Value *Arg1,
                                                  CmpInst *Cmp,
                                                  Intrinsic::ID IID) {
   if (BO->getParent() != Cmp->getParent()) {
@@ -1202,8 +1204,6 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
   }
 
   // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
-  Value *Arg0 = BO->getOperand(0);
-  Value *Arg1 = BO->getOperand(1);
   if (BO->getOpcode() == Instruction::Add &&
       IID == Intrinsic::usub_with_overflow) {
     assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
@@ -1213,7 +1213,9 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
   // Insert at the first instruction of the pair.
   Instruction *InsertPt = nullptr;
   for (Instruction &Iter : *Cmp->getParent()) {
-    if (&Iter == BO || &Iter == Cmp) {
+    // If BO is an XOR, it is not guaranteed that it comes after both inputs to
+    // the overflow intrinsic are defined.
+    if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
       InsertPt = &Iter;
       break;
     }
@@ -1222,12 +1224,16 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
 
   IRBuilder<> Builder(InsertPt);
   Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
-  Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+  if (BO->getOpcode() != Instruction::Xor) {
+    Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+    BO->replaceAllUsesWith(Math);
+  } else
+    assert(BO->hasOneUse() &&
+           "Patterns with XOr should use the BO only in the compare");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
-  BO->replaceAllUsesWith(Math);
   Cmp->replaceAllUsesWith(OV);
-  BO->eraseFromParent();
   Cmp->eraseFromParent();
+  BO->eraseFromParent();
   return true;
 }
 
@@ -1267,9 +1273,13 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
                                                bool &ModifiedDT) {
   Value *A, *B;
   BinaryOperator *Add;
-  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
     if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
       return false;
+    // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
+    A = Add->getOperand(0);
+    B = Add->getOperand(1);
+  }
 
   if (!TLI->shouldFormOverflowOp(ISD::UADDO,
                                  TLI->getValueType(*DL, Add->getType()),
@@ -1282,7 +1292,8 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
   if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
+                                   Intrinsic::uadd_with_overflow))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.
@@ -1344,7 +1355,8 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
                        Sub->hasNUsesOrMore(2)))
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+                                   Cmp, Intrinsic::usub_with_overflow))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b3f4fa43371e3..e139c20954737 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5568,8 +5568,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       isa<IntegerType>(A->getType())) {
     Value *Result;
     Constant *Overflow;
-    if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
-                              *AddI, Result, Overflow)) {
+    // m_UAddWithOverflow can match patterns that do not include an explicit
+    // "add" instruction, so check the opcode of the matched op.
+    if (AddI->getOpcode() == Instruction::Add &&
+        OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, A, B, *AddI,
+                              Result, Overflow)) {
       replaceInstUsesWith(*AddI, Result);
       return replaceInstUsesWith(I, Overflow);
     }
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll
index 08adbd1507220..8289dc04f4a77 100644
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -201,11 +201,11 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
 define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    mvn w9, w1
-; CHECK-NEXT:    add w10, w0, w1
-; CHECK-NEXT:    cmp w8, w9, uxtb
-; CHECK-NEXT:    csinv w0, w10, wzr, ls
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    add w8, w8, w0, uxtb
+; CHECK-NEXT:    add w9, w0, w1
+; CHECK-NEXT:    tst w8, #0x100
+; CHECK-NEXT:    csinv w0, w9, wzr, eq
 ; CHECK-NEXT:    ret
   %noty = xor i8 %y, -1
   %a = add i8 %x, %y
@@ -247,11 +247,11 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
 define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    mvn w9, w1
-; CHECK-NEXT:    add w10, w0, w1
-; CHECK-NEXT:    cmp w8, w9, uxth
-; CHECK-NEXT:    csinv w0, w10, wzr, ls
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    add w8, w8, w0, uxth
+; CHECK-NEXT:    add w9, w0, w1
+; CHECK-NEXT:    tst w8, #0x10000
+; CHECK-NEXT:    csinv w0, w9, wzr, eq
 ; CHECK-NEXT:    ret
   %noty = xor i16 %y, -1
   %a = add i16 %x, %y
@@ -290,10 +290,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
 define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT:    mvn w8, w1
-; CHECK-NEXT:    add w9, w0, w1
-; CHECK-NEXT:    cmp w0, w8
-; CHECK-NEXT:    csinv w0, w9, wzr, ls
+; CHECK-NEXT:    cmn w1, w0
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    csinv w0, w8, wzr, lo
 ; CHECK-NEXT:    ret
   %noty = xor i32 %y, -1
   %a = add i32 %x, %y
@@ -332,10 +331,9 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
 define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT:    mvn x8, x1
-; CHECK-NEXT:    add x9, x0, x1
-; CHECK-NEXT:    cmp x0, x8
-; CHECK-NEXT:    csinv x0, x9, xzr, ls
+; CHECK-NEXT:    cmn x1, x0
+; CHECK-NEXT:    add x8, x0, x1
+; CHECK-NEXT:    csinv x0, x8, xzr, lo
 ; CHECK-NEXT:    ret
   %noty = xor i64 %y, -1
   %a = add i64 %x, %y
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
index 4f5ec6fbede8a..23b91c01dd6c3 100644
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -211,14 +211,10 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
 define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
-; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
-; ANY-NEXT:    leal (%rdi,%rsi), %eax
-; ANY-NEXT:    notb %sil
-; ANY-NEXT:    cmpb %sil, %dil
-; ANY-NEXT:    movzbl %al, %ecx
+; ANY-NEXT:    addb %dil, %sil
+; ANY-NEXT:    movzbl %sil, %ecx
 ; ANY-NEXT:    movl $255, %eax
-; ANY-NEXT:    cmovbel %ecx, %eax
+; ANY-NEXT:    cmovael %ecx, %eax
 ; ANY-NEXT:    # kill: def $al killed $al killed $eax
 ; ANY-NEXT:    retq
   %noty = xor i8 %y, -1
@@ -263,13 +259,9 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
 define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
-; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
-; ANY-NEXT:    leal (%rdi,%rsi), %ecx
-; ANY-NEXT:    notl %esi
-; ANY-NEXT:    cmpw %si, %di
+; ANY-NEXT:    addw %di, %si
 ; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; ANY-NEXT:    cmovbel %ecx, %eax
+; ANY-NEXT:    cmovael %esi, %eax
 ; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
 ; ANY-NEXT:    retq
   %noty = xor i16 %y, -1
@@ -312,13 +304,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
 define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT:    # kill: def $esi killed $esi def $rsi
-; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
-; ANY-NEXT:    leal (%rdi,%rsi), %ecx
-; ANY-NEXT:    notl %esi
-; ANY-NEXT:    cmpl %esi, %edi
+; ANY-NEXT:    addl %esi, %edi
 ; ANY-NEXT:    movl $-1, %eax
-; ANY-NEXT:    cmovbel %ecx, %eax
+; ANY-NEXT:    cmovael %edi, %eax
 ; ANY-NEXT:    retq
   %noty = xor i32 %y, -1
   %a = add i32 %x, %y
@@ -359,11 +347,9 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
 define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT:    leaq (%rdi,%rsi), %rcx
-; ANY-NEXT:    notq %rsi
-; ANY-NEXT:    cmpq %rsi, %rdi
+; ANY-NEXT:    addq %rsi, %rdi
 ; ANY-NEXT:    movq $-1, %rax
-; ANY-NEXT:    cmovbeq %rcx, %rax
+; ANY-NEXT:    cmovaeq %rdi, %rax
 ; ANY-NEXT:    retq
   %noty = xor i64 %y, -1
   %a = add i64 %x, %y
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
index e1408ffdeb19a..487b639f9b35c 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
@@ -102,9 +102,9 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %x = xor i64 %a, -1
@@ -115,13 +115,13 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
 
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
-  %x = xor i64 -1, %a
-  %cmp = icmp ult i64 %x, %b
+  %x = xor i64 %a, -1
+  %cmp = icmp ugt i64 %b, %x
   %Q = select i1 %cmp, i64 %b, i64 42
   ret i64 %Q
 }
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
index 5cf408a66100b..a25e0cf2162b8 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -153,9 +153,9 @@ exit:
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %x = xor i64 %a, -1
@@ -166,12 +166,12 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
-  %x = xor i64 -1, %a
+  %x = xor i64 %a, -1
   %cmp = icmp ult i64 %x, %b
   %Q = select i1 %cmp, i64 %b, i64 42
   ret i64 %Q
 }
@@ -194,6 +194,23 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
   ret i64 %Q
 }
 
+; Make sure we do not use the XOR binary operator as insert point, as it may
+; come before the second operand of the overflow intrinsic.
+define i1 @uaddo6_xor_op_after_XOR(i32 %a, i32* %b.ptr) {
+; CHECK-LABEL: @uaddo6_xor_op_after_XOR(
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A:%.*]], i32 [[B]])
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[OV:%.*]] = xor i1 [[OV1]], true
+; CHECK-NEXT:    ret i1 [[OV]]
+;
+  %x = xor i32 %a, -1
+  %b = load i32, i32* %b.ptr, align 8
+  %cmp14 = icmp ugt i32 %b, %x
+  %ov = xor i1 %cmp14, true
+  ret i1 %ov
+}
+
 ; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
 ; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754
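
For reference, here is the fold these changes enable end to end, as a minimal sketch on the same IR shape as the uaddo6_xor tests above (the function name @overflow_check and the value names are illustrative, not taken from the patch). Input with the xor form of the unsigned-add overflow check:

  define i64 @overflow_check(i64 %a, i64 %b) {
    %not.a = xor i64 %a, -1           ; %not.a == UINT64_MAX - %a
    %cmp = icmp ult i64 %not.a, %b    ; true exactly when %a + %b wraps
    %q = select i1 %cmp, i64 %b, i64 42
    ret i64 %q
  }

On a target where TLI->shouldFormOverflowOp() accepts ISD::UADDO for i64, CodeGenPrepare now rewrites the compare into the intrinsic form, matching the CHECK lines above; the xor itself can be erased because the matcher only fires when the compare is its sole use (m_OneUse):

  declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)

  define i64 @overflow_check(i64 %a, i64 %b) {
    %res = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
    %ov = extractvalue { i64, i1 } %res, 1
    %q = select i1 %ov, i64 %b, i64 42
    ret i64 %q
  }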