diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3d213d3449b8a..ed8d884f843a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9746,6 +9746,37 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff); } + if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) { + APInt MinVal = APInt::getSignedMinValue(BitWidth); + APInt MaxVal = APInt::getSignedMaxValue(BitWidth); + + KnownBits KnownLHS = DAG.computeKnownBits(LHS); + KnownBits KnownRHS = DAG.computeKnownBits(RHS); + + // If either of the operand signs is known, then they are guaranteed to + // only saturate in one direction. If non-negative they will saturate + // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN. + // + // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the + // sign of 'y' has to be flipped. + + bool LHSIsNonNegative = KnownLHS.isNonNegative(); + bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() + : KnownRHS.isNegative(); + if (LHSIsNonNegative || RHSIsNonNegative) { + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff); + } + + bool LHSIsNegative = KnownLHS.isNegative(); + bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative() + : KnownRHS.isNonNegative(); + if (LHSIsNegative || RHSIsNegative) { + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff); + } + } + // Overflow ?
(SumDiff >> BW) ^ MinVal : SumDiff APInt MinVal = APInt::getSignedMinValue(BitWidth); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); diff --git a/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll b/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll index 9c6cdeeb82d72..06cb13429b573 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll @@ -4,10 +4,9 @@ define i64 @test_ssub_nonneg_rhs(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: subs x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 1) ret i64 %sat @@ -16,10 +15,9 @@ define i64 @test_ssub_nonneg_rhs(i64 %x) { define i64 @test_ssub_neg_rhs(i64 %x) { ; CHECK-LABEL: test_ssub_neg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: adds x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 -1) ret i64 %sat @@ -28,10 +26,9 @@ define i64 @test_ssub_neg_rhs(i64 %x) { define i64 @test_sadd_nonneg_rhs(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: adds x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 1) ret i64 %sat @@ -41,10 +38,9 @@ define i64 
@test_sadd_nonneg_rhs(i64 %x) { define i64 @test_sadd_neg_rhs(i64 %x) { ; CHECK-LABEL: test_sadd_neg_rhs: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: subs x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 -1) ret i64 %sat @@ -54,9 +50,8 @@ define i64 @test_ssub_nonneg_lhs(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_lhs: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov x9, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 1, i64 %x) @@ -67,9 +62,8 @@ define i64 @test_ssub_neg_lhs(i64 %x) { ; CHECK-LABEL: test_ssub_neg_lhs: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov x9, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.ssub.sat.i64(i64 -1, i64 %x) @@ -79,10 +73,9 @@ define i64 @test_ssub_neg_lhs(i64 %x) { define i64 @test_sadd_nonneg_lhs(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_lhs: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: adds x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 1, i64 %x) ret i64 %sat @@ -91,10 +84,9 @@ define i64 @test_sadd_nonneg_lhs(i64 %x) { define i64 @test_sadd_neg_lhs(i64 %x) { ; CHECK-LABEL: 
test_sadd_neg_lhs: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, #1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: subs x9, x0, #1 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %sat = call i64 @llvm.sadd.sat.i64(i64 -1, i64 %x) ret i64 %sat @@ -104,10 +96,9 @@ define i64 @test_ssub_nonneg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_rhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #123 // =0x7b +; CHECK-NEXT: mov x9, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: subs x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %y = and i64 %x, 123 @@ -119,11 +110,10 @@ define i64 @test_ssub_neg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_neg_rhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmn x0, #1 -; CHECK-NEXT: csinv x8, x0, xzr, lt -; CHECK-NEXT: subs x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: csinv x9, x0, xzr, lt +; CHECK-NEXT: subs x9, x0, x9 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = call i64 @llvm.smin(i64 %x, i64 -1) %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y) @@ -134,11 +124,10 @@ define i64 @test_sadd_nonneg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_rhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x0, #1 -; CHECK-NEXT: csinc x8, x0, xzr, gt -; CHECK-NEXT: adds x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: csinc x9, x0, xzr, gt +; CHECK-NEXT: adds x9, x0, x9 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: 
ret %y = call i64 @llvm.smax(i64 %x, i64 1) %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y) @@ -149,11 +138,10 @@ define i64 @test_sadd_nonneg_rhs_nonconst(i64 %x) { define i64 @test_sadd_neg_rhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_neg_rhs_nonconst: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, #0x8000000000000000 -; CHECK-NEXT: adds x8, x0, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: orr x9, x0, #0x8000000000000000 +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: adds x9, x0, x9 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = or i64 %x, u0x8000000000000000 %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y) @@ -164,10 +152,9 @@ define i64 @test_ssub_nonneg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_nonneg_lhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #123 // =0x7b +; CHECK-NEXT: mov x9, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 ; CHECK-NEXT: csel x0, x9, x8, vs ; CHECK-NEXT: ret %y = and i64 %x, 123 @@ -179,11 +166,10 @@ define i64 @test_ssub_neg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_ssub_neg_lhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmn x0, #1 -; CHECK-NEXT: csinv x8, x0, xzr, lt -; CHECK-NEXT: subs x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: csinv x9, x0, xzr, lt +; CHECK-NEXT: subs x9, x9, x0 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = call i64 @llvm.smin(i64 %x, i64 -1) %sat = call i64 @llvm.ssub.sat.i64(i64 %y, i64 %x) @@ -194,11 +180,10 @@ define i64 @test_sadd_nonneg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_nonneg_lhs_nonconst: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x0, #1 -; 
CHECK-NEXT: csinc x8, x0, xzr, gt -; CHECK-NEXT: adds x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: csinc x9, x0, xzr, gt +; CHECK-NEXT: adds x9, x9, x0 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = call i64 @llvm.smax(i64 %x, i64 1) %sat = call i64 @llvm.sadd.sat.i64(i64 %y, i64 %x) @@ -208,11 +193,10 @@ define i64 @test_sadd_nonneg_lhs_nonconst(i64 %x) { define i64 @test_sadd_neg_lhs_nonconst(i64 %x) { ; CHECK-LABEL: test_sadd_neg_lhs_nonconst: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, #0x8000000000000000 -; CHECK-NEXT: adds x8, x8, x0 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs +; CHECK-NEXT: orr x9, x0, #0x8000000000000000 +; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: adds x9, x9, x0 +; CHECK-NEXT: csel x0, x8, x9, vs ; CHECK-NEXT: ret %y = or i64 %x, u0x8000000000000000 %sat = call i64 @llvm.sadd.sat.i64(i64 %y, i64 %x) diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll index 10decfda437e5..3e217980d4a77 100644 --- a/llvm/test/CodeGen/X86/combine-add-ssat.ll +++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll @@ -77,11 +77,8 @@ define <8 x i16> @combine_constfold_undef_v8i16() { define i32 @combine_constant_i32(i32 %a0) { ; CHECK-LABEL: combine_constant_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: sarl $31, %eax -; CHECK-NEXT: addl $-2147483648, %eax # imm = 0x80000000 ; CHECK-NEXT: incl %edi +; CHECK-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: cmovnol %edi, %eax ; CHECK-NEXT: retq %res = call i32 @llvm.sadd.sat.i32(i32 1, i32 %a0)