[SDAG] fold sub-of-shift to add-of-shift
This fold is already done at the IR level (Alive2 proof:
https://alive2.llvm.org/ce/z/jWyFrP); this patch performs the same
fold in SelectionDAG.

There is an x86 test that shows an improvement
from the added flexibility of using add (commutative).

The other diffs are presumed neutral.

Note that this could also be folded to an 'xor',
but I'm not sure if that would be universally better
(e.g., x86 can convert adds more easily into LEA).

This helps prevent regressions from a potential fold for
issue #53829.
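
Why the identity holds: a shift by BW-1 produces either 0 or the
sign-bit-only value (SMIN), and in two's complement negating SMIN
yields SMIN again, so subtracting the shifted value is the same as
adding it. A minimal standalone C++ sketch of this reasoning
(illustrative only, not part of the commit; the names are arbitrary),
checking the 8-bit case exhaustively:

#include <cassert>
#include <cstdint>

// Check N0 - (X << 7) == N0 + (X << 7) for all 8-bit values.
// The shift result truncated to 8 bits is either 0x00 or 0x80, and
// in two's complement -0x80 wraps back to 0x80, so sub == add.
int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned N0 = 0; N0 < 256; ++N0) {
      uint8_t Shl = static_cast<uint8_t>(X << 7); // 0x00 or 0x80
      uint8_t Sub = static_cast<uint8_t>(N0 - Shl);
      uint8_t Add = static_cast<uint8_t>(N0 + Shl);
      assert(Sub == Add);
    }
  }
  return 0;
}

The same wrap-around argument shows why the 'xor' alternative
mentioned above would also be correct: adding a value whose only
possible set bit is the sign bit is equivalent to xor-ing it, because
the carry out of the top bit is discarded.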
rotateright committed Feb 18, 2022
1 parent bf296ea commit a2963d8
Showing 5 changed files with 19 additions and 11 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3656,6 +3656,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
   }
 
+  // As with the previous fold, prefer add for more folding potential.
+  // Subtracting SMIN/0 is the same as adding SMIN/0:
+  // N0 - (X << BW-1) --> N0 + (X << BW-1)
+  if (N1.getOpcode() == ISD::SHL) {
+    ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
+    if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
+      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+  }
+
   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
     // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
     if (SDValue Carry = getAsCarry(TLI, N0)) {
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -208,7 +208,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
 ; CHECK-NEXT: movi v3.4s, #128, lsl #24
 ; CHECK-NEXT: usra v1.4s, v2.4s, #1
 ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT: movi v1.4s, #1
 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/combine-srem.ll
@@ -74,7 +74,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
 ; SSE-NEXT: psrld $1, %xmm1
 ; SSE-NEXT: paddd %xmm0, %xmm1
 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: paddd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: combine_vec_srem_by_minsigned:
@@ -83,7 +83,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
 ; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: combine_vec_srem_by_minsigned:
@@ -93,7 +93,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: retq
 %1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
 ret <4 x i32> %1
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/X86/imul.ll
@@ -529,9 +529,8 @@ define i64 @testNegOverflow(i64 %a) {
 ; X64-LABEL: testNegOverflow:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: shlq $63, %rcx
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: shlq $63, %rax
+; X64-NEXT: addq %rdi, %rax
 ; X64-NEXT: retq
 ;
 ; X86-LABEL: testNegOverflow:
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
@@ -622,7 +622,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
 ; CHECK-SSE-NEXT: psrld $1, %xmm1
 ; CHECK-SSE-NEXT: paddd %xmm0, %xmm1
 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE-NEXT: paddd %xmm1, %xmm0
 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
 ; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
 ; CHECK-SSE-NEXT: psrld $31, %xmm0
@@ -634,7 +634,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -647,7 +647,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
 ; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -659,7 +659,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
 ; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
 ; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
