[DAGCombiner] try to convert opposing shifts to casts
This reverses a questionable IR canonicalization when a truncate
is free:

sra (add (shl X, N1C), AddC), N1C -->
sext (add (trunc X to (width - N1C)), AddC')

https://rise4fun.com/Alive/slRC

More details in PR42644:
https://bugs.llvm.org/show_bug.cgi?id=42644
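
A worked instance of this rewrite, taken from the i32 tests in this commit: with
N1C = 32 and AddC = 1 << 32, the narrowed constant AddC' is 1, so the sequence
reduces to "sign-extend (X + 1) from 32 bits". Below is a minimal stand-alone
C++ sketch of that equivalence (not part of the patch; it assumes the usual
two's-complement and arithmetic-shift behaviour that LLVM IR guarantees):

// Hypothetical check of the scalar i32 case: both forms compute the same i64.
#include <cassert>
#include <cstdint>

// sra (add (shl X, 32), 1 << 32), 32
int64_t shiftForm(int64_t x) {
  uint64_t t = (uint64_t(x) << 32) + (uint64_t(1) << 32); // shl + add, mod 2^64
  return int64_t(t) >> 32;                                // arithmetic shift right
}

// sext (add (trunc X to i32), 1)
int64_t castForm(int64_t x) {
  int32_t narrow = int32_t(uint32_t(x) + 1);              // trunc + add, mod 2^32
  return int64_t(narrow);                                 // sext back to i64
}

int main() {
  const int64_t vals[] = {0, -1, 123456789, -98765};
  for (int64_t x : vals)
    assert(shiftForm(x) == castForm(x));
  return 0;
}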

I limited this to pre-legalization for code simplicity because that
should be enough to reverse the IR patterns. I don't have any
evidence (no regression test diffs) that we need to try this later.

Differential Revision: https://reviews.llvm.org/D65607

llvm-svn: 367710
rotateright committed Aug 2, 2019
1 parent d5765ee commit 6826455
Showing 3 changed files with 54 additions and 38 deletions.
26 changes: 26 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7616,6 +7616,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}

+  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+  //   sra (add (shl X, N1C), AddC), N1C -->
+  //   sext (add (trunc X to (width - N1C)), AddC')
+  if (!LegalOperations && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+      SDValue Shl = N0.getOperand(0);
+      // Determine what the truncate's type would be and ask the target if that
+      // is a free operation.
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned ShiftAmt = N1C->getZExtValue();
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+      if (TLI.isTruncateFree(VT, TruncVT)) {
+        SDLoc DL(N);
+        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+        return DAG.getSExtOrTrunc(Add, DL, VT);
+      }
+    }
+  }
+
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
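
The constant rewrite in the new combine is AddC' = (AddC >> ShiftAmt) truncated
to the narrow type. A minimal stand-alone sketch of that computation (assumes an
LLVM build environment; narrowAddC is a hypothetical helper, not part of the
patch), checked against the constants the updated tests below expect:

// Mirrors AddC->getAPIntValue().lshr(ShiftAmt).trunc(NarrowBits) above.
#include "llvm/ADT/APInt.h"
#include <cassert>

static uint64_t narrowAddC(uint64_t AddC, unsigned ShiftAmt, unsigned Width) {
  llvm::APInt C(Width, AddC);
  return C.lshr(ShiftAmt).trunc(Width - ShiftAmt).getZExtValue();
}

int main() {
  // i64 ashr_add_shl_i32 tests: AddC = 1 << 32, shift 32 -> "add #1" on i32.
  assert(narrowAddC(1ULL << 32, 32, 64) == 1);
  // x86 ashr_add_shl_i8 test: AddC = 1 << 57, shift 56 -> "addb $2" on i8.
  assert(narrowAddC(1ULL << 57, 56, 64) == 2);
  return 0;
}
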
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AArch64/shift-mod.ll
@@ -78,9 +78,8 @@ entry:
define i64 @ashr_add_shl_i32(i64 %r) {
; CHECK-LABEL: ashr_add_shl_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4294967296
-; CHECK-NEXT: add x8, x8, x0, lsl #32
-; CHECK-NEXT: asr x0, x8, #32
+; CHECK-NEXT: add w8, w0, #1 // =1
+; CHECK-NEXT: sxtw x0, w8
; CHECK-NEXT: ret
%conv = shl i64 %r, 32
%sext = add i64 %conv, 4294967296
@@ -91,9 +90,8 @@ define i64 @ashr_add_shl_i32(i64 %r) {
define i64 @ashr_add_shl_i8(i64 %r) {
; CHECK-LABEL: ashr_add_shl_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #72057594037927936
-; CHECK-NEXT: add x8, x8, x0, lsl #56
-; CHECK-NEXT: asr x0, x8, #56
+; CHECK-NEXT: add w8, w0, #1 // =1
+; CHECK-NEXT: sxtb x0, w8
; CHECK-NEXT: ret
%conv = shl i64 %r, 56
%sext = add i64 %conv, 72057594037927936
56 changes: 24 additions & 32 deletions llvm/test/CodeGen/X86/shift-combine.ll
@@ -168,10 +168,8 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
;
; X64-LABEL: ashr_add_shl_i32:
; X64: # %bb.0:
-; X64-NEXT: shlq $32, %rdi
-; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
-; X64-NEXT: addq %rdi, %rax
-; X64-NEXT: sarq $32, %rax
+; X64-NEXT: incl %edi
+; X64-NEXT: movslq %edi, %rax
; X64-NEXT: retq
%conv = shl i64 %r, 32
%sext = add i64 %conv, 4294967296
@@ -182,20 +180,17 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
define i64 @ashr_add_shl_i8(i64 %r) nounwind {
; X32-LABEL: ashr_add_shl_i8:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: shll $24, %edx
-; X32-NEXT: addl $33554432, %edx # imm = 0x2000000
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: sarl $24, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: addb $2, %al
+; X32-NEXT: movsbl %al, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: retl
;
; X64-LABEL: ashr_add_shl_i8:
; X64: # %bb.0:
-; X64-NEXT: shlq $56, %rdi
-; X64-NEXT: movabsq $144115188075855872, %rax # imm = 0x200000000000000
-; X64-NEXT: addq %rdi, %rax
-; X64-NEXT: sarq $56, %rax
+; X64-NEXT: addb $2, %dil
+; X64-NEXT: movsbq %dil, %rax
; X64-NEXT: retq
%conv = shl i64 %r, 56
%sext = add i64 %conv, 144115188075855872
@@ -209,34 +204,31 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind {
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: shll $24, %edi
-; X32-NEXT: shll $24, %esi
-; X32-NEXT: shll $24, %edx
-; X32-NEXT: shll $24, %ecx
-; X32-NEXT: addl $16777216, %ecx # imm = 0x1000000
-; X32-NEXT: addl $16777216, %edx # imm = 0x1000000
-; X32-NEXT: addl $16777216, %esi # imm = 0x1000000
-; X32-NEXT: addl $16777216, %edi # imm = 0x1000000
-; X32-NEXT: sarl $24, %edi
-; X32-NEXT: sarl $24, %esi
-; X32-NEXT: sarl $24, %edx
-; X32-NEXT: sarl $24, %ecx
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT: movb {{[0-9]+}}(%esp), %dh
+; X32-NEXT: incb %dh
+; X32-NEXT: movsbl %dh, %esi
+; X32-NEXT: incb %ch
+; X32-NEXT: movsbl %ch, %edi
+; X32-NEXT: incb %dl
+; X32-NEXT: movsbl %dl, %edx
+; X32-NEXT: incb %cl
+; X32-NEXT: movsbl %cl, %ecx
; X32-NEXT: movl %ecx, 12(%eax)
; X32-NEXT: movl %edx, 8(%eax)
-; X32-NEXT: movl %esi, 4(%eax)
-; X32-NEXT: movl %edi, (%eax)
+; X32-NEXT: movl %edi, 4(%eax)
+; X32-NEXT: movl %esi, (%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl $4
;
; X64-LABEL: ashr_add_shl_v4i8:
; X64: # %bb.0:
+; X64-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: pslld $24, %xmm0
-; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: psrad $24, %xmm0
; X64-NEXT: retq
%conv = shl <4 x i32> %r, <i32 24, i32 24, i32 24, i32 24>
