Skip to content

Commit

Permalink
[SDAG] try harder to remove a rotate from X == 0
Browse files Browse the repository at this point in the history
https://alive2.llvm.org/ce/z/mJP7XP

This can be viewed as expanding the compare into an and/or of compares
(for example, (A | B) == 0 is equivalent to (A == 0) && (B == 0)):
https://alive2.llvm.org/ce/z/bkZYWE
followed by reduction of each compare.

This could be extended in several ways:
1. Handle the sibling pattern: (X & Y) == -1.
2. Recurse through more than one 'or'.
3. Generalize the fold beyond rotates to any operation that
   only changes the order of bits (bswap, bitreverse).

This is a transform noted in D111530.
  • Loading branch information
rotateright committed Mar 3, 2022
1 parent d3c16be commit e9302bf
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 36 deletions.
18 changes: 18 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Expand Up @@ -3835,6 +3835,24 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
if (SDValue R = getRotateSource(N0))
return DAG.getSetCC(dl, VT, R, N1, Cond);

// Peek through an 'or' of a rotated value compared against 0:
// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
//
// TODO: Add the 'and' with -1 sibling.
// TODO: Recurse through a series of 'or' ops to find the rotate.
EVT OpVT = N0.getValueType();
if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
if (SDValue R = getRotateSource(N0.getOperand(0))) {
SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
}
if (SDValue R = getRotateSource(N0.getOperand(1))) {
SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
}
}

return SDValue();
}

Expand Down
23 changes: 12 additions & 11 deletions llvm/test/CodeGen/X86/legalize-shift.ll
Expand Up @@ -5,28 +5,29 @@
define void @PR36250() nounwind {
; X86-LABEL: PR36250:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: roll %ecx
; X86-NEXT: addl %eax, %eax
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: leal (%eax,%eax), %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: orl %ecx, %esi
; X86-NEXT: orl %ecx, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: orl %eax, %esi
; X86-NEXT: sete (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: PR36250:
; X64: # %bb.0:
; X64-NEXT: movq (%rax), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: rolq %rcx
; X64-NEXT: addq %rax, %rax
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: orq %rcx, %rdx
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: orq %rcx, %rdx
; X64-NEXT: leaq (%rax,%rax), %rdx
; X64-NEXT: orq %rcx, %rcx
; X64-NEXT: orq %rdx, %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: sete (%rax)
; X64-NEXT: retq
%1 = load i448, i448* undef
Expand Down
32 changes: 7 additions & 25 deletions llvm/test/CodeGen/X86/setcc-fsh.ll
Expand Up @@ -188,9 +188,6 @@ define i1 @fshl_eq_n1(i8 %x, i8 %y, i8 %z) nounwind {
define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
; CHECK-LABEL: or_rotl_eq_0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: rolb %cl, %dil
; CHECK-NEXT: orb %sil, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
Expand All @@ -203,9 +200,6 @@ define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-LABEL: or_rotr_ne_0:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NEXT: rorq %cl, %rdi
; CHECK-NEXT: orq %rsi, %rdi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
Expand All @@ -215,6 +209,8 @@ define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
ret i1 %r
}

; negative test - wrong constant

define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: or_rotl_ne_n1:
; CHECK: # %bb.0:
Expand All @@ -231,6 +227,8 @@ define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
ret i1 %r
}

; negative test - extra use

define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: or_rotl_ne_0_use:
; CHECK: # %bb.0:
Expand All @@ -254,25 +252,9 @@ define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
define <4 x i1> @or_rotl_ne_eq0(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: or_rotl_ne_eq0:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31]
; CHECK-NEXT: pand %xmm1, %xmm2
; CHECK-NEXT: pslld $23, %xmm2
; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-NEXT: cvttps2dq %xmm2, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-NEXT: pmuludq %xmm2, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT: pmuludq %xmm3, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: por %xmm1, %xmm4
; CHECK-NEXT: por %xmm0, %xmm4
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: pcmpeqd %xmm4, %xmm0
; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: retq
%rot = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>%x, <4 x i32> %x, <4 x i32> %y)
%or = or <4 x i32> %y, %rot
Expand Down

0 comments on commit e9302bf

Please sign in to comment.