diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 18016e93fbdd01..093fe58106b268 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3835,6 +3835,24 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, if (SDValue R = getRotateSource(N0)) return DAG.getSetCC(dl, VT, R, N1, Cond); + // Peek through an 'or' of a rotated value compared against 0: + // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0 + // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0 + // + // TODO: Add the 'and' with -1 sibling. + // TODO: Recurse through a series of 'or' ops to find the rotate. + EVT OpVT = N0.getValueType(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) { + if (SDValue R = getRotateSource(N0.getOperand(0))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (SDValue R = getRotateSource(N0.getOperand(1))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/legalize-shift.ll b/llvm/test/CodeGen/X86/legalize-shift.ll index 8113311134ab3a..bc5764e586dece 100644 --- a/llvm/test/CodeGen/X86/legalize-shift.ll +++ b/llvm/test/CodeGen/X86/legalize-shift.ll @@ -5,16 +5,18 @@ define void @PR36250() nounwind { ; X86-LABEL: PR36250: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movl (%eax), %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: roll %ecx -; X86-NEXT: addl %eax, %eax -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: orl %ecx, %edx +; X86-NEXT: leal (%eax,%eax), %edx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: orl %eax, %esi ; X86-NEXT: sete (%eax) +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: PR36250: @@ -22,11 +24,10 @@ define void @PR36250() nounwind { ; X64-NEXT: movq (%rax), %rax ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: rolq %rcx -; X64-NEXT: addq %rax, %rax -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: orq %rcx, %rdx -; X64-NEXT: orq %rax, %rdx -; X64-NEXT: orq %rcx, %rdx +; X64-NEXT: leaq (%rax,%rax), %rdx +; X64-NEXT: orq %rcx, %rcx +; X64-NEXT: orq %rdx, %rcx +; X64-NEXT: orq %rax, %rcx ; X64-NEXT: sete (%rax) ; X64-NEXT: retq %1 = load i448, i448* undef diff --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll index f42f1ea5a96bf0..a345cf30f9d2ea 100644 --- a/llvm/test/CodeGen/X86/setcc-fsh.ll +++ b/llvm/test/CodeGen/X86/setcc-fsh.ll @@ -188,9 +188,6 @@ define i1 @fshl_eq_n1(i8 %x, i8 %y, i8 %z) nounwind { define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind { ; CHECK-LABEL: or_rotl_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: rolb %cl, %dil ; CHECK-NEXT: orb %sil, %dil ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq @@ -203,9 +200,6 @@ define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind { define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-LABEL: or_rotr_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx -; CHECK-NEXT: rorq %cl, %rdi ; CHECK-NEXT: orq %rsi, %rdi ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq @@ -215,6 +209,8 @@ define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind { ret i1 %r } +; negative test - wrong constant + define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: or_rotl_ne_n1: ; CHECK: # %bb.0: @@ -231,6 +227,8 @@ define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind { ret i1 %r } +; negative test - extra use + define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-LABEL: or_rotl_ne_0_use: ; CHECK: # %bb.0: @@ -254,25 +252,9 @@ define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind { define <4 x i1> @or_rotl_ne_eq0(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: or_rotl_ne_eq0: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31] -; CHECK-NEXT: pand %xmm1, %xmm2 -; CHECK-NEXT: pslld $23, %xmm2 -; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; CHECK-NEXT: cvttps2dq %xmm2, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-NEXT: pmuludq %xmm2, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3] -; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; CHECK-NEXT: pmuludq %xmm3, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3] -; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; CHECK-NEXT: por %xmm1, %xmm4 -; CHECK-NEXT: por %xmm0, %xmm4 -; CHECK-NEXT: pxor %xmm0, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm4, %xmm0 +; CHECK-NEXT: pxor %xmm2, %xmm2 +; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm0 ; CHECK-NEXT: retq %rot = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>%x, <4 x i32> %x, <4 x i32> %y) %or = or <4 x i32> %y, %rot