Skip to content

Commit

Permalink
[X86] Support X86ISD::PCMPEQ and X86ISD::PCMPGT in ComputeKnownBits
Browse files Browse the repository at this point in the history
These functions were missing support but are used enough that it
makes sense to track them.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D148963
  • Loading branch information
goldsteinn committed Apr 27, 2023
1 parent 75b48b4 commit ddfee6d
Show file tree
Hide file tree
Showing 9 changed files with 3,608 additions and 6,343 deletions.
17 changes: 17 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38595,6 +38595,23 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero.setBitsFrom(16);
break;
}
case X86ISD::PCMPGT:
case X86ISD::PCMPEQ: {
KnownBits KnownLhs =
DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
KnownBits KnownRhs =
DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
std::optional<bool> Res = Opc == X86ISD::PCMPEQ
? KnownBits::eq(KnownLhs, KnownRhs)
: KnownBits::sgt(KnownLhs, KnownRhs);
if (Res) {
if (*Res)
Known.setAllOnes();
else
Known.setAllZero();
}
break;
}
case X86ISD::PMULUDQ: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Expand Down
53 changes: 22 additions & 31 deletions llvm/test/CodeGen/X86/combine-srl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -346,23 +346,18 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: pshufb %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psrlw $4, %xmm3
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pshufb %xmm3, %xmm1
; SSE-NEXT: pcmpeqb %xmm4, %xmm3
; SSE-NEXT: pand %xmm2, %xmm3
; SSE-NEXT: paddb %xmm1, %xmm3
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pcmpeqb %xmm4, %xmm1
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: psrlw $8, %xmm3
; SSE-NEXT: paddw %xmm1, %xmm3
; SSE-NEXT: pcmpeqw %xmm4, %xmm0
; SSE-NEXT: psrlw $4, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: pshufb %xmm0, %xmm1
; SSE-NEXT: pcmpeqb %xmm3, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: paddb %xmm1, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pand %xmm3, %xmm0
; SSE-NEXT: psrld $16, %xmm3
; SSE-NEXT: paddd %xmm3, %xmm0
; SSE-NEXT: psrld $5, %xmm0
; SSE-NEXT: retq
Expand All @@ -372,22 +367,18 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm4
; AVX-NEXT: vpand %xmm4, %xmm2, %xmm2
; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpaddb %xmm0, %xmm2, %xmm0
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrld $16, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $5, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = and <4 x i32> %x, <i32 4, i32 32, i32 64, i32 128>
Expand Down
186 changes: 82 additions & 104 deletions llvm/test/CodeGen/X86/fpclamptosat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,16 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: pcmpgtd %xmm2, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT: por %xmm0, %xmm2
; CHECK-NEXT: pand %xmm2, %xmm1
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-NEXT: por %xmm1, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: retq
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
Expand Down Expand Up @@ -276,31 +274,27 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; CHECK-NEXT: movdqa %xmm0, %xmm4
; CHECK-NEXT: pxor %xmm3, %xmm4
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm3, %xmm5
; CHECK-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259455,9223372039002259455]
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; CHECK-NEXT: movdqa {{.*#+}} xmm6 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: movdqa %xmm6, %xmm7
; CHECK-NEXT: pcmpgtd %xmm4, %xmm7
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; CHECK-NEXT: pand %xmm5, %xmm4
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; CHECK-NEXT: por %xmm4, %xmm5
; CHECK-NEXT: pand %xmm5, %xmm0
; CHECK-NEXT: pandn %xmm2, %xmm5
; CHECK-NEXT: por %xmm0, %xmm5
; CHECK-NEXT: pcmpgtd %xmm5, %xmm7
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm3, %xmm4
; CHECK-NEXT: pand %xmm7, %xmm4
; CHECK-NEXT: pand %xmm4, %xmm0
; CHECK-NEXT: pandn %xmm2, %xmm4
; CHECK-NEXT: por %xmm0, %xmm4
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm3, %xmm4
; CHECK-NEXT: pcmpgtd %xmm0, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
; CHECK-NEXT: pand %xmm4, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; CHECK-NEXT: por %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
; CHECK-NEXT: pcmpgtd %xmm5, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm3, %xmm0
; CHECK-NEXT: pand %xmm6, %xmm0
; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
; CHECK-NEXT: retq
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
Expand Down Expand Up @@ -560,33 +554,28 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; CHECK-NEXT: movdqa %xmm0, %xmm3
; CHECK-NEXT: pxor %xmm2, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm4
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259455,9223372039002259455]
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: movdqa %xmm5, %xmm6
; CHECK-NEXT: pcmpgtd %xmm3, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
; CHECK-NEXT: pand %xmm4, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
; CHECK-NEXT: por %xmm3, %xmm4
; CHECK-NEXT: pand %xmm4, %xmm0
; CHECK-NEXT: pandn %xmm1, %xmm4
; CHECK-NEXT: por %xmm0, %xmm4
; CHECK-NEXT: pcmpgtd %xmm4, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
; CHECK-NEXT: pand %xmm6, %xmm3
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pandn %xmm1, %xmm3
; CHECK-NEXT: por %xmm0, %xmm3
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; CHECK-NEXT: movdqa %xmm6, %xmm0
; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
; CHECK-NEXT: pcmpgtd %xmm0, %xmm5
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
; CHECK-NEXT: pand %xmm3, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm6, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
; CHECK-NEXT: pcmpgtd %xmm4, %xmm5
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: pand %xmm5, %xmm0
; CHECK-NEXT: pand %xmm0, %xmm6
; CHECK-NEXT: pandn %xmm1, %xmm0
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
; CHECK-NEXT: por %xmm6, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down Expand Up @@ -1661,18 +1650,16 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: pcmpgtd %xmm2, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT: por %xmm0, %xmm2
; CHECK-NEXT: pand %xmm2, %xmm1
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-NEXT: por %xmm1, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: retq
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
Expand Down Expand Up @@ -1855,32 +1842,28 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; CHECK-NEXT: movdqa %xmm0, %xmm3
; CHECK-NEXT: pxor %xmm2, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm4
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259455,9223372039002259455]
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: movdqa %xmm5, %xmm6
; CHECK-NEXT: pcmpgtd %xmm3, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
; CHECK-NEXT: pand %xmm4, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
; CHECK-NEXT: por %xmm3, %xmm4
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
; CHECK-NEXT: pand %xmm4, %xmm0
; CHECK-NEXT: pandn %xmm3, %xmm4
; CHECK-NEXT: por %xmm0, %xmm4
; CHECK-NEXT: pcmpgtd %xmm4, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
; CHECK-NEXT: pand %xmm6, %xmm3
; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pandn %xmm4, %xmm3
; CHECK-NEXT: por %xmm0, %xmm3
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm6
; CHECK-NEXT: pcmpgtd %xmm0, %xmm5
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
; CHECK-NEXT: pand %xmm6, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,2,2]
; CHECK-NEXT: pcmpgtd %xmm6, %xmm5
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: pand %xmm5, %xmm0
; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: pandn %xmm3, %xmm0
; CHECK-NEXT: pandn %xmm4, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; CHECK-NEXT: retq
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
Expand Down Expand Up @@ -2134,34 +2117,29 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pxor %xmm1, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm1, %xmm3
; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259455,9223372039002259455]
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-NEXT: movdqa %xmm4, %xmm5
; CHECK-NEXT: pcmpgtd %xmm2, %xmm5
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
; CHECK-NEXT: pand %xmm3, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
; CHECK-NEXT: por %xmm2, %xmm3
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; CHECK-NEXT: pand %xmm3, %xmm0
; CHECK-NEXT: pandn %xmm2, %xmm3
; CHECK-NEXT: por %xmm0, %xmm3
; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm1, %xmm2
; CHECK-NEXT: pand %xmm5, %xmm2
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
; CHECK-NEXT: pand %xmm2, %xmm0
; CHECK-NEXT: pandn %xmm3, %xmm2
; CHECK-NEXT: por %xmm0, %xmm2
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; CHECK-NEXT: movdqa %xmm6, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm1, %xmm5
; CHECK-NEXT: pcmpgtd %xmm0, %xmm4
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
; CHECK-NEXT: pand %xmm5, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm6, %xmm1
; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: pandn %xmm2, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
; CHECK-NEXT: pcmpgtd %xmm5, %xmm4
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: pand %xmm4, %xmm0
; CHECK-NEXT: pand %xmm0, %xmm6
; CHECK-NEXT: pandn %xmm3, %xmm0
; CHECK-NEXT: por %xmm6, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down
Loading

0 comments on commit ddfee6d

Please sign in to comment.