[SelectionDAG] Respect multiple uses in SimplifyDemandedBits to SimplifyDemandedVectorElts simplification

rL343913 was using SimplifyDemandedBits' original demanded mask instead of the adjusted 'NewMask', which accounts for multiple uses of the op (those variable names really need improving...).

Annoyingly, many of the test changes (back to the pre-rL343913 state) are actually safe - but only because their multiple uses are all by PMULDQ/PMULUDQ.

Thanks to Jan Vesely (@jvesely) for bisecting the bug.

llvm-svn: 343935
RKSimon committed Oct 7, 2018
1 parent 012fda5 commit 3b04a4e
Showing 8 changed files with 183 additions and 104 deletions.
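The core of the bug: a node with more than one use cannot be simplified using only the demanded mask supplied by a single caller, because the node still has to satisfy every other user; the walk has to fall back to a conservative mask, which is what the adjusted 'NewMask' in the commit message represents. The following is a minimal standalone sketch of that idea in plain C++ - it uses uint64_t in place of APInt and made-up names (DemandedByA, DemandedByB, HasOneUse), and is not the LLVM implementation:

#include <cassert>
#include <cstdint>

int main() {
  // A shared 64-bit value X with two users:
  //   user A computes (X & 0x00000000FFFFFFFF) and so demands only the low 32 bits,
  //   user B computes (X >> 32)                and so demands only the high 32 bits.
  const uint64_t DemandedByA = 0x00000000FFFFFFFFull;
  const uint64_t DemandedByB = 0xFFFFFFFF00000000ull;

  // What X as a whole must still provide is the union of all users' demands.
  const uint64_t DemandedByAllUsers = DemandedByA | DemandedByB;

  // Simplifying X on behalf of user A may rely on A's mask only if A is the
  // sole user; otherwise the walk must widen to a conservative all-ones mask.
  const bool HasOneUse = false;
  const uint64_t NewMask = HasOneUse ? DemandedByA : ~0ull;

  // Using the unadjusted per-user mask here would let the high half of X be
  // "proved" dead and rewritten, breaking user B.
  assert((NewMask & DemandedByAllUsers) == DemandedByAllUsers);
  return 0;
}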
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1219,7 +1219,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
DemandedSubElts = APInt::getNullValue(Scale);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedMask.extractBits(NumSrcEltBits, Offset);
+ APInt Sub = NewMask.extractBits(NumSrcEltBits, Offset);
if (Sub.isAllOnesValue())
DemandedSubElts.setBit(i);
else if (!Sub.isNullValue())
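For reference, the loop above maps the demanded bits of a wide element onto demanded sub-elements of the bitcast source (Scale sub-elements of NumSrcEltBits bits each) before handing off to SimplifyDemandedVectorElts. A simplified standalone sketch of that mapping - uint64_t masks instead of APInt, with a hypothetical demandedSubElts helper, not the LLVM code - shows why the adjusted mask matters:

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned NumSrcEltBits = 32;
  const unsigned Scale = 2; // one 64-bit element viewed as two 32-bit sub-elements

  // Mask supplied by the single caller that started the walk: it happens to
  // want only the low 32 bits of the 64-bit element.
  const uint64_t CallerMask = 0x00000000FFFFFFFFull;
  // Adjusted mask for a node that has other users as well: all bits demanded.
  const uint64_t NewMask = ~0ull;

  // Simplified mirror of the loop above: mark a sub-element demanded when its
  // whole slice of the mask is demanded (the real code handles partially
  // demanded slices separately).
  auto demandedSubElts = [&](uint64_t Mask) {
    uint64_t Elts = 0;
    for (unsigned i = 0; i != Scale; ++i) {
      uint64_t Sub = (Mask >> (i * NumSrcEltBits)) & 0xFFFFFFFFull;
      if (Sub == 0xFFFFFFFFull)
        Elts |= 1ull << i;
    }
    return Elts;
  };

  // Caller mask -> 0x1 (the high sub-element looks dead); adjusted mask -> 0x3
  // (both sub-elements stay demanded, as the other users require).
  std::printf("caller: 0x%llx  adjusted: 0x%llx\n",
              (unsigned long long)demandedSubElts(CallerMask),
              (unsigned long long)demandedSubElts(NewMask));
  return 0;
}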
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -1826,6 +1826,12 @@ declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind rea
define <4 x i64> @test_mm256_mul_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mul_epi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllq $32, %ymm0, %ymm2
; CHECK-NEXT: vpsrad $31, %ymm2, %ymm2
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
; CHECK-NEXT: vpsllq $32, %ymm1, %ymm2
; CHECK-NEXT: vpsrad $31, %ymm2, %ymm2
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%A = shl <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
@@ -1840,6 +1846,9 @@ declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
define <4 x i64> @test_mm256_mul_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mul_epu32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%A = and <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
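The blends and ANDs that reappear in tests like the two above are the "actually safe" cases from the commit message: PMULDQ/PMULUDQ only ever read the low 32 bits of each 64-bit element (sign-extending and zero-extending them respectively), so rewriting the high halves of their operands cannot change the product. A small standalone C++ model of one lane - the helper names pmuludqLane/pmuldqLane are made up for illustration - demonstrates this:

#include <cassert>
#include <cstdint>

// One 64-bit lane of PMULUDQ: unsigned multiply of the low 32 bits of each operand.
static uint64_t pmuludqLane(uint64_t A, uint64_t B) {
  return (uint64_t)(uint32_t)A * (uint64_t)(uint32_t)B;
}

// One 64-bit lane of PMULDQ: signed multiply of the low 32 bits of each operand.
static int64_t pmuldqLane(uint64_t A, uint64_t B) {
  return (int64_t)(int32_t)A * (int64_t)(int32_t)B;
}

int main() {
  const uint64_t A = 0x123456789ABCDEF0ull;
  const uint64_t B = 0x0FEDCBA987654321ull;

  // Zeroing the high halves (what the unsigned test's re-added masking does)
  // does not change either product, since the high halves are never read.
  assert(pmuludqLane(A, B) == pmuludqLane(A & 0xFFFFFFFFull, B & 0xFFFFFFFFull));
  assert(pmuldqLane(A, B) == pmuldqLane(A & 0xFFFFFFFFull, B & 0xFFFFFFFFull));

  // Sign-filling the high halves (what the signed test's re-added shifts and
  // blends do) is equally irrelevant to the product.
  const uint64_t ASext = (uint64_t)(int64_t)(int32_t)A;
  const uint64_t BSext = (uint64_t)(int64_t)(int32_t)B;
  assert(pmuldqLane(A, B) == pmuldqLane(ASext, BSext));
  return 0;
}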
5 changes: 5 additions & 0 deletions llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -1718,6 +1718,11 @@ entry:
define <8 x i64> @test_mm512_mul_epu32(<8 x i64> %__A, <8 x i64> %__B) nounwind {
; CHECK-LABEL: test_mm512_mul_epu32:
; CHECK: # %bb.0:
; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT: kmovw %eax, %k0
; CHECK-NEXT: knotw %k0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%tmp = and <8 x i64> %__A, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
113 changes: 67 additions & 46 deletions llvm/test/CodeGen/X86/pmul.ll
@@ -1318,55 +1318,76 @@ entry:
define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
; SSE2-LABEL: mul_v8i64_sext:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm2, %xmm10
; SSE2-NEXT: movdqa %xmm1, %xmm9
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: movdqa %xmm0, %xmm8
; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
; SSE2-NEXT: movdqa %xmm6, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: psrad $16, %xmm6
; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm7, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: psrad $16, %xmm7
; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm5, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm10[2,1,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[0,1,1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm9[2,1,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[0,1,1,3]
; SSE2-NEXT: pmuludq %xmm9, %xmm4
; SSE2-NEXT: pxor %xmm12, %xmm12
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1]
; SSE2-NEXT: pmuludq %xmm9, %xmm0
; SSE2-NEXT: psllq $32, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: movdqa %xmm5, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; SSE2-NEXT: pxor %xmm7, %xmm7
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1]
; SSE2-NEXT: pmuludq %xmm5, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1]
; SSE2-NEXT: pmuludq %xmm0, %xmm4
; SSE2-NEXT: paddq %xmm3, %xmm4
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm8[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pmuludq %xmm5, %xmm0
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: psrad $16, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE2-NEXT: psllq $32, %xmm4
; SSE2-NEXT: paddq %xmm4, %xmm0
; SSE2-NEXT: pmuludq %xmm11, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm12[0],xmm1[1],xmm12[1]
; SSE2-NEXT: pmuludq %xmm11, %xmm1
; SSE2-NEXT: psllq $32, %xmm1
; SSE2-NEXT: paddq %xmm5, %xmm1
; SSE2-NEXT: pmuludq %xmm10, %xmm6
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm12[0],xmm2[1],xmm12[1]
; SSE2-NEXT: pmuludq %xmm10, %xmm2
; SSE2-NEXT: psllq $32, %xmm2
; SSE2-NEXT: paddq %xmm6, %xmm2
; SSE2-NEXT: pmuludq %xmm8, %xmm7
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm12[0],xmm3[1],xmm12[1]
; SSE2-NEXT: pmuludq %xmm8, %xmm3
; SSE2-NEXT: psllq $32, %xmm3
; SSE2-NEXT: paddq %xmm7, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
; SSE2-NEXT: pmuludq %xmm1, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1]
; SSE2-NEXT: pmuludq %xmm3, %xmm4
; SSE2-NEXT: paddq %xmm5, %xmm4
; SSE2-NEXT: movdqa %xmm6, %xmm5
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: psrad $16, %xmm6
; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
; SSE2-NEXT: pmuludq %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
; SSE2-NEXT: psllq $32, %xmm4
; SSE2-NEXT: paddq %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
; SSE2-NEXT: pmuludq %xmm2, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1]
; SSE2-NEXT: pmuludq %xmm6, %xmm4
; SSE2-NEXT: paddq %xmm5, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm8[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pmuludq %xmm6, %xmm2
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: psrad $31, %xmm6
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE2-NEXT: psllq $32, %xmm4
; SSE2-NEXT: paddq %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1]
; SSE2-NEXT: pmuludq %xmm3, %xmm6
; SSE2-NEXT: pmuludq %xmm5, %xmm4
; SSE2-NEXT: paddq %xmm6, %xmm4
; SSE2-NEXT: pmuludq %xmm5, %xmm3
; SSE2-NEXT: psllq $32, %xmm4
; SSE2-NEXT: paddq %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSE41-LABEL: mul_v8i64_sext:
28 changes: 24 additions & 4 deletions llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2755,13 +2755,23 @@ define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_mul_epu32:
; SSE: # %bb.0:
; SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
; X86-SSE-LABEL: test_mm_mul_epu32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X86-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X86-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_epu32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; AVX1-NEXT: vpblendw $204, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xcc]
; AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpblendw $204, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xcc]
; AVX1-NEXT: # xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
@@ -2774,6 +2784,16 @@ define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; AVX512-NEXT: # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_mul_epu32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X64-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X64-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X64-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
%A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
%B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
%res = mul nuw <2 x i64> %A, %B
17 changes: 16 additions & 1 deletion llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
@@ -832,11 +832,26 @@ declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind rea
define <2 x i64> @test_mm_mul_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mul_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmuldq %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psllq $32, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psllq $32, %xmm0
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; SSE-NEXT: pmuldq %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_mul_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpsllq $32, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
56 changes: 27 additions & 29 deletions llvm/test/CodeGen/X86/vector-mul.ll
@@ -460,7 +460,7 @@ define <16 x i8> @mul_v16i8_neg5(<16 x i8> %a0) nounwind {
define <2 x i64> @mul_v2i64_17_65(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_17_65:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <17,u,65,u>
; X86-NEXT: movdqa {{.*#+}} xmm1 = [17,0,65,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
@@ -809,7 +809,7 @@ define <16 x i8> @mul_v16i8_neg15(<16 x i8> %a0) nounwind {
define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_15_63:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <15,u,63,u>
; X86-NEXT: movdqa {{.*#+}} xmm1 = [15,0,63,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
@@ -845,17 +845,16 @@ define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_15_63:
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = <4294967281,u,4294967233,u>
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
; X86-NEXT: pmuludq %xmm0, %xmm3
; X86-NEXT: paddq %xmm1, %xmm3
; X86-NEXT: psllq $32, %xmm3
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: pmuludq %xmm0, %xmm1
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: psrlq $32, %xmm2
; X86-NEXT: movdqa {{.*#+}} xmm3 = [4294967281,4294967295,4294967233,4294967295]
; X86-NEXT: pmuludq %xmm3, %xmm2
; X86-NEXT: paddq %xmm1, %xmm2
; X86-NEXT: psllq $32, %xmm2
; X86-NEXT: pmuludq %xmm3, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_15_63:
@@ -890,17 +889,16 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_neg_17_65:
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = <4294967279,u,4294967231,u>
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
; X86-NEXT: pmuludq %xmm0, %xmm3
; X86-NEXT: paddq %xmm1, %xmm3
; X86-NEXT: psllq $32, %xmm3
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: paddq %xmm3, %xmm0
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: pmuludq %xmm0, %xmm1
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: psrlq $32, %xmm2
; X86-NEXT: movdqa {{.*#+}} xmm3 = [4294967279,4294967295,4294967231,4294967295]
; X86-NEXT: pmuludq %xmm3, %xmm2
; X86-NEXT: paddq %xmm1, %xmm2
; X86-NEXT: psllq $32, %xmm2
; X86-NEXT: pmuludq %xmm3, %xmm0
; X86-NEXT: paddq %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: mul_v2i64_neg_17_65:
@@ -935,7 +933,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
; X86-LABEL: mul_v2i64_0_1:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <0,u,1,u>
; X86-NEXT: movdqa {{.*#+}} xmm1 = [0,0,1,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
@@ -977,7 +975,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = <0,u,4294967295,u>
; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,0,4294967295,4294967295]
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
@@ -1031,7 +1029,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: movdqa {{.*#+}} xmm2 = <15,u,4294967233,u>
; X86-NEXT: movdqa {{.*#+}} xmm2 = [15,0,4294967233,4294967295]
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psrlq $32, %xmm3
@@ -1174,7 +1172,7 @@ define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8>
define <2 x i64> @mul_v2i64_68_132(<2 x i64> %x) nounwind {
; X86-LABEL: mul_v2i64_68_132:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <68,u,132,u>
; X86-NEXT: movdqa {{.*#+}} xmm1 = [68,0,132,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
@@ -1210,7 +1208,7 @@ define <2 x i64> @mul_v2i64_68_132(<2 x i64> %x) nounwind {
define <2 x i64> @mul_v2i64_60_120(<2 x i64> %x) nounwind {
; X86-LABEL: mul_v2i64_60_120:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <60,u,124,u>
; X86-NEXT: movdqa {{.*#+}} xmm1 = [60,0,124,0]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: psrlq $32, %xmm0
