60 changes: 32 additions & 28 deletions llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -560,17 +560,18 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl $1, %eax
; X86-SSE2-NEXT: movd %eax, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; X86-SSE2-NEXT: retl
@@ -586,17 +587,18 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: movd %eax, %xmm2
; X64-SSE2-NEXT: pslld $23, %xmm1
; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; X64-SSE2-NEXT: retq
@@ -656,17 +658,18 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef2_eq:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl $1, %eax
; X86-SSE2-NEXT: movd %eax, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; X86-SSE2-NEXT: retl
@@ -682,17 +685,18 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef2_eq:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movl $1, %eax
; X64-SSE2-NEXT: movd %eax, %xmm2
; X64-SSE2-NEXT: pslld $23, %xmm1
; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; X64-SSE2-NEXT: retq
79 changes: 52 additions & 27 deletions llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -319,30 +319,39 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: movl $255, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: movl $255, %eax
; SSE3-NEXT: movd %eax, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: movd %eax, %xmm2
; SSE3-NEXT: pandn %xmm2, %xmm1
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: por {{.*}}(%rip), %xmm0
; SSE3-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT: por %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero
; SSSE3-NEXT: por {{.*}}(%rip), %xmm1
; SSSE3-NEXT: movl $255, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm2[0]
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero
; SSSE3-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT: por %xmm2, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -367,45 +376,61 @@ define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: movl $255, %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: por {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT: pandn %xmm3, %xmm5
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT: pand %xmm2, %xmm0
; SSE3-NEXT: movl $255, %eax
; SSE3-NEXT: movd %eax, %xmm2
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: movd %eax, %xmm3
; SSE3-NEXT: pandn %xmm3, %xmm2
; SSE3-NEXT: por %xmm2, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT: pand %xmm2, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
; SSE3-NEXT: por %xmm3, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: por {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm3, %xmm4
; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT: por %xmm4, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT: pand %xmm5, %xmm1
; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT: pandn %xmm3, %xmm5
; SSE3-NEXT: por %xmm5, %xmm1
; SSE3-NEXT: pand %xmm2, %xmm1
; SSE3-NEXT: por %xmm3, %xmm1
; SSE3-NEXT: por %xmm4, %xmm1
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
; SSSE3-NEXT: movl $255, %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm2[0]
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
; SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSSE3-NEXT: por %xmm0, %xmm2
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT: por {{.*}}(%rip), %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0],zero
; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT: por %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
16 changes: 12 additions & 4 deletions llvm/test/CodeGen/X86/load-partial.ll
@@ -307,17 +307,25 @@ define i32 @load_partial_illegal_type() {
; SSE2: # %bb.0:
; SSE2-NEXT: movzwl {{.*}}(%rip), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: movl $2, %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pslld $16, %xmm2
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_partial_illegal_type:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movzwl {{.*}}(%rip), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: movl $2, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,xmm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT: por {{.*}}(%rip), %xmm0
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: movd %xmm0, %eax
; SSSE3-NEXT: retq
;
12 changes: 7 additions & 5 deletions llvm/test/CodeGen/X86/pr30562.ll
@@ -6,18 +6,20 @@ define i32 @foo(i64* nocapture %perm, i32 %n) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; CHECK-NEXT: movl $1, %ecx
; CHECK-NEXT: movq %rcx, %xmm0
; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [2,3]
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2,3]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq -24(%rsp,%rcx,8), %rdx
; CHECK-NEXT: movups %xmm0, (%rdi,%rdx,8)
; CHECK-NEXT: movdqu %xmm0, (%rdi,%rdx,8)
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/sse3.ll
@@ -12,14 +12,16 @@ define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1]
; X86-NEXT: movl $1, %edx
; X86-NEXT: movd %edx, %xmm0
; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: t0:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1]
; X64-NEXT: movl $1, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
136 changes: 85 additions & 51 deletions llvm/test/CodeGen/X86/vector-mul.ll
@@ -1531,7 +1531,9 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
;
; X64-LABEL: mul_v2i64_0_1:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; X64-NEXT: movl $1, %eax
; X64-NEXT: movq %rax, %xmm1
; X64-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: psrlq $32, %xmm0
@@ -1542,7 +1544,9 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
;
; X64-XOP-LABEL: mul_v2i64_0_1:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; X64-XOP-NEXT: movl $1, %eax
; X64-XOP-NEXT: vmovq %rax, %xmm1
; X64-XOP-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
Expand All @@ -1552,7 +1556,9 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
;
; X64-AVX2-LABEL: mul_v2i64_0_1:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; X64-AVX2-NEXT: movl $1, %eax
; X64-AVX2-NEXT: vmovq %rax, %xmm1
; X64-AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
Expand All @@ -1562,7 +1568,10 @@ define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_0_1:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpmullq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512DQ-NEXT: movl $1, %eax
; X64-AVX512DQ-NEXT: vmovq %rax, %xmm1
; X64-AVX512DQ-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX512DQ-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 0, i64 1>
ret <2 x i64> %1
@@ -1586,45 +1595,62 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
;
; X64-LABEL: mul_v2i64_neg_0_1:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: psrlq $32, %xmm3
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $32, %xmm1
; X64-NEXT: movq $-1, %rax
; X64-NEXT: movq %rax, %xmm2
; X64-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; X64-NEXT: pmuludq %xmm2, %xmm1
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: movq %rax, %xmm3
; X64-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-NEXT: pmuludq %xmm0, %xmm3
; X64-NEXT: paddq %xmm1, %xmm3
; X64-NEXT: psllq $32, %xmm3
; X64-NEXT: pmuludq %xmm2, %xmm0
; X64-NEXT: paddq %xmm3, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v2i64_neg_0_1:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; X64-XOP-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm1
; X64-XOP-NEXT: movq $-1, %rax
; X64-XOP-NEXT: vmovq %rax, %xmm2
; X64-XOP-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; X64-XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; X64-XOP-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-XOP-NEXT: vmovq %rax, %xmm3
; X64-XOP-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-XOP-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; X64-XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; X64-XOP-NEXT: vpsllq $32, %xmm1, %xmm1
; X64-XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v2i64_neg_0_1:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; X64-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm1
; X64-AVX2-NEXT: movq $-1, %rax
; X64-AVX2-NEXT: vmovq %rax, %xmm2
; X64-AVX2-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; X64-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-AVX2-NEXT: vmovq %rax, %xmm3
; X64-AVX2-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; X64-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
; X64-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512DQ-LABEL: mul_v2i64_neg_0_1:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpmullq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512DQ-NEXT: movq $-1, %rax
; X64-AVX512DQ-NEXT: vmovq %rax, %xmm1
; X64-AVX512DQ-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX512DQ-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 0, i64 -1>
ret <2 x i64> %1
@@ -1648,40 +1674,48 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
;
; X64-LABEL: mul_v2i64_15_neg_63:
; X64: # %bb.0:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [15,18446744073709551553]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: psrlq $32, %xmm3
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $32, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [15,18446744073709551553]
; X64-NEXT: pmuludq %xmm2, %xmm1
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: movq %rax, %xmm3
; X64-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-NEXT: pmuludq %xmm0, %xmm3
; X64-NEXT: paddq %xmm1, %xmm3
; X64-NEXT: psllq $32, %xmm3
; X64-NEXT: pmuludq %xmm2, %xmm0
; X64-NEXT: paddq %xmm3, %xmm0
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: paddq %xmm2, %xmm0
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v2i64_15_neg_63:
; X64-XOP: # %bb.0:
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [15,18446744073709551553]
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; X64-XOP-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm1
; X64-XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [15,18446744073709551553]
; X64-XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; X64-XOP-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-XOP-NEXT: vmovq %rax, %xmm3
; X64-XOP-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-XOP-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; X64-XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; X64-XOP-NEXT: vpsllq $32, %xmm1, %xmm1
; X64-XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-XOP-NEXT: retq
;
; X64-AVX2-LABEL: mul_v2i64_15_neg_63:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,18446744073709551553]
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; X64-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm1
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,18446744073709551553]
; X64-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-AVX2-NEXT: vmovq %rax, %xmm3
; X64-AVX2-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7]
; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; X64-AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
; X64-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512DQ-LABEL: mul_v2i64_15_neg_63:
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -46,7 +46,8 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: movq $-1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
15 changes: 12 additions & 3 deletions llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -2352,8 +2352,11 @@ define <8 x i16> @trunc_mul_v8i32_v8i16_zext_8i8(<16 x i8> %a0, <8 x i32> %a1) {
define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_mul_const_v4i64_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: movl $1, %eax
; SSE-NEXT: movq %rax, %xmm2
; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; SSE-NEXT: pmuludq %xmm2, %xmm0
; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm1
; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
@@ -2505,7 +2508,10 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_mul_const_v16i64_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
; SSE-NEXT: movl $1, %eax
; SSE-NEXT: movq %rax, %xmm8
; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
; SSE-NEXT: pmuludq %xmm8, %xmm0
; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm1
; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm2
; SSE-NEXT: pmuludq {{.*}}(%rip), %xmm3
Expand Down Expand Up @@ -2533,7 +2539,10 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_mul_const_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm8
; AVX1-NEXT: movl $1, %eax
; AVX1-NEXT: vmovq %rax, %xmm4
; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm5