116 changes: 47 additions & 69 deletions llvm/test/CodeGen/X86/oddsubvector.ll
@@ -192,82 +192,67 @@ define void @PR42833() {
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: addl .Lb${{.*}}(%rip), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movaps {{.*#+}} xmm3 = <u,1,1,1>
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm2[0],xmm3[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: paddd %xmm3, %xmm4
; SSE2-NEXT: pslld $23, %xmm3
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm3
; SSE2-NEXT: cvttps2dq %xmm3, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pmuludq %xmm3, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm3, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3]
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4
; SSE2-NEXT: psubd %xmm1, %xmm4
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: paddd %xmm0, %xmm5
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
; SSE2-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movaps %xmm5, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm4
; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm3
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6
; SSE2-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm7
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT: psubd %xmm0, %xmm7
; SSE2-NEXT: psubd %xmm4, %xmm6
; SSE2-NEXT: psubd %xmm3, %xmm6
; SSE2-NEXT: psubd %xmm1, %xmm5
; SSE2-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm3, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm7, .Ld$local+{{.*}}(%rip)
; SSE2-NEXT: paddd %xmm4, %xmm4
; SSE2-NEXT: paddd %xmm3, %xmm3
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm4, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: movdqa %xmm3, .Lc$local+{{.*}}(%rip)
; SSE2-NEXT: retq
;
; SSE42-LABEL: PR42833:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0
; SSE42-NEXT: movd %xmm0, %eax
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
; SSE42-NEXT: movd %xmm1, %eax
; SSE42-NEXT: addl .Lb${{.*}}(%rip), %eax
; SSE42-NEXT: movdqa {{.*#+}} xmm2 = <u,1,1,1>
; SSE42-NEXT: pinsrd $0, %eax, %xmm2
; SSE42-NEXT: movdqa %xmm0, %xmm3
; SSE42-NEXT: paddd %xmm2, %xmm3
; SSE42-NEXT: pslld $23, %xmm2
; SSE42-NEXT: paddd {{.*}}(%rip), %xmm2
; SSE42-NEXT: cvttps2dq %xmm2, %xmm2
; SSE42-NEXT: pmulld %xmm0, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3,4,5,6,7]
; SSE42-NEXT: movd %eax, %xmm2
; SSE42-NEXT: paddd %xmm1, %xmm2
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm3
; SSE42-NEXT: psubd %xmm1, %xmm3
; SSE42-NEXT: paddd %xmm1, %xmm1
; SSE42-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm2, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
; SSE42-NEXT: psubd %xmm0, %xmm3
; SSE42-NEXT: paddd %xmm0, %xmm0
; SSE42-NEXT: movdqa %xmm1, %xmm4
; SSE42-NEXT: paddd %xmm1, %xmm4
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm4, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0
; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm2
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm4
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm5
; SSE42-NEXT: movdqa .Ld$local+{{.*}}(%rip), %xmm6
; SSE42-NEXT: pinsrd $0, %eax, %xmm0
; SSE42-NEXT: psubd %xmm0, %xmm6
; SSE42-NEXT: pinsrd $0, %eax, %xmm1
; SSE42-NEXT: psubd %xmm1, %xmm6
; SSE42-NEXT: psubd %xmm2, %xmm5
; SSE42-NEXT: psubd %xmm1, %xmm4
; SSE42-NEXT: psubd %xmm0, %xmm4
; SSE42-NEXT: movdqa %xmm4, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm5, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm3, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm6, .Ld$local+{{.*}}(%rip)
; SSE42-NEXT: paddd %xmm2, %xmm2
; SSE42-NEXT: paddd %xmm1, %xmm1
; SSE42-NEXT: movdqa %xmm1, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: paddd %xmm0, %xmm0
; SSE42-NEXT: movdqa %xmm0, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: movdqa %xmm2, .Lc$local+{{.*}}(%rip)
; SSE42-NEXT: retq
;
@@ -276,17 +261,13 @@ define void @PR42833() {
; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: addl .Lb${{.*}}(%rip), %eax
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <u,1,1,1>
; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpslld $1, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2
; AVX1-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip)
@@ -316,10 +297,9 @@ define void @PR42833() {
; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0
; AVX2-NEXT: addl .Lc$local+{{.*}}(%rip), %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0],mem[1,2,3,4,5,6,7]
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3,4,5,6,7]
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm3
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7]
; AVX2-NEXT: vmovdqu %ymm2, .Lc$local+{{.*}}(%rip)
; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm2
; AVX2-NEXT: vmovdqu .Ld$local+{{.*}}(%rip), %ymm3
@@ -341,10 +321,9 @@ define void @PR42833() {
; AVX512-NEXT: vmovdqu64 .Lc$local+{{.*}}(%rip), %zmm1
; AVX512-NEXT: addl .Lc$local+{{.*}}(%rip), %eax
; AVX512-NEXT: vmovd %eax, %xmm2
; AVX512-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],mem[1,2,3,4,5,6,7]
; AVX512-NEXT: vpaddd %ymm2, %ymm0, %ymm3
; AVX512-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0],ymm0[1,2,3,4,5,6,7]
; AVX512-NEXT: vpaddd %ymm2, %ymm0, %ymm2
; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
; AVX512-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm2
; AVX512-NEXT: vmovdqu %ymm0, .Lc$local+{{.*}}(%rip)
; AVX512-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0
@@ -364,14 +343,13 @@ define void @PR42833() {
; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: addl .Lb${{.*}}(%rip), %eax
; XOP-NEXT: vmovdqa {{.*#+}} xmm1 = <u,1,1,1>
; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; XOP-NEXT: vmovd %eax, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm3
; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpslld $1, %xmm3, %xmm3
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT: vmovdqa .Ld$local+{{.*}}(%rip), %xmm2
; XOP-NEXT: vpsubd .Lc$local+{{.*}}(%rip), %xmm2, %xmm2
; XOP-NEXT: vmovups %ymm1, .Lc$local+{{.*}}(%rip)
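Note on the PR42833 checks above: the lanes that previously went through the generic variable-shift lowering (pslld $23 + cvttps2dq on SSE2, vpsllvd on AVX2/AVX512, vpshld on XOP) are shifted by one, which the backend now emits as an add of the value with itself (paddd/vpaddd x,x). A minimal standalone IR sketch, not taken from the test and with an illustrative function name, that produces the same add-based lowering:

; With llc -mtriple=x86_64-unknown-unknown, a uniform shl-by-1 is expected to
; lower to paddd %xmm0, %xmm0 rather than a shift.
define <4 x i32> @shl_by_one(<4 x i32> %x) {
  %r = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %r
}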
37 changes: 11 additions & 26 deletions llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -671,7 +671,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE-LABEL: splatvar_funnnel_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [63,63]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: psubq %xmm1, %xmm3
@@ -683,31 +682,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
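For reference, the splatvar rotate checks in this file exercise a funnel-shift intrinsic whose amount is a splat of lane 0; the updated AVX output suggests the explicit splat (vpbroadcastq/pshufd) can be dropped because psllq/psrlq only read the low 64 bits of the amount register. A sketch of the IR shape under test, with an illustrative function name; the exact test body may differ:

declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64> @rot_by_splat_amt(<2 x i64> %x, <2 x i64> %amt) nounwind {
  ; Splat lane 0 of the amount, then rotate left: fshl with both value operands
  ; equal to %x is a rotate by the (implicitly masked) amount.
  %splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %splat)
  ret <2 x i64> %res
}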
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -514,9 +514,9 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
@@ -537,7 +537,6 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3
37 changes: 11 additions & 26 deletions llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -711,7 +711,6 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE-LABEL: splatvar_funnnel_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [63,63]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: psubq %xmm1, %xmm3
@@ -723,31 +722,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -560,9 +560,9 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
@@ -583,7 +583,6 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/X86/vector-narrow-binop.ll
@@ -151,12 +151,11 @@ define <4 x double> @fmul_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm1, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm0, %xmm0
; SSE-NEXT: mulpd %xmm2, %xmm2
; SSE-NEXT: addpd %xmm0, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm1
; SSE-NEXT: addpd %xmm2, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fmul_v2f64: