168 changes: 84 additions & 84 deletions llvm/test/CodeGen/X86/matrix-multiply.ll


52 changes: 26 additions & 26 deletions llvm/test/CodeGen/X86/oddshuffles.ll
@@ -129,7 +129,7 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, ptr %p) nounwind {
;
; AVX-LABEL: v5i32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[1,2,2,3]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,2,2,3]
; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: vextractps $3, %xmm0, 16(%rdi)
; AVX-NEXT: vmovaps %xmm1, (%rdi)
@@ -161,7 +161,7 @@ define void @v5f32(<4 x float> %a, <4 x float> %b, ptr %p) nounwind {
; AVX-LABEL: v5f32:
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; AVX-NEXT: vextractps $3, %xmm0, 16(%rdi)
; AVX-NEXT: vmovaps %xmm1, (%rdi)
; AVX-NEXT: retq
@@ -313,9 +313,9 @@ define void @v7i32(<4 x i32> %a, <4 x i32> %b, ptr %p) nounwind {
; AVX-LABEL: v7i32:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,2]
; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,3,2]
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; AVX-NEXT: vmovss %xmm1, 24(%rdi)
; AVX-NEXT: vmovlps %xmm0, 16(%rdi)
; AVX-NEXT: vmovaps %xmm2, (%rdi)
@@ -513,12 +513,12 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, ptr %p) nounwind {
; AVX1-NEXT: vmovsldup {{.*#+}} ymm2 = ymm2[0,0,2,2,4,4,6,6]
; AVX1-NEXT: vpermilps {{.*#+}} ymm3 = ymm0[0,u,u,1,5,u,u,6]
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2,3,4,5],ymm2[6],ymm3[7]
; AVX1-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[0,1,0,1]
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm1[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm3
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3],xmm3[3,3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; AVX1-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX1-NEXT: vmovaps %ymm2, (%rdi)
@@ -533,7 +533,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, ptr %p) nounwind {
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3],xmm3[3,3]
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; AVX2-SLOW-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX2-SLOW-NEXT: vmovaps %ymm2, (%rdi)
@@ -548,7 +548,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, ptr %p) nounwind {
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm3 = <u,3,7,u,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm3, %ymm0
; AVX2-FAST-ALL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-FAST-ALL-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX2-FAST-ALL-NEXT: vmovaps %ymm2, (%rdi)
@@ -563,7 +563,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, ptr %p) nounwind {
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3],xmm3[3,3]
; AVX2-FAST-PERLANE-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX2-FAST-PERLANE-NEXT: vmovaps %ymm2, (%rdi)
@@ -574,12 +574,12 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, ptr %p) nounwind {
; XOP: # %bb.0:
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; XOP-NEXT: vpermil2ps {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[u,1,5,u],ymm2[6],ymm0[6]
; XOP-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[0,1,0,1]
; XOP-NEXT: vshufps {{.*#+}} xmm3 = xmm1[0,1,0,1]
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm3
; XOP-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3],xmm3[3,3]
; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; XOP-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; XOP-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; XOP-NEXT: vmovaps %xmm0, 32(%rdi)
; XOP-NEXT: vmovaps %ymm2, (%rdi)
@@ -1479,14 +1479,14 @@ define void @interleave_24i32_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm3[0,3],ymm4[0,2],ymm3[4,7],ymm4[4,6]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,0,1]
; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm0[1,0],ymm4[2,0],ymm0[5,4],ymm4[6,4]
; AVX1-NEXT: vpermilps {{.*#+}} ymm5 = ymm5[0,1,2,0,4,5,6,4]
; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm5[0,1,2,0,4,5,6,4]
; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm5[6,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm0[2,0],ymm4[3,0],ymm0[6,4],ymm4[7,4]
; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm4[0,0],ymm5[2,0],ymm4[4,4],ymm5[6,4]
; AVX1-NEXT: vmovups 16(%rdi), %xmm6
; AVX1-NEXT: vblendps {{.*#+}} ymm7 = ymm2[0,1],ymm1[2],ymm2[3,4],ymm1[5],ymm2[6,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm7 = ymm7[1,2],ymm6[0,3],ymm7[5,6],ymm6[4,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm7 = ymm7[0,2,3,1,4,6,7,5]
; AVX1-NEXT: vshufps {{.*#+}} ymm7 = ymm7[0,2,3,1,4,6,7,5]
; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm7[0,1,2,3,4],ymm5[5,6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm6[1,0],ymm2[2,0],ymm6[5,4],ymm2[6,4]
@@ -1520,7 +1520,7 @@ define void @interleave_24i32_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
; AVX2-SLOW-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm1 = ymm2[0,1,0,3,4,5,4,7]
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,1,0,3,4,5,4,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
; AVX2-SLOW-NEXT: vmovups %ymm3, (%rsi)
@@ -1580,7 +1580,7 @@ define void @interleave_24i32_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpermilps {{.*#+}} ymm1 = ymm2[0,1,0,3,4,5,4,7]
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,1,0,3,4,5,4,7]
; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm3, (%rsi)
@@ -1600,14 +1600,14 @@ define void @interleave_24i32_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; XOP-NEXT: vshufps {{.*#+}} ymm3 = ymm3[0,3],ymm4[0,2],ymm3[4,7],ymm4[4,6]
; XOP-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3,0,1]
; XOP-NEXT: vshufps {{.*#+}} ymm5 = ymm0[1,0],ymm4[2,0],ymm0[5,4],ymm4[6,4]
; XOP-NEXT: vpermilps {{.*#+}} ymm5 = ymm5[0,1,2,0,4,5,6,4]
; XOP-NEXT: vshufps {{.*#+}} ymm5 = ymm5[0,1,2,0,4,5,6,4]
; XOP-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm5[6,7]
; XOP-NEXT: vshufps {{.*#+}} ymm5 = ymm0[2,0],ymm4[3,0],ymm0[6,4],ymm4[7,4]
; XOP-NEXT: vshufps {{.*#+}} ymm5 = ymm4[0,0],ymm5[2,0],ymm4[4,4],ymm5[6,4]
; XOP-NEXT: vmovups 16(%rdi), %xmm6
; XOP-NEXT: vblendps {{.*#+}} ymm7 = ymm2[0,1],ymm1[2],ymm2[3,4],ymm1[5],ymm2[6,7]
; XOP-NEXT: vshufps {{.*#+}} ymm7 = ymm7[1,2],ymm6[0,3],ymm7[5,6],ymm6[4,7]
; XOP-NEXT: vpermilps {{.*#+}} ymm7 = ymm7[0,2,3,1,4,6,7,5]
; XOP-NEXT: vshufps {{.*#+}} ymm7 = ymm7[0,2,3,1,4,6,7,5]
; XOP-NEXT: vblendps {{.*#+}} ymm5 = ymm7[0,1,2,3,4],ymm5[5,6,7]
; XOP-NEXT: vblendps {{.*#+}} ymm2 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7]
; XOP-NEXT: vshufps {{.*#+}} ymm2 = ymm6[1,0],ymm2[2,0],ymm6[5,4],ymm2[6,4]
@@ -1746,7 +1746,7 @@ define void @interleave_24i32_in(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-SLOW-NEXT: vmovups (%rdx), %ymm1
; AVX2-SLOW-NEXT: vmovups (%rcx), %ymm2
; AVX2-SLOW-NEXT: vbroadcastsd 24(%rsi), %ymm3
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[1,2,3,3,5,6,7,7]
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm4 = ymm1[1,2,3,3,5,6,7,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[2,2,2,3]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1],ymm3[2],ymm4[3,4],ymm3[5],ymm4[6,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[2,1,3,3]
@@ -1757,7 +1757,7 @@ define void @interleave_24i32_in(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
; AVX2-SLOW-NEXT: vbroadcastsd (%rcx), %ymm5
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm2[1,1,2,2]
@@ -1786,7 +1786,7 @@ define void @interleave_24i32_in(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
; AVX2-FAST-ALL-NEXT: vbroadcastsd (%rcx), %ymm5
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
; AVX2-FAST-ALL-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-ALL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX2-FAST-ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm2[1,1,2,2]
@@ -1803,7 +1803,7 @@ define void @interleave_24i32_in(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-FAST-PERLANE-NEXT: vmovups (%rdx), %ymm1
; AVX2-FAST-PERLANE-NEXT: vmovups (%rcx), %ymm2
; AVX2-FAST-PERLANE-NEXT: vbroadcastsd 24(%rsi), %ymm3
; AVX2-FAST-PERLANE-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[1,2,3,3,5,6,7,7]
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm4 = ymm1[1,2,3,3,5,6,7,7]
; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[2,2,2,3]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1],ymm3[2],ymm4[3,4],ymm3[5],ymm4[6,7]
; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[2,1,3,3]
Expand All @@ -1814,7 +1814,7 @@ define void @interleave_24i32_in(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
; AVX2-FAST-PERLANE-NEXT: vbroadcastsd (%rcx), %ymm5
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
; AVX2-FAST-PERLANE-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm1 = ymm2[1,1,2,2]
@@ -2409,7 +2409,7 @@ define void @D107009(ptr %input, ptr %output) {
; AVX1-NEXT: vunpcklps {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[4],mem[4],ymm1[5],mem[5]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,0,4,5,6,4]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,2,0,4,5,6,4]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
@@ -2420,7 +2420,7 @@ define void @D107009(ptr %input, ptr %output) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vmovshdup {{.*#+}} ymm3 = ymm1[1,1,3,3,5,5,7,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[3,3,3,3,7,7,7,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm4 = ymm1[3,3,3,3,7,7,7,7]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm5 = ymm1[0,0,3,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[1,1,1,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[3,3,3,3]
@@ -2481,7 +2481,7 @@ define void @D107009(ptr %input, ptr %output) {
; XOP-NEXT: vunpcklps {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[4],mem[4],ymm1[5],mem[5]
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm1
; XOP-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; XOP-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,0,4,5,6,4]
; XOP-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,2,0,4,5,6,4]
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; XOP-NEXT: vpsrld $16, %xmm0, %xmm0
@@ -2492,7 +2492,7 @@ define void @D107009(ptr %input, ptr %output) {
; XOP-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vmovshdup {{.*#+}} ymm3 = ymm1[1,1,3,3,5,5,7,7]
; XOP-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[3,3,3,3,7,7,7,7]
; XOP-NEXT: vshufps {{.*#+}} ymm4 = ymm1[3,3,3,3,7,7,7,7]
; XOP-NEXT: vpermilpd {{.*#+}} ymm5 = ymm1[0,0,3,2]
; XOP-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[1,1,1,1]
; XOP-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[3,3,3,3]
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -202,6 +202,7 @@
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: X86 Atom pad short functions
; CHECK-NEXT: X86 LEA Fixup
; CHECK-NEXT: X86 Fixup Inst Tuning
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible
; CHECK-NEXT: X86 Discriminate Memory Operands
; CHECK-NEXT: X86 Insert Cache Prefetches
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/packss.ll
@@ -185,7 +185,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vzeroupper
@@ -236,7 +236,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vzeroupper
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/palignr.ll
@@ -11,7 +11,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
;
; CHECK-AVX-LABEL: test1:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-AVX-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
ret <4 x i32> %C
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr31956.ll
@@ -12,7 +12,7 @@ define <4 x float> @foo() {
; CHECK-NEXT: vmovaps G2(%rip), %xmm0
; CHECK-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],mem[0,2]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; CHECK-NEXT: retq
entry:
%V = load <2 x float>, ptr @G1, align 8
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/pr40730.ll
@@ -6,7 +6,7 @@ define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,1,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,1,1,0]
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
@@ -27,7 +27,7 @@ define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,1,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,1,1,0]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5],ymm0[6,7]
; CHECK-NEXT: retq
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/pr40811.ll
@@ -6,8 +6,8 @@ define <8 x i32> @_Z6test70v(ptr %id14793) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovaps (%rdi), %xmm0
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2,3]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,1,0]
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3,1,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2,1,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: retq
entry:
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr50609.ll
@@ -13,7 +13,7 @@ define void @PR50609(ptr noalias nocapture %RET, ptr noalias %aFOO, <16 x i32> %
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vpsrad $2, %xmm2, %xmm2
; CHECK-NEXT: vcvtdq2ps %ymm2, %ymm2
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
; CHECK-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
; CHECK-NEXT: vmaskmovps %ymm2, %ymm1, 32(%rdi)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/rotate_vec.ll
@@ -80,7 +80,7 @@ define <4 x i32> @rot_v4i32_non_splat_2masks(<4 x i32> %x) {
define <4 x i32> @rot_v4i32_zero_non_splat(<4 x i32> %x) {
; XOPAVX1-LABEL: rot_v4i32_zero_non_splat:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: rot_v4i32_zero_non_splat:
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/X86/scalarize-fp.ll
@@ -394,7 +394,7 @@ define <4 x float> @fmul_splat_splat_v4f32(<4 x float> %vx, <4 x float> %vy) {
; AVX-LABEL: fmul_splat_splat_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
%splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
%splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
@@ -413,7 +413,7 @@ define <8 x float> @fdiv_splat_splat_v8f32(<8 x float> %vx, <8 x float> %vy) {
; AVX-LABEL: fdiv_splat_splat_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
%splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
@@ -555,7 +555,7 @@ define <4 x float> @fmul_splat_const_op1_v4f32(<4 x float> %vx, <4 x float> %vy)
; AVX-LABEL: fmul_splat_const_op1_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
%splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
%r = fmul fast <4 x float> %splatx, <float 17.0, float 17.0, float 17.0, float 17.0>
@@ -575,7 +575,7 @@ define <8 x float> @fdiv_splat_const_op0_v8f32(<8 x float> %vy) {
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
%splatx = shufflevector <8 x float> <float 4.5, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
@@ -597,7 +597,7 @@ define <8 x float> @fdiv_const_op1_splat_v8f32(<8 x float> %vx) {
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
%splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
@@ -654,7 +654,7 @@ define <4 x float> @splat0_fmul_v4f32(<4 x float> %vx, <4 x float> %vy) {
; AVX-LABEL: splat0_fmul_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
%b = fmul fast <4 x float> %vx, %vy
%r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
@@ -672,7 +672,7 @@ define <8 x float> @splat0_fdiv_v8f32(<8 x float> %vx, <8 x float> %vy) {
; AVX-LABEL: splat0_fdiv_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
%b = fdiv fast <8 x float> %vx, %vy
@@ -729,7 +729,7 @@ define <4 x float> @splat0_fmul_const_op1_v4f32(<4 x float> %vx) {
; AVX-LABEL: splat0_fmul_const_op1_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
%b = fmul fast <4 x float> %vx, <float 6.0, float -1.0, float 1.0, float 7.0>
%r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
@@ -745,7 +745,7 @@ define <8 x float> @splat0_fdiv_const_op1_v8f32(<8 x float> %vx) {
;
; AVX-LABEL: splat0_fdiv_const_op1_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
%b = fdiv fast <8 x float> %vx, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
@@ -766,7 +766,7 @@ define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) {
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
%b = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %vx
@@ -786,7 +786,7 @@ define <4 x float> @multi_use_binop(<4 x float> %x, <4 x float> %y) {
; AVX-LABEL: multi_use_binop:
; AVX: # %bb.0:
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/shuffle-of-shift.ll
@@ -156,7 +156,7 @@ define <4 x i32> @shuffle_i32_of_ashr_i64(<2 x i64> %x) nounwind {
; X64-AVX2-NEXT: pushq %rax
; X64-AVX2-NEXT: movl $63, %edi
; X64-AVX2-NEXT: callq llvm.x86.sse2.psrai.q@PLT
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-AVX2-NEXT: popq %rax
; X64-AVX2-NEXT: retq
;
@@ -173,7 +173,7 @@ define <4 x i32> @shuffle_i32_of_ashr_i64(<2 x i64> %x) nounwind {
; X86-AVX2-NEXT: pushl $63
; X86-AVX2-NEXT: calll llvm.x86.sse2.psrai.q@PLT
; X86-AVX2-NEXT: addl $4, %esp
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X86-AVX2-NEXT: retl
%i1 = tail call <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64> %x, i32 63)
%i2 = bitcast <2 x i64> %i1 to <4 x i32>
@@ -336,7 +336,7 @@ define <2 x i64> @shuffle_i64_of_ashr_i64(<2 x i64> %x) nounwind {
; X64-AVX2-NEXT: pushq %rax
; X64-AVX2-NEXT: movl $63, %edi
; X64-AVX2-NEXT: callq llvm.x86.sse2.psrai.q@PLT
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: popq %rax
; X64-AVX2-NEXT: retq
;
@@ -353,7 +353,7 @@ define <2 x i64> @shuffle_i64_of_ashr_i64(<2 x i64> %x) nounwind {
; X86-AVX2-NEXT: pushl $63
; X86-AVX2-NEXT: calll llvm.x86.sse2.psrai.q@PLT
; X86-AVX2-NEXT: addl $4, %esp
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: retl
%i1 = tail call <2 x i64> @llvm.x86.sse2.psrai.q(<2 x i64> %x, i32 63)
%i2 = bitcast <2 x i64> %i1 to <2 x i64>
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
@@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind {
define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask2:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask3:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3>
@@ -88,8 +88,8 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind {
; AVX2-LABEL: undef_splatmask4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,2,3,3]
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT: vmovaps %xmm0, (%rdi)
; AVX2-NEXT: vmovaps %xmm1, %xmm0
; AVX2-NEXT: retq
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sse-fsignum.ll
@@ -38,10 +38,10 @@ define void @signum64a(ptr) {
; AVX-NEXT: vmovapd (%rdi), %xmm0
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,2,3]
; AVX-NEXT: vcvtdq2pd %xmm2, %xmm2
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: vsubpd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmovapd %xmm0, (%rdi)
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -2104,7 +2104,7 @@ define <4 x float> @test_mm_set_ps1(float %a0) nounwind {
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
@@ -2123,7 +2123,7 @@ define <4 x float> @test_mm_set_ps1(float %a0) nounwind {
;
; X64-AVX1-LABEL: test_mm_set_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
@@ -2265,7 +2265,7 @@ define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
@@ -2284,7 +2284,7 @@ define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
;
; X64-AVX1-LABEL: test_mm_set1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
@@ -2623,7 +2623,7 @@ define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) {
; X86-AVX1-LABEL: test_mm_store_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
@@ -2644,7 +2644,7 @@ define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) {
;
; X64-AVX1-LABEL: test_mm_store_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
@@ -2710,7 +2710,7 @@ define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) {
; X86-AVX1-LABEL: test_mm_store1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
@@ -2731,7 +2731,7 @@ define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) {
;
; X64-AVX1-LABEL: test_mm_store1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
@@ -2972,15 +2972,15 @@ define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) {
; X86-AVX1-LABEL: test_mm_storer_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X86-AVX1-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storer_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X86-AVX512-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
@@ -2994,14 +2994,14 @@ define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) {
;
; X64-AVX1-LABEL: test_mm_storer_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X64-AVX1-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storer_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X64-AVX512-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -4523,7 +4523,7 @@ define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
@@ -5636,7 +5636,7 @@ define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
;
; AVX1-LABEL: test_mm_shuffle_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -402,13 +402,13 @@ define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
;
; AVX1-LABEL: test_x86_sse2_pshuf_d:
; AVX1: ## %bb.0: ## %entry
; AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX1-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; AVX1-NEXT: ## xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshuf_d:
; AVX512: ## %bb.0: ## %entry
; AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX512-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; AVX512-NEXT: ## xmm0 = xmm0[3,2,1,0]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/sse2.ll
@@ -144,7 +144,7 @@ define void @test4(<4 x float> %X, ptr %res) nounwind {
; X86-AVX-LABEL: test4:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X86-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
@@ -156,7 +156,7 @@ define void @test4(<4 x float> %X, ptr %res) nounwind {
;
; X64-AVX-LABEL: test4:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X64-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
%tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
@@ -448,7 +448,7 @@ define void @test13(ptr %res, ptr %A, ptr %B, ptr %C) nounwind {
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: vmovaps (%edx), %xmm0
; X86-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X86-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X86-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
@@ -464,7 +464,7 @@ define void @test13(ptr %res, ptr %A, ptr %B, ptr %C) nounwind {
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdx), %xmm0
; X64-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X64-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
%tmp3 = load <4 x float>, ptr %B ; <<4 x float>> [#uses=1]
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -338,16 +338,16 @@ define <4 x float> @test13(<4 x float> %A, <4 x float> %B) {
;
; AVX1-LABEL: test13:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX512-LABEL: test13:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; AVX512-NEXT: retq
@@ -407,8 +407,8 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX1-NEXT: vaddss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovsldup {{.*#+}} xmm1 = xmm2[0,0,2,2]
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
@@ -419,8 +419,8 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX512-NEXT: vaddss %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vbroadcastss %xmm2, %xmm1
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
@@ -469,8 +469,8 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
; AVX-NEXT: vaddss %xmm2, %xmm5, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX-NEXT: retq
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/X86/sse41.ll
@@ -194,7 +194,7 @@ define float @ext_1(<4 x float> %v) nounwind {
; X86-AVX1-LABEL: ext_1:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: pushl %eax ## encoding: [0x50]
; X86-AVX1-NEXT: vpermilps $255, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X86-AVX1-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X86-AVX1-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X86-AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x05,A,A,A,A]
; X86-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
@@ -206,7 +206,7 @@ define float @ext_1(<4 x float> %v) nounwind {
; X86-AVX512-LABEL: ext_1:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: pushl %eax ## encoding: [0x50]
; X86-AVX512-NEXT: vpermilps $255, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X86-AVX512-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X86-AVX512-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X86-AVX512-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0x05,A,A,A,A]
; X86-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
@@ -225,15 +225,15 @@ define float @ext_1(<4 x float> %v) nounwind {
;
; X64-AVX1-LABEL: ext_1:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpermilps $255, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X64-AVX1-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X64-AVX1-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X64-AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x05,A,A,A,A]
; X64-AVX1-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: ext_1:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpermilps $255, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X64-AVX512-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X64-AVX512-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X64-AVX512-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0x05,A,A,A,A]
; X64-AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
@@ -257,7 +257,7 @@ define float @ext_2(<4 x float> %v) nounwind {
; X86-AVX1-LABEL: ext_2:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: pushl %eax ## encoding: [0x50]
; X86-AVX1-NEXT: vpermilps $255, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X86-AVX1-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X86-AVX1-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X86-AVX1-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24]
@@ -267,7 +267,7 @@ define float @ext_2(<4 x float> %v) nounwind {
; X86-AVX512-LABEL: ext_2:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: pushl %eax ## encoding: [0x50]
; X86-AVX512-NEXT: vpermilps $255, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X86-AVX512-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X86-AVX512-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X86-AVX512-NEXT: vmovss %xmm0, (%esp) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24]
@@ -282,13 +282,13 @@ define float @ext_2(<4 x float> %v) nounwind {
;
; X64-AVX1-LABEL: ext_2:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpermilps $255, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X64-AVX1-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X64-AVX1-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: ext_2:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpermilps $255, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; X64-AVX512-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; X64-AVX512-NEXT: ## xmm0 = xmm0[3,3,3,3]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%s = extractelement <4 x float> %v, i32 3
@@ -696,15 +696,15 @@ define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b)
;
; AVX1-LABEL: insertps_from_shufflevector_i32_2:
; AVX1: ## %bb.0: ## %entry
; AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; AVX1-NEXT: vshufps $238, %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0xc6,0xc9,0xee]
; AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: insertps_from_shufflevector_i32_2:
; AVX512: ## %bb.0: ## %entry
; AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; AVX512-NEXT: vshufps $238, %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0xc6,0xc9,0xee]
; AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
; AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
@@ -1096,7 +1096,7 @@ define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
;
; AVX1-LABEL: i32_shuf_XYY0:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
; AVX1-NEXT: vshufps $212, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xd4]
; AVX1-NEXT: ## xmm0 = xmm0[0,1,1,3]
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; AVX1-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
@@ -1105,7 +1105,7 @@ define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
;
; AVX512-LABEL: i32_shuf_XYY0:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpermilps $212, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
; AVX512-NEXT: vshufps $212, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0xd4]
; AVX512-NEXT: ## xmm0 = xmm0[0,1,1,3]
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
; AVX512-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
@@ -1132,7 +1132,7 @@ define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
;
; AVX1-LABEL: i32_shuf_XYW0:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpermilps $244, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xf4]
; AVX1-NEXT: vshufps $244, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xf4]
; AVX1-NEXT: ## xmm0 = xmm0[0,1,3,3]
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; AVX1-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
@@ -1141,7 +1141,7 @@ define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
;
; AVX512-LABEL: i32_shuf_XYW0:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpermilps $244, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xf4]
; AVX512-NEXT: vshufps $244, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0xf4]
; AVX512-NEXT: ## xmm0 = xmm0[0,1,3,3]
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
; AVX512-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
@@ -1169,7 +1169,7 @@ define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
;
; AVX1-LABEL: i32_shuf_W00W:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpermilps $255, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; AVX1-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; AVX1-NEXT: ## xmm0 = xmm0[3,3,3,3]
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; AVX1-NEXT: vblendps $6, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x06]
@@ -1178,7 +1178,7 @@ define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
;
; AVX512-LABEL: i32_shuf_W00W:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpermilps $255, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xff]
; AVX512-NEXT: vshufps $255, %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0xff]
; AVX512-NEXT: ## xmm0 = xmm0[3,3,3,3]
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
; AVX512-NEXT: vblendps $6, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x06]
@@ -1209,7 +1209,7 @@ define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; AVX1-NEXT: vblendps $1, %xmm0, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x0c,0xc0,0x01]
; AVX1-NEXT: ## xmm0 = xmm0[0],xmm2[1,2,3]
; AVX1-NEXT: vpermilps $0, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x00]
; AVX1-NEXT: vshufps $0, %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0xc6,0xc9,0x00]
; AVX1-NEXT: ## xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
; AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3]
@@ -1245,7 +1245,7 @@ define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
; AVX1-LABEL: i32_shuf_X00X:
; AVX1: ## %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; AVX1-NEXT: ## xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vblendps $6, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x06]
; AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
@@ -1880,7 +1880,7 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, ptr
; X86-AVX1-LABEL: insertps_pr20411:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X86-AVX1-NEXT: vshufps $238, %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0xc6,0xc9,0xee]
; X86-AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X86-AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X86-AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
@@ -1890,7 +1890,7 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, ptr
; X86-AVX512-LABEL: insertps_pr20411:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X86-AVX512-NEXT: vshufps $238, %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0xc6,0xc9,0xee]
; X86-AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X86-AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X86-AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
@@ -1908,7 +1908,7 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, ptr
;
; X64-AVX1-LABEL: insertps_pr20411:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X64-AVX1-NEXT: vshufps $238, %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0xc6,0xc9,0xee]
; X64-AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X64-AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X64-AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
@@ -1917,7 +1917,7 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, ptr
;
; X64-AVX512-LABEL: insertps_pr20411:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X64-AVX512-NEXT: vshufps $238, %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0xc6,0xc9,0xee]
; X64-AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X64-AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X64-AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/swizzle-avx2.ll
@@ -25,7 +25,7 @@ define <8 x i32> @swizzle_1(<8 x i32> %v) {
define <8 x i32> @swizzle_2(<8 x i32> %v) {
; CHECK-LABEL: swizzle_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT: retq
%1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
@@ -35,7 +35,7 @@ define <8 x i32> @swizzle_3(<8 x i32> %v) {
define <8 x i32> @swizzle_3(<8 x i32> %v) {
; CHECK-LABEL: swizzle_3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT: retq
%1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
81 changes: 60 additions & 21 deletions llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
@@ -7,7 +7,7 @@
define <16 x float> @transform_VPERMILPSZrr(<16 x float> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSZrr:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-NEXT: retq
%shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x float> %shufp
@@ -16,7 +16,7 @@ define <16 x float> @transform_VPERMILPSZrr(<16 x float> %a) nounwind {
define <8 x float> @transform_VPERMILPSYrr(<8 x float> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSYrr:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: retq
%shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %shufp
@@ -25,7 +25,7 @@ define <8 x float> @transform_VPERMILPSYrr(<8 x float> %a) nounwind {
define <4 x float> @transform_VPERMILPSrr(<4 x float> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSrr:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: retq
%shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shufp
@@ -107,30 +107,75 @@ define <4 x float> @transform_VPERMILPSrrk(<4 x float> %a, <4 x float> %b, i4 %m
}

define <16 x float> @transform_VPERMILPSZrm(ptr %ap) nounwind {
; CHECK-LABEL: transform_VPERMILPSZrm:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-NEXT: retq
; CHECK-ICX-LABEL: transform_VPERMILPSZrm:
; CHECK-ICX: # %bb.0:
; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSZrm:
; CHECK-V4: # %bb.0:
; CHECK-V4-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-V4-NEXT: retq
;
; CHECK-AVX512-LABEL: transform_VPERMILPSZrm:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-AVX512-NEXT: retq
;
; CHECK-ZNVER4-LABEL: transform_VPERMILPSZrm:
; CHECK-ZNVER4: # %bb.0:
; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-ZNVER4-NEXT: retq
%a = load <16 x float>, ptr %ap
%shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x float> %shufp
}

define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
; CHECK-LABEL: transform_VPERMILPSYrm:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-NEXT: retq
; CHECK-ICX-LABEL: transform_VPERMILPSYrm:
; CHECK-ICX: # %bb.0:
; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSYrm:
; CHECK-V4: # %bb.0:
; CHECK-V4-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-V4-NEXT: retq
;
; CHECK-AVX512-LABEL: transform_VPERMILPSYrm:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-AVX512-NEXT: retq
;
; CHECK-ZNVER4-LABEL: transform_VPERMILPSYrm:
; CHECK-ZNVER4: # %bb.0:
; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-ZNVER4-NEXT: retq
%a = load <8 x float>, ptr %ap
%shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %shufp
}

define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
; CHECK-LABEL: transform_VPERMILPSrm:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-NEXT: retq
; CHECK-ICX-LABEL: transform_VPERMILPSrm:
; CHECK-ICX: # %bb.0:
; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSrm:
; CHECK-V4: # %bb.0:
; CHECK-V4-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-V4-NEXT: retq
;
; CHECK-AVX512-LABEL: transform_VPERMILPSrm:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX512-NEXT: retq
;
; CHECK-ZNVER4-LABEL: transform_VPERMILPSrm:
; CHECK-ZNVER4: # %bb.0:
; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-ZNVER4-NEXT: retq
%a = load <4 x float>, ptr %ap
%shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shufp
@@ -213,9 +258,3 @@ define <4 x float> @transform_VPERMILPSrmk(ptr %ap, <4 x float> %b, i4 %mask_int
%res = select <4 x i1> %mask, <4 x float> %shufp, <4 x float> %b
ret <4 x float> %res
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-AVX512: {{.*}}
; CHECK-ICX: {{.*}}
; CHECK-V4: {{.*}}
; CHECK-ZNVER4: {{.*}}
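The trailing list is what update_llc_test_checks.py emits when several RUN configurations produce identical output: shared lines land under the common CHECK prefix, and the per-CPU prefixes only surface where codegen diverges, as in the *rm tests above. A sketch of the RUN-line shape such a file uses — the triple and -mcpu values below are illustrative assumptions, not copied from the test:

; RUN: llc < %s -mtriple=x86_64-- -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-ICX
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4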
55 changes: 40 additions & 15 deletions llvm/test/CodeGen/X86/tuning-shuffle-permilps.ll
@@ -7,7 +7,7 @@
define <8 x float> @transform_VPERMILPSYrr(<8 x float> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSYrr:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: retq
%shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %shufp
@@ -16,33 +16,58 @@ define <8 x float> @transform_VPERMILPSYrr(<8 x float> %a) nounwind {
define <4 x float> @transform_VPERMILPSrr(<4 x float> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSrr:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: retq
%shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shufp
}

define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
; CHECK-LABEL: transform_VPERMILPSYrm:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-NEXT: retq
; CHECK-AVX1-LABEL: transform_VPERMILPSYrm:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX1-DELAY-LABEL: transform_VPERMILPSYrm:
; CHECK-AVX1-DELAY: # %bb.0:
; CHECK-AVX1-DELAY-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-AVX1-DELAY-NEXT: retq
;
; CHECK-AVX2-LABEL: transform_VPERMILPSYrm:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX2-DELAY-LABEL: transform_VPERMILPSYrm:
; CHECK-AVX2-DELAY: # %bb.0:
; CHECK-AVX2-DELAY-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-AVX2-DELAY-NEXT: retq
%a = load <8 x float>, ptr %ap
%shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %shufp
}

define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
; CHECK-LABEL: transform_VPERMILPSrm:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-NEXT: retq
; CHECK-AVX1-LABEL: transform_VPERMILPSrm:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX1-DELAY-LABEL: transform_VPERMILPSrm:
; CHECK-AVX1-DELAY: # %bb.0:
; CHECK-AVX1-DELAY-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX1-DELAY-NEXT: retq
;
; CHECK-AVX2-LABEL: transform_VPERMILPSrm:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX2-DELAY-LABEL: transform_VPERMILPSrm:
; CHECK-AVX2-DELAY: # %bb.0:
; CHECK-AVX2-DELAY-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX2-DELAY-NEXT: retq
%a = load <4 x float>, ptr %ap
%shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shufp
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-AVX1: {{.*}}
; CHECK-AVX1-DELAY: {{.*}}
; CHECK-AVX2: {{.*}}
; CHECK-AVX2-DELAY: {{.*}}
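The register/memory split in this file has a structural cause: vshufps is a two-source shuffle, so standing in for vpermilps means naming the same register twice, and a folded load can supply only one operand. From memory the backend must either keep vpermilps or switch to the one-source integer shuffle vpshufd, which crosses into the integer domain. The three shapes, as a sketch (imm 27 = 0x1b selects lanes 3,2,1,0; which tuning picks which is exactly what the CHECK lines above encode):

; vpermilps $27, (%rdi), %xmm0       ; one load, stays in the FP domain
; vpshufd   $27, (%rdi), %xmm0       ; one load, integer domain -- fine when domain crossings are cheap
; vmovaps (%rdi), %xmm0              ; otherwise the value must reach a register first,
; vshufps $27, %xmm0, %xmm0, %xmm0   ; because vshufps reads the same source twice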
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
@@ -607,7 +607,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-64-NEXT: vcvttss2si %xmm1, %rax
; AVX-64-NEXT: vmovq %rax, %xmm1
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -661,7 +661,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX512F-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vcvttss2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -715,7 +715,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -774,7 +774,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX-32-NEXT: vcomiss %xmm1, %xmm2
; AVX-32-NEXT: vmovaps %xmm1, %xmm3
; AVX-32-NEXT: jae .LBB3_4
@@ -836,7 +836,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX-64-NEXT: vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-64-NEXT: vcomiss %xmm1, %xmm3
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
@@ -908,7 +908,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $40, %esp
; AVX512F-32-NEXT: .cfi_offset %ebx, -12
; AVX512F-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512F-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: xorl %eax, %eax
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
@@ -974,7 +974,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX512F-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1001,7 +1001,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: andl $-8, %esp
; AVX512VL-32-NEXT: subl $40, %esp
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
@@ -1067,7 +1067,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
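Every long check block in this file follows one scalarization pattern: packed f32→i64 conversion (vcvttps2qq) needs AVX512DQ, so on these targets each lane is shuffled down to element 0, converted with scalar vcvttss2si or vcvttss2usi, and the results are reassembled through GPRs. One lane, as a sketch with illustrative register choices:

; vshufps $255, %xmm0, %xmm0, %xmm1   ; xmm1 = xmm0[3,3,3,3] -- lane 3 into element 0
; vcvttss2si %xmm1, %rax              ; scalar truncating convert
; vmovq %rax, %xmm1                   ; back into a vector register for reassembly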
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -385,7 +385,7 @@ define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-LABEL: strict_vector_fptosi_v8f32_to_v8i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
@@ -399,7 +399,7 @@ define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
@@ -442,7 +442,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %edi, -16
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm3 = xmm2[3,3,3,3]
; AVX512VL-32-NEXT: vshufps {{.*#+}} xmm3 = xmm2[3,3,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
@@ -477,7 +477,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %al
@@ -574,7 +574,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
@@ -588,7 +588,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
@@ -209,7 +209,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps (%esp)
@@ -236,7 +236,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
@@ -410,7 +410,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
@@ -471,7 +471,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
@@ -887,21 +887,21 @@ define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 {
;
; AVX1-32-LABEL: uitofp_v2i1_v2f64:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v2i1_v2f64:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-LABEL: uitofp_v2i1_v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
@@ -923,22 +923,22 @@ define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 {
;
; AVX512DQ-LABEL: uitofp_v2i1_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQ-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-64-NEXT: retq
@@ -1218,7 +1218,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
@@ -1406,7 +1406,7 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -641,11 +641,11 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $64, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
@@ -758,11 +758,11 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $64, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
@@ -919,11 +919,11 @@ define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $48, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
@@ -1042,11 +1042,11 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $48, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT: vshufps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
@@ -273,18 +273,18 @@ define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-32-NEXT: subl $128, %esp
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
@@ -371,18 +371,18 @@ define <8 x double> @uitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-32-NEXT: subl $128, %esp
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; NODQ-32-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm3[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm3[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; NODQ-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm2[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm4, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractps $1, %xmm3, %eax
; NODQ-32-NEXT: shrl $31, %eax
@@ -499,19 +499,19 @@ define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-32-NEXT: andl $-8, %esp
; NODQ-32-NEXT: subl $96, %esp
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
@@ -595,19 +595,19 @@ define <8 x float> @uitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-32-NEXT: andl $-8, %esp
; NODQ-32-NEXT: subl $96, %esp
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; NODQ-32-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm3[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm3[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; NODQ-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm2[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm4, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractps $1, %xmm0, %eax
; NODQ-32-NEXT: shrl $31, %eax
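The NODQ-32 bodies all share one idiom: i686 has no 64-bit GPR and, without DQ, no packed i64 converter, so each i64 lane is spilled to the stack with vmovlps — the [2,3,2,3] shuffle first rotates the high lane down — and converted through x87 with fildll, which loads a signed 64-bit integer. One lane as a sketch (stack offsets are illustrative):

; vmovlps %xmm0, 8(%esp)              ; spill the low i64 lane
; vshufps $238, %xmm0, %xmm0, %xmm1   ; xmm1 = xmm0[2,3,2,3] -- high lane down
; vmovlps %xmm1, 16(%esp)             ; spill the high i64 lane
; fildll 8(%esp)                      ; x87 load of the 64-bit integer
; fstps 24(%esp)                      ; round and store as float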
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -954,7 +954,7 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
;
; AVX1-LABEL: fptosi_4f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vcvttss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -972,7 +972,7 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
;
; AVX2-LABEL: fptosi_4f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -990,7 +990,7 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
;
; AVX512F-LABEL: fptosi_4f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1008,7 +1008,7 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
;
; AVX512VL-LABEL: fptosi_4f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1062,7 +1062,7 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
;
; AVX1-LABEL: fptosi_8f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vcvttss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1080,7 +1080,7 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
;
; AVX2-LABEL: fptosi_8f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1103,7 +1103,7 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
; AVX512F-NEXT: vmovq %rsi, %xmm0
; AVX512F-NEXT: vmovq %rdx, %xmm1
@@ -1121,7 +1121,7 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
; AVX512VL-NEXT: vmovq %rsi, %xmm0
; AVX512VL-NEXT: vmovq %rdx, %xmm1
@@ -1566,7 +1566,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
;
; AVX1-LABEL: fptoui_4f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
@@ -1609,7 +1609,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
;
; AVX2-LABEL: fptoui_4f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
@@ -1652,7 +1652,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
;
; AVX512F-LABEL: fptoui_4f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1670,7 +1670,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
;
; AVX512VL-LABEL: fptoui_4f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1750,7 +1750,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
;
; AVX1-LABEL: fptoui_8f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
@@ -1793,7 +1793,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
;
; AVX2-LABEL: fptoui_8f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
@@ -1841,7 +1841,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512F-NEXT: vmovq %rsi, %xmm0
; AVX512F-NEXT: vmovq %rdx, %xmm1
@@ -1859,7 +1859,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512VL-NEXT: vmovq %rsi, %xmm0
; AVX512VL-NEXT: vmovq %rdx, %xmm1
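The pre-AVX512 fptoui blocks in this file rely on the usual unsigned-via-signed trick, since cvttss2si only produces signed results; the vsubss against a 2^63 splat and the paired vcvttss2si visible above are its second arm. In pseudocode (the folded parts of the diff hide the exact compare-and-select sequence, so this is the semantics, not a transcription):

; for each lane x, producing a u64:
;   if (x < 2^63) result = (u64)cvttss2si(x)                              ; fits the signed range
;   else          result = (u64)cvttss2si(x - 2^63) ^ 0x8000000000000000  ; rebase, convert, flip the top bit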
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -933,7 +933,7 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0
@@ -3631,7 +3631,7 @@ define <4 x double> @uitofp_load_4i64_to_4f64(ptr%a) {
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0
@@ -5431,7 +5431,7 @@ define float @extract3_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
;
; AVX-LABEL: extract3_sitofp_v4i32_f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
@@ -5457,7 +5457,7 @@ define double @extract3_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
;
; AVX-LABEL: extract3_sitofp_v4i32_f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
@@ -5489,29 +5489,29 @@ define float @extract3_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
@@ -5543,29 +5543,29 @@ define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
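The uitofp_4i64_to_4f64 hunks above show the magic-constant lowering of u64→f64: split each lane into 32-bit halves, OR each half into the mantissa of a biased double, and let FP arithmetic reassemble the value. Worked through with the constants in their usual form (the diff folds them behind LCPI labels, so treat these as assumptions):

; lo = bitcast_f64(lo32 | 0x4330000000000000)   ; == 2^52 + lo32, exact
; hi = bitcast_f64(hi32 | 0x4530000000000000)   ; == 2^84 + hi32 * 2^32, exact
; result = (hi - (2^84 + 2^52)) + lo            ; == hi32 * 2^32 + lo32
; the vblendps/vshufps pair above separates the even (low) and odd (high) dwords first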
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vec_umulo.ll
@@ -2748,8 +2748,8 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
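In the umulo hunk, the [1,1,3,3] shuffles feed vpmuludq, which multiplies only the even 32-bit lanes of each operand into 64-bit products; duplicating the odd lanes into even positions produces the other two products, and nonzero high halves then flag overflow. The pairing as a sketch (imm 245 = 0xf5 selects lanes 1,1,3,3):

; vshufps $245, %xmm1, %xmm1, %xmm2   ; xmm2 = xmm1[1,1,3,3]
; vshufps $245, %xmm0, %xmm0, %xmm3   ; xmm3 = xmm0[1,1,3,3]
; vpmuludq %xmm2, %xmm3, %xmm2        ; 64-bit products of lanes 1 and 3
; vpmuludq %xmm1, %xmm0, %xmm3        ; 64-bit products of lanes 0 and 2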
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -43,7 +43,7 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsllq %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm2[2,3,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm2[2,3,2,3]
; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
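The funnel-shift hunk closes the set with the shuffle in one more role: AVX1 has no per-lane variable 64-bit shift, and vpsllq with an XMM count applies only the count in the low qword to both lanes. So the lowering shifts once with the low count, rotates the high count down with a [2,3,2,3] shuffle, shifts again, and blends (sketch; register names are illustrative):

; vpsllq %xmm2, %xmm0, %xmm3          ; both lanes shifted by xmm2's low qword
; vshufps $238, %xmm2, %xmm2, %xmm4   ; xmm4 = xmm2[2,3,2,3] -- high count down
; vpsllq %xmm4, %xmm0, %xmm4          ; both lanes shifted by the high count
; vpblendw $240, %xmm4, %xmm3, %xmm0  ; low qword from xmm3, high qword from xmm4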