80 changes: 40 additions & 40 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-6.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-7.ll
@@ -302,10 +302,10 @@ define void @load_i64_stride7_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512-SLOW-NEXT: vmovdqa64 64(%rdi), %zmm4
; AVX512-SLOW-NEXT: vmovdqa64 (%rdi), %zmm5
; AVX512-SLOW-NEXT: vinserti128 $1, 160(%rdi), %ymm0, %ymm0
; AVX512-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,7,14,u>
; AVX512-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,14,u]
; AVX512-SLOW-NEXT: vpermi2q %zmm4, %zmm5, %zmm1
; AVX512-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; AVX512-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = <9,0,7,u>
; AVX512-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [9,0,7,u]
; AVX512-SLOW-NEXT: vpermi2q %zmm5, %zmm4, %zmm1
; AVX512-SLOW-NEXT: vpbroadcastq 176(%rdi), %ymm2
; AVX512-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
@@ -356,10 +356,10 @@ define void @load_i64_stride7_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512-FAST-NEXT: vmovdqa64 64(%rdi), %zmm4
; AVX512-FAST-NEXT: vmovdqa64 (%rdi), %zmm5
; AVX512-FAST-NEXT: vinserti128 $1, 160(%rdi), %ymm0, %ymm0
; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,7,14,u>
; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,14,u]
; AVX512-FAST-NEXT: vpermi2q %zmm4, %zmm5, %zmm1
; AVX512-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <9,0,7,u>
; AVX512-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [9,0,7,u]
; AVX512-FAST-NEXT: vpermi2q %zmm5, %zmm4, %zmm1
; AVX512-FAST-NEXT: vpbroadcastq 176(%rdi), %ymm2
; AVX512-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-3.ll
@@ -1265,9 +1265,9 @@ define void @load_i8_stride3_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX1-ONLY-NEXT: vpshufb %xmm12, %xmm10, %xmm8
; AVX1-ONLY-NEXT: vpshufb %xmm12, %xmm11, %xmm9
; AVX1-ONLY-NEXT: vpshufb %xmm12, %xmm7, %xmm13
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm14 = <1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u>
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm14 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u]
; AVX1-ONLY-NEXT: vpshufb %xmm14, %xmm6, %xmm6
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm15 = <128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u>
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm15 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u]
; AVX1-ONLY-NEXT: vpshufb %xmm15, %xmm0, %xmm12
; AVX1-ONLY-NEXT: vpor %xmm6, %xmm12, %xmm6
; AVX1-ONLY-NEXT: vmovdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
@@ -2007,37 +2007,37 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-NEXT: vmovdqa64 64(%rdi), %zmm1
; AVX512BW-NEXT: vmovdqa64 128(%rdi), %zmm2
; AVX512BW-NEXT: vmovdqa64 192(%rdi), %zmm3
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = <128,128,128,128,0,4,8,12,u,u,u,u,u,u,u,u,128,128,128,128,16,20,24,28,u,u,u,u,u,u,u,u,128,128,128,128,32,36,40,44,u,u,u,u,u,u,u,u,128,128,128,128,48,52,56,60,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [128,128,128,128,0,4,8,12,u,u,u,u,u,u,u,u,128,128,128,128,16,20,24,28,u,u,u,u,u,u,u,u,128,128,128,128,32,36,40,44,u,u,u,u,u,u,u,u,128,128,128,128,48,52,56,60,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm4, %zmm3, %zmm5
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = <0,4,8,12,128,128,128,128,u,u,u,u,u,u,u,u,16,20,24,28,128,128,128,128,u,u,u,u,u,u,u,u,32,36,40,44,128,128,128,128,u,u,u,u,u,u,u,u,48,52,56,60,128,128,128,128,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [0,4,8,12,128,128,128,128,u,u,u,u,u,u,u,u,16,20,24,28,128,128,128,128,u,u,u,u,u,u,u,u,32,36,40,44,128,128,128,128,u,u,u,u,u,u,u,u,48,52,56,60,128,128,128,128,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm6, %zmm2, %zmm7
; AVX512BW-NEXT: vporq %zmm5, %zmm7, %zmm5
; AVX512BW-NEXT: vpshufb %zmm4, %zmm1, %zmm4
; AVX512BW-NEXT: vpshufb %zmm6, %zmm0, %zmm6
; AVX512BW-NEXT: vporq %zmm4, %zmm6, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [0,4,8,12,1,5,9,13,16,20,24,28,17,21,25,29]
; AVX512BW-NEXT: vpermt2d %zmm5, %zmm6, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm5 = <128,128,128,128,1,5,9,13,u,u,u,u,u,u,u,u,128,128,128,128,17,21,25,29,u,u,u,u,u,u,u,u,128,128,128,128,33,37,41,45,u,u,u,u,u,u,u,u,128,128,128,128,49,53,57,61,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm5 = [128,128,128,128,1,5,9,13,u,u,u,u,u,u,u,u,128,128,128,128,17,21,25,29,u,u,u,u,u,u,u,u,128,128,128,128,33,37,41,45,u,u,u,u,u,u,u,u,128,128,128,128,49,53,57,61,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm5, %zmm3, %zmm7
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm8 = <1,5,9,13,128,128,128,128,u,u,u,u,u,u,u,u,17,21,25,29,128,128,128,128,u,u,u,u,u,u,u,u,33,37,41,45,128,128,128,128,u,u,u,u,u,u,u,u,49,53,57,61,128,128,128,128,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm8 = [1,5,9,13,128,128,128,128,u,u,u,u,u,u,u,u,17,21,25,29,128,128,128,128,u,u,u,u,u,u,u,u,33,37,41,45,128,128,128,128,u,u,u,u,u,u,u,u,49,53,57,61,128,128,128,128,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm8, %zmm2, %zmm9
; AVX512BW-NEXT: vporq %zmm7, %zmm9, %zmm7
; AVX512BW-NEXT: vpshufb %zmm5, %zmm1, %zmm5
; AVX512BW-NEXT: vpshufb %zmm8, %zmm0, %zmm8
; AVX512BW-NEXT: vporq %zmm5, %zmm8, %zmm5
; AVX512BW-NEXT: vpermt2d %zmm7, %zmm6, %zmm5
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm7 = <128,128,128,128,2,6,10,14,u,u,u,u,u,u,u,u,128,128,128,128,18,22,26,30,u,u,u,u,u,u,u,u,128,128,128,128,34,38,42,46,u,u,u,u,u,u,u,u,128,128,128,128,50,54,58,62,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm7 = [128,128,128,128,2,6,10,14,u,u,u,u,u,u,u,u,128,128,128,128,18,22,26,30,u,u,u,u,u,u,u,u,128,128,128,128,34,38,42,46,u,u,u,u,u,u,u,u,128,128,128,128,50,54,58,62,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm7, %zmm3, %zmm8
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm9 = <2,6,10,14,128,128,128,128,u,u,u,u,u,u,u,u,18,22,26,30,128,128,128,128,u,u,u,u,u,u,u,u,34,38,42,46,128,128,128,128,u,u,u,u,u,u,u,u,50,54,58,62,128,128,128,128,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm9 = [2,6,10,14,128,128,128,128,u,u,u,u,u,u,u,u,18,22,26,30,128,128,128,128,u,u,u,u,u,u,u,u,34,38,42,46,128,128,128,128,u,u,u,u,u,u,u,u,50,54,58,62,128,128,128,128,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm9, %zmm2, %zmm10
; AVX512BW-NEXT: vporq %zmm8, %zmm10, %zmm8
; AVX512BW-NEXT: vpshufb %zmm7, %zmm1, %zmm7
; AVX512BW-NEXT: vpshufb %zmm9, %zmm0, %zmm9
; AVX512BW-NEXT: vporq %zmm7, %zmm9, %zmm7
; AVX512BW-NEXT: vpermt2d %zmm8, %zmm6, %zmm7
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm8 = <128,128,128,128,3,7,11,15,u,u,u,u,u,u,u,u,128,128,128,128,19,23,27,31,u,u,u,u,u,u,u,u,128,128,128,128,35,39,43,47,u,u,u,u,u,u,u,u,128,128,128,128,51,55,59,63,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm8 = [128,128,128,128,3,7,11,15,u,u,u,u,u,u,u,u,128,128,128,128,19,23,27,31,u,u,u,u,u,u,u,u,128,128,128,128,35,39,43,47,u,u,u,u,u,u,u,u,128,128,128,128,51,55,59,63,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm8, %zmm3, %zmm3
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm9 = <3,7,11,15,128,128,128,128,u,u,u,u,u,u,u,u,19,23,27,31,128,128,128,128,u,u,u,u,u,u,u,u,35,39,43,47,128,128,128,128,u,u,u,u,u,u,u,u,51,55,59,63,128,128,128,128,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm9 = [3,7,11,15,128,128,128,128,u,u,u,u,u,u,u,u,19,23,27,31,128,128,128,128,u,u,u,u,u,u,u,u,35,39,43,47,128,128,128,128,u,u,u,u,u,u,u,u,51,55,59,63,128,128,128,128,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb %zmm9, %zmm2, %zmm2
; AVX512BW-NEXT: vporq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpshufb %zmm8, %zmm1, %zmm1
84 changes: 42 additions & 42 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll

Large diffs are not rendered by default.

182 changes: 91 additions & 91 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll

Large diffs are not rendered by default.

458 changes: 229 additions & 229 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll

Large diffs are not rendered by default.

128 changes: 64 additions & 64 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-3.ll

Large diffs are not rendered by default.

40 changes: 20 additions & 20 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll
@@ -272,10 +272,10 @@ define void @store_i16_stride4_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX2-ONLY-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX2-ONLY-NEXT: vpermq {{.*#+}} ymm2 = ymm1[0,2,0,2]
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm3 = <u,u,u,u,0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15>
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm3 = [u,u,u,u,0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15]
; AVX2-ONLY-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-ONLY-NEXT: vpermq {{.*#+}} ymm4 = ymm0[0,2,0,2]
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm5 = <0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u>
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm5 = [0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u]
; AVX2-ONLY-NEXT: vpshufb %ymm5, %ymm4, %ymm4
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0],ymm2[1],ymm4[2],ymm2[3],ymm4[4],ymm2[5],ymm4[6],ymm2[7]
; AVX2-ONLY-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,1,3]
@@ -295,10 +295,10 @@ define void @store_i16_stride4_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1
; AVX512F-NEXT: vpermq {{.*#+}} ymm2 = ymm1[1,3,1,3]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = <u,u,u,u,0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15>
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [u,u,u,u,0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15]
; AVX512F-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpermq {{.*#+}} ymm4 = ymm0[1,3,1,3]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = <0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u>
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u]
; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0],ymm2[1],ymm4[2],ymm2[3],ymm4[4],ymm2[5],ymm4[6],ymm2[7]
; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,0,2]
@@ -1409,26 +1409,26 @@ define void @store_i16_stride4_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm1
; AVX512BW-NEXT: vmovdqa64 (%rdx), %zmm2
; AVX512BW-NEXT: vmovdqa64 (%rcx), %zmm3
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = <u,u,0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [u,u,0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39]
; AVX512BW-NEXT: vpermi2w %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm5 = <0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm5 = [0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39,u,u]
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm5
; AVX512BW-NEXT: movw $-21846, %ax # imm = 0xAAAA
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vmovdqa32 %zmm4, %zmm5 {%k1}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = <u,u,8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [u,u,8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47]
; AVX512BW-NEXT: vpermi2w %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = <8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47,u,u]
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm6
; AVX512BW-NEXT: vmovdqa32 %zmm4, %zmm6 {%k1}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = <u,u,16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [u,u,16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55]
; AVX512BW-NEXT: vpermi2w %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm7 = <16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm7 = [16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55,u,u]
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm7
; AVX512BW-NEXT: vmovdqa32 %zmm4, %zmm7 {%k1}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = <u,u,24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm4 = [u,u,24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63]
; AVX512BW-NEXT: vpermi2w %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = <24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63,u,u]
; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa32 %zmm4, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, 192(%r8)
@@ -3014,32 +3014,32 @@ define void @store_i16_stride4_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512BW-NEXT: vmovdqa64 64(%rdx), %zmm5
; AVX512BW-NEXT: vmovdqa64 (%rcx), %zmm6
; AVX512BW-NEXT: vmovdqa64 64(%rcx), %zmm7
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm8 = <u,u,8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm8 = [u,u,8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47]
; AVX512BW-NEXT: vmovdqa64 %zmm4, %zmm9
; AVX512BW-NEXT: vpermt2w %zmm6, %zmm8, %zmm9
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm10 = <8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm10 = [8,40,u,u,9,41,u,u,10,42,u,u,11,43,u,u,12,44,u,u,13,45,u,u,14,46,u,u,15,47,u,u]
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm11
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm10, %zmm11
; AVX512BW-NEXT: movw $-21846, %ax # imm = 0xAAAA
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vmovdqa32 %zmm9, %zmm11 {%k1}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm9 = <u,u,0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm9 = [u,u,0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39]
; AVX512BW-NEXT: vmovdqa64 %zmm4, %zmm12
; AVX512BW-NEXT: vpermt2w %zmm6, %zmm9, %zmm12
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm13 = <0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm13 = [0,32,u,u,1,33,u,u,2,34,u,u,3,35,u,u,4,36,u,u,5,37,u,u,6,38,u,u,7,39,u,u]
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm14
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm13, %zmm14
; AVX512BW-NEXT: vmovdqa32 %zmm12, %zmm14 {%k1}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm12 = <u,u,24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm12 = [u,u,24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63]
; AVX512BW-NEXT: vmovdqa64 %zmm4, %zmm15
; AVX512BW-NEXT: vpermt2w %zmm6, %zmm12, %zmm15
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm16 = <24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm16 = [24,56,u,u,25,57,u,u,26,58,u,u,27,59,u,u,28,60,u,u,29,61,u,u,30,62,u,u,31,63,u,u]
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm17
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm16, %zmm17
; AVX512BW-NEXT: vmovdqa32 %zmm15, %zmm17 {%k1}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm15 = <u,u,16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm15 = [u,u,16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55]
; AVX512BW-NEXT: vpermt2w %zmm6, %zmm15, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = <16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [16,48,u,u,17,49,u,u,18,50,u,u,19,51,u,u,20,52,u,u,21,53,u,u,22,54,u,u,23,55,u,u]
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm6, %zmm0
; AVX512BW-NEXT: vmovdqa32 %zmm4, %zmm0 {%k1}
; AVX512BW-NEXT: vpermi2w %zmm7, %zmm5, %zmm8
236 changes: 118 additions & 118 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll

Large diffs are not rendered by default.

168 changes: 84 additions & 84 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-6.ll

Large diffs are not rendered by default.

700 changes: 350 additions & 350 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll

Large diffs are not rendered by default.

324 changes: 162 additions & 162 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll
@@ -55,7 +55,7 @@ define void @store_i32_stride3_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,1,3,5,u,u>
; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,2,4,1,3,5,u,u]
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovlps %xmm1, 16(%rcx)
@@ -120,7 +120,7 @@ define void @store_i32_stride3_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-SLOW-NEXT: vmovaps (%rdi), %xmm0
; AVX2-SLOW-NEXT: vmovaps (%rsi), %xmm1
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm3 = <0,4,u,1,5,u,2,6>
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm3 = [0,4,u,1,5,u,2,6]
; AVX2-SLOW-NEXT: vpermps %ymm2, %ymm3, %ymm2
; AVX2-SLOW-NEXT: vbroadcastsd (%rdx), %ymm3
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
@@ -140,7 +140,7 @@ define void @store_i32_stride3_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm1
; AVX2-FAST-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2],xmm2[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <0,4,u,1,5,u,2,6>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,u,1,5,u,2,6]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vbroadcastsd (%rdx), %ymm2
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2],ymm0[3,4],ymm2[5],ymm0[6,7]
@@ -154,7 +154,7 @@ define void @store_i32_stride3_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-PERLANE-NEXT: vmovaps (%rdi), %xmm0
; AVX2-FAST-PERLANE-NEXT: vmovaps (%rsi), %xmm1
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm3 = <0,4,u,1,5,u,2,6>
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm3 = [0,4,u,1,5,u,2,6]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm2, %ymm3, %ymm2
; AVX2-FAST-PERLANE-NEXT: vbroadcastsd (%rdx), %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
@@ -171,7 +171,7 @@ define void @store_i32_stride3_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vmovaps (%rdi), %xmm0
; AVX512-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
; AVX512-NEXT: vinsertf32x4 $2, (%rdx), %zmm0, %zmm0
; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = <0,4,8,1,5,9,2,6,10,3,7,11,u,u,u,u>
; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,8,1,5,9,2,6,10,3,7,11,u,u,u,u]
; AVX512-NEXT: vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm0, 32(%rcx)
; AVX512-NEXT: vmovaps %ymm0, (%rcx)
@@ -669,15 +669,15 @@ define void @store_i32_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vmovdqa64 (%rsi), %zmm1
; AVX512-NEXT: vmovdqa64 (%rdx), %zmm2
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = <0,16,u,1,17,u,2,18,u,3,19,u,4,20,u,5>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,16,u,1,17,u,2,18,u,3,19,u,4,20,u,5]
; AVX512-NEXT: vpermi2d %zmm1, %zmm0, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15]
; AVX512-NEXT: vpermi2d %zmm2, %zmm3, %zmm4
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = <5,u,22,6,u,23,7,u,24,8,u,25,9,u,26,10>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,u,22,6,u,23,7,u,24,8,u,25,9,u,26,10]
; AVX512-NEXT: vpermi2d %zmm0, %zmm1, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm5 = [0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15]
; AVX512-NEXT: vpermi2d %zmm2, %zmm3, %zmm5
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = <u,11,27,u,12,28,u,13,29,u,14,30,u,15,31,u>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = [u,11,27,u,12,28,u,13,29,u,14,30,u,15,31,u]
; AVX512-NEXT: vpermi2d %zmm1, %zmm0, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31]
; AVX512-NEXT: vpermi2d %zmm2, %zmm3, %zmm0
@@ -1328,17 +1328,17 @@ define void @store_i32_stride3_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vmovdqa64 64(%rsi), %zmm3
; AVX512-NEXT: vmovdqa64 (%rdx), %zmm4
; AVX512-NEXT: vmovdqa64 64(%rdx), %zmm5
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm6 = <0,16,u,1,17,u,2,18,u,3,19,u,4,20,u,5>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm6 = [0,16,u,1,17,u,2,18,u,3,19,u,4,20,u,5]
; AVX512-NEXT: vmovdqa64 %zmm0, %zmm7
; AVX512-NEXT: vpermt2d %zmm2, %zmm6, %zmm7
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm8 = [0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15]
; AVX512-NEXT: vpermt2d %zmm4, %zmm8, %zmm7
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm9 = <u,11,27,u,12,28,u,13,29,u,14,30,u,15,31,u>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm9 = [u,11,27,u,12,28,u,13,29,u,14,30,u,15,31,u]
; AVX512-NEXT: vmovdqa64 %zmm1, %zmm10
; AVX512-NEXT: vpermt2d %zmm3, %zmm9, %zmm10
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm11 = [26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31]
; AVX512-NEXT: vpermt2d %zmm5, %zmm11, %zmm10
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = <5,u,22,6,u,23,7,u,24,8,u,25,9,u,26,10>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = [5,u,22,6,u,23,7,u,24,8,u,25,9,u,26,10]
; AVX512-NEXT: vmovdqa64 %zmm3, %zmm13
; AVX512-NEXT: vpermt2d %zmm1, %zmm12, %zmm13
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm14 = [0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15]
@@ -2734,17 +2734,17 @@ define void @store_i32_stride3_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vmovdqa64 64(%rdx), %zmm9
; AVX512-NEXT: vmovdqa64 128(%rdx), %zmm10
; AVX512-NEXT: vmovdqa64 192(%rdx), %zmm11
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = <0,16,u,1,17,u,2,18,u,3,19,u,4,20,u,5>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = [0,16,u,1,17,u,2,18,u,3,19,u,4,20,u,5]
; AVX512-NEXT: vmovdqa64 %zmm3, %zmm13
; AVX512-NEXT: vpermt2d %zmm4, %zmm12, %zmm13
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm14 = [0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15]
; AVX512-NEXT: vpermt2d %zmm8, %zmm14, %zmm13
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm15 = <u,11,27,u,12,28,u,13,29,u,14,30,u,15,31,u>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm15 = [u,11,27,u,12,28,u,13,29,u,14,30,u,15,31,u]
; AVX512-NEXT: vmovdqa64 %zmm0, %zmm16
; AVX512-NEXT: vpermt2d %zmm7, %zmm15, %zmm16
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm17 = [26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31]
; AVX512-NEXT: vpermt2d %zmm11, %zmm17, %zmm16
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm18 = <5,u,22,6,u,23,7,u,24,8,u,25,9,u,26,10>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm18 = [5,u,22,6,u,23,7,u,24,8,u,25,9,u,26,10]
; AVX512-NEXT: vmovdqa64 %zmm7, %zmm19
; AVX512-NEXT: vpermt2d %zmm0, %zmm18, %zmm19
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm20 = [0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15]
104 changes: 52 additions & 52 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-4.ll

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
@@ -73,7 +73,7 @@ define void @store_i32_stride5_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; AVX2-ONLY-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-ONLY-NEXT: vmovq %rax, %xmm2
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm3 = <0,2,4,6,u,1,3,5>
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,u,1,3,5]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm3, %ymm0
; AVX2-ONLY-NEXT: vmovd %eax, %xmm3
; AVX2-ONLY-NEXT: vpbroadcastd %xmm3, %ymm3
@@ -96,7 +96,7 @@ define void @store_i32_stride5_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX512-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = <0,2,4,6,8,1,3,5,7,9,u,u,u,u,u,u>
; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,6,8,1,3,5,7,9,u,u,u,u,u,u]
; AVX512-NEXT: vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; AVX512-NEXT: vmovlps %xmm1, 32(%r9)
@@ -199,16 +199,16 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-SLOW-NEXT: vmovaps (%r8), %xmm4
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm5
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm7 = <u,u,0,4,u,u,u,1>
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm7 = [u,u,0,4,u,u,u,1]
; AVX2-SLOW-NEXT: vpermps %ymm6, %ymm7, %ymm7
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm8 = <u,4,u,u,u,1,5,u>
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm8 = [u,4,u,u,u,1,5,u]
; AVX2-SLOW-NEXT: vpermps %ymm5, %ymm8, %ymm8
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm7 = ymm8[0,1],ymm7[2,3],ymm8[4,5,6],ymm7[7]
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm7[1,2,3],ymm0[4],ymm7[5,6,7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm7 = <u,u,2,6,u,u,u,3>
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm7 = [u,u,2,6,u,u,u,3]
; AVX2-SLOW-NEXT: vpermps %ymm5, %ymm7, %ymm5
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm7 = <5,u,u,u,2,6,u,u>
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm7 = [5,u,u,u,2,6,u,u]
; AVX2-SLOW-NEXT: vpermps %ymm6, %ymm7, %ymm6
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3],ymm6[4,5],ymm5[6,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm6 = ymm4[0,1,2,1]
@@ -230,16 +230,16 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vmovaps (%r8), %xmm3
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm4
; AVX2-FAST-NEXT: vinsertf128 $1, (%rcx), %ymm2, %ymm2
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = <u,u,0,4,u,u,u,1>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = [u,u,0,4,u,u,u,1]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm5, %ymm5
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm6 = <u,4,u,u,u,1,5,u>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm6 = [u,4,u,u,u,1,5,u]
; AVX2-FAST-NEXT: vpermps %ymm4, %ymm6, %ymm6
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3],ymm6[4,5,6],ymm5[7]
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm5[1,2,3],ymm0[4],ymm5[5,6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = <u,u,2,6,u,u,u,3>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = [u,u,2,6,u,u,u,3]
; AVX2-FAST-NEXT: vpermps %ymm4, %ymm5, %ymm4
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = <5,u,u,u,2,6,u,u>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = [5,u,u,u,2,6,u,u]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm5, %ymm5
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3],ymm5[4,5],ymm4[6,7]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm3[0,1,2,1]
@@ -264,16 +264,16 @@ define void @store_i32_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-PERLANE-NEXT: vmovaps (%r8), %xmm4
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm5
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm7 = <u,u,0,4,u,u,u,1>
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm7 = [u,u,0,4,u,u,u,1]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm6, %ymm7, %ymm7
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm8 = <u,4,u,u,u,1,5,u>
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm8 = [u,4,u,u,u,1,5,u]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm5, %ymm8, %ymm8
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm7 = ymm8[0,1],ymm7[2,3],ymm8[4,5,6],ymm7[7]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm7[1,2,3],ymm0[4],ymm7[5,6,7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm7 = <u,u,2,6,u,u,u,3>
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm7 = [u,u,2,6,u,u,u,3]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm5, %ymm7, %ymm5
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm7 = <5,u,u,u,2,6,u,u>
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm7 = [5,u,u,u,2,6,u,u]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm6, %ymm7, %ymm6
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3],ymm6[4,5],ymm5[6,7]
; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm6 = ymm4[0,1,2,1]
@@ -528,7 +528,7 @@ define void @store_i32_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vmovaps (%rsi), %xmm6
; AVX2-FAST-NEXT: vmovaps (%rdi), %xmm7
; AVX2-FAST-NEXT: vunpckhps {{.*#+}} xmm5 = xmm7[2],xmm6[2],xmm7[3],xmm6[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm8 = <0,1,0,1,u,u,2,2>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm8 = [0,1,0,1,u,u,2,2]
; AVX2-FAST-NEXT: vpermps %ymm5, %ymm8, %ymm5
; AVX2-FAST-NEXT: vmovaps (%rdx), %xmm9
; AVX2-FAST-NEXT: vmovaps (%rcx), %xmm10
Expand All @@ -541,7 +541,7 @@ define void @store_i32_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: vunpcklps {{.*#+}} xmm9 = xmm9[0],xmm10[0],xmm9[1],xmm10[1]
; AVX2-FAST-NEXT: vpermps %ymm9, %ymm8, %ymm8
; AVX2-FAST-NEXT: vunpcklps {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm7 = <0,1,u,u,3,2,3,u>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm7 = [0,1,u,u,3,2,3,u]
; AVX2-FAST-NEXT: vpermps %ymm6, %ymm7, %ymm7
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6],ymm8[7]
; AVX2-FAST-NEXT: vinsertf128 $1, (%r8), %ymm6, %ymm6
@@ -648,13 +648,13 @@ define void @store_i32_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vmovdqa (%r8), %ymm2
; AVX512-NEXT: vinserti64x4 $1, (%rsi), %zmm0, %zmm0
; AVX512-NEXT: vinserti64x4 $1, (%rcx), %zmm1, %zmm1
; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = <6,14,u,23,31,7,15,u>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = [6,14,u,23,31,7,15,u]
; AVX512-NEXT: vpermi2d %zmm0, %zmm1, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = <11,19,27,u,4,12,20,28,u,5,13,21,29,u,6,14>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [11,19,27,u,4,12,20,28,u,5,13,21,29,u,6,14]
; AVX512-NEXT: vpermi2d %zmm1, %zmm0, %zmm4
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm5 = [0,1,2,19,4,5,6,7,20,9,10,11,12,21,14,15]
; AVX512-NEXT: vpermi2d %zmm2, %zmm4, %zmm5
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = <0,8,16,24,u,1,9,17,25,u,2,10,18,26,u,3>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,8,16,24,u,1,9,17,25,u,2,10,18,26,u,3]
; AVX512-NEXT: vpermi2d %zmm1, %zmm0, %zmm4
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,1,2,3,16,5,6,7,8,17,10,11,12,13,18,15]
; AVX512-NEXT: vpermi2d %zmm2, %zmm4, %zmm0
@@ -1125,7 +1125,7 @@ define void @store_i32_stride5_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX2-FAST-NEXT: vmovaps (%rdi), %xmm0
; AVX2-FAST-NEXT: vmovaps 32(%rdi), %xmm10
; AVX2-FAST-NEXT: vunpckhps {{.*#+}} xmm5 = xmm0[2],xmm6[2],xmm0[3],xmm6[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm12 = <0,1,0,1,u,u,2,2>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm12 = [0,1,0,1,u,u,2,2]
; AVX2-FAST-NEXT: vpermps %ymm5, %ymm12, %ymm5
; AVX2-FAST-NEXT: vmovaps (%rdx), %xmm8
; AVX2-FAST-NEXT: vmovaps 32(%rdx), %xmm14
@@ -2486,7 +2486,7 @@ define void @store_i32_stride5_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX2-FAST-NEXT: vmovaps 64(%rdi), %xmm13
; AVX2-FAST-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-NEXT: vunpckhps {{.*#+}} xmm0 = xmm7[2],xmm6[2],xmm7[3],xmm6[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm12 = <0,1,0,1,u,u,2,2>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm12 = [0,1,0,1,u,u,2,2]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm12, %ymm0
; AVX2-FAST-NEXT: vmovaps (%rdx), %xmm8
; AVX2-FAST-NEXT: vmovaps 32(%rdx), %xmm2
@@ -5324,7 +5324,7 @@ define void @store_i32_stride5_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX2-FAST-NEXT: vmovaps 64(%rdi), %xmm6
; AVX2-FAST-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX2-FAST-NEXT: vunpckhps {{.*#+}} xmm0 = xmm14[2],xmm11[2],xmm14[3],xmm11[3]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm9 = <0,1,0,1,u,u,2,2>
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm9 = [0,1,0,1,u,u,2,2]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm9, %ymm2
; AVX2-FAST-NEXT: vmovaps (%rdx), %xmm15
; AVX2-FAST-NEXT: vmovaps 32(%rdx), %xmm10
174 changes: 87 additions & 87 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll

Large diffs are not rendered by default.

436 changes: 218 additions & 218 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll

Large diffs are not rendered by default.

416 changes: 208 additions & 208 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll

Large diffs are not rendered by default.

26 changes: 13 additions & 13 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-3.ll
@@ -64,7 +64,7 @@ define void @store_i64_stride3_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vmovaps (%rdi), %xmm0
; AVX512-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
; AVX512-NEXT: vinsertf32x4 $2, (%rdx), %zmm0, %zmm0
; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = <0,2,4,1,3,5,u,u>
; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [0,2,4,1,3,5,u,u]
; AVX512-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: vextractf32x4 $2, %zmm0, 32(%rcx)
; AVX512-NEXT: vmovaps %ymm0, (%rcx)
@@ -308,15 +308,15 @@ define void @store_i64_stride3_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vmovdqa64 (%rsi), %zmm1
; AVX512-NEXT: vmovdqa64 (%rdx), %zmm2
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = <0,8,u,1,9,u,2,10>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,u,1,9,u,2,10]
; AVX512-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,8,3,4,9,6,7]
; AVX512-NEXT: vpermi2q %zmm2, %zmm3, %zmm4
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = <u,3,11,u,4,12,u,5>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = [u,3,11,u,4,12,u,5]
; AVX512-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm5 = [10,1,2,11,4,5,12,7]
; AVX512-NEXT: vpermi2q %zmm2, %zmm3, %zmm5
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = <5,u,14,6,u,15,7,u>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,u,14,6,u,15,7,u]
; AVX512-NEXT: vpermi2q %zmm0, %zmm1, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,13,2,3,14,5,6,15]
; AVX512-NEXT: vpermi2q %zmm2, %zmm3, %zmm0
@@ -605,17 +605,17 @@ define void @store_i64_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vmovdqa64 64(%rsi), %zmm3
; AVX512-NEXT: vmovdqa64 (%rdx), %zmm4
; AVX512-NEXT: vmovdqa64 64(%rdx), %zmm5
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm6 = <0,8,u,1,9,u,2,10>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm6 = [0,8,u,1,9,u,2,10]
; AVX512-NEXT: vmovdqa64 %zmm0, %zmm7
; AVX512-NEXT: vpermt2q %zmm2, %zmm6, %zmm7
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm8 = [0,1,8,3,4,9,6,7]
; AVX512-NEXT: vpermt2q %zmm4, %zmm8, %zmm7
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm9 = <5,u,14,6,u,15,7,u>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm9 = [5,u,14,6,u,15,7,u]
; AVX512-NEXT: vmovdqa64 %zmm3, %zmm10
; AVX512-NEXT: vpermt2q %zmm1, %zmm9, %zmm10
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm11 = [0,13,2,3,14,5,6,15]
; AVX512-NEXT: vpermt2q %zmm5, %zmm11, %zmm10
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = <u,3,11,u,4,12,u,5>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = [u,3,11,u,4,12,u,5]
; AVX512-NEXT: vmovdqa64 %zmm1, %zmm13
; AVX512-NEXT: vpermt2q %zmm3, %zmm12, %zmm13
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm14 = [10,1,2,11,4,5,12,7]
@@ -1246,17 +1246,17 @@ define void @store_i64_stride3_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vmovdqa64 64(%rdx), %zmm9
; AVX512-NEXT: vmovdqa64 128(%rdx), %zmm10
; AVX512-NEXT: vmovdqa64 192(%rdx), %zmm11
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = <0,8,u,1,9,u,2,10>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm12 = [0,8,u,1,9,u,2,10]
; AVX512-NEXT: vmovdqa64 %zmm3, %zmm13
; AVX512-NEXT: vpermt2q %zmm4, %zmm12, %zmm13
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm14 = [0,1,8,3,4,9,6,7]
; AVX512-NEXT: vpermt2q %zmm8, %zmm14, %zmm13
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm15 = <5,u,14,6,u,15,7,u>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm15 = [5,u,14,6,u,15,7,u]
; AVX512-NEXT: vmovdqa64 %zmm7, %zmm16
; AVX512-NEXT: vpermt2q %zmm0, %zmm15, %zmm16
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm17 = [0,13,2,3,14,5,6,15]
; AVX512-NEXT: vpermt2q %zmm11, %zmm17, %zmm16
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm18 = <u,3,11,u,4,12,u,5>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm18 = [u,3,11,u,4,12,u,5]
; AVX512-NEXT: vmovdqa64 %zmm0, %zmm19
; AVX512-NEXT: vpermt2q %zmm7, %zmm18, %zmm19
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm20 = [10,1,2,11,4,5,12,7]
@@ -2589,17 +2589,17 @@ define void @store_i64_stride3_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vmovdqa64 64(%rdx), %zmm25
; AVX512-NEXT: vmovdqa64 128(%rdx), %zmm29
; AVX512-NEXT: vmovdqa64 192(%rdx), %zmm31
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm14 = <0,8,u,1,9,u,2,10>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm14 = [0,8,u,1,9,u,2,10]
; AVX512-NEXT: vmovdqa64 %zmm13, %zmm3
; AVX512-NEXT: vpermt2q %zmm0, %zmm14, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm19 = [0,1,8,3,4,9,6,7]
; AVX512-NEXT: vpermt2q %zmm15, %zmm19, %zmm3
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm17 = <u,3,11,u,4,12,u,5>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm17 = [u,3,11,u,4,12,u,5]
; AVX512-NEXT: vmovdqa64 %zmm13, %zmm10
; AVX512-NEXT: vpermt2q %zmm0, %zmm17, %zmm10
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm21 = [10,1,2,11,4,5,12,7]
; AVX512-NEXT: vpermt2q %zmm15, %zmm21, %zmm10
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm22 = <5,u,14,6,u,15,7,u>
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm22 = [5,u,14,6,u,15,7,u]
; AVX512-NEXT: vpermt2q %zmm13, %zmm22, %zmm0
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm23 = [0,13,2,3,14,5,6,15]
; AVX512-NEXT: vpermt2q %zmm15, %zmm23, %zmm0
128 changes: 64 additions & 64 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll

Large diffs are not rendered by default.

134 changes: 67 additions & 67 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-5.ll

Large diffs are not rendered by default.

68 changes: 34 additions & 34 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-6.ll

Large diffs are not rendered by default.

394 changes: 197 additions & 197 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll
@@ -855,9 +855,9 @@ define void @store_i8_stride3_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vpshufb %xmm8, %xmm6, %xmm0
; AVX1-ONLY-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX1-ONLY-NEXT: vpshufb %xmm8, %xmm2, %xmm8
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm10 = <u,u,u,u,u,128,128,128,128,128,128,6,7,8,9,10>
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm10 = [u,u,u,u,u,128,128,128,128,128,128,6,7,8,9,10]
; AVX1-ONLY-NEXT: vpshufb %xmm10, %xmm2, %xmm2
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm11 = <u,u,u,u,u,5,6,7,8,9,10,128,128,128,128,128>
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm11 = [u,u,u,u,u,5,6,7,8,9,10,128,128,128,128,128]
; AVX1-ONLY-NEXT: vmovdqa 16(%rsi), %xmm12
; AVX1-ONLY-NEXT: vmovdqa 32(%rsi), %xmm13
; AVX1-ONLY-NEXT: vmovdqa 48(%rsi), %xmm14
@@ -878,7 +878,7 @@ define void @store_i8_stride3_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vpor %xmm7, %xmm10, %xmm0
; AVX1-ONLY-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX1-ONLY-NEXT: vpunpckhbw {{.*#+}} xmm7 = xmm3[8],xmm15[8],xmm3[9],xmm15[9],xmm3[10],xmm15[10],xmm3[11],xmm15[11],xmm3[12],xmm15[12],xmm3[13],xmm15[13],xmm3[14],xmm15[14],xmm3[15],xmm15[15]
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm10 = <u,u,u,u,u,4,6,8,10,12,14,7,9,11,13,15>
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm10 = [u,u,u,u,u,4,6,8,10,12,14,7,9,11,13,15]
; AVX1-ONLY-NEXT: vpshufb %xmm10, %xmm7, %xmm6
; AVX1-ONLY-NEXT: vmovdqa %xmm1, %xmm0
; AVX1-ONLY-NEXT: vpunpckhbw {{.*#+}} xmm7 = xmm1[8],xmm12[8],xmm1[9],xmm12[9],xmm1[10],xmm12[10],xmm1[11],xmm12[11],xmm1[12],xmm12[12],xmm1[13],xmm12[13],xmm1[14],xmm12[14],xmm1[15],xmm12[15]
242 changes: 121 additions & 121 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll

Large diffs are not rendered by default.

120 changes: 60 additions & 60 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll

Large diffs are not rendered by default.

500 changes: 250 additions & 250 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll

Large diffs are not rendered by default.

112 changes: 56 additions & 56 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
@@ -13,7 +13,7 @@ declare <4 x i16> @llvm.umul.fix.sat.v4i16(<4 x i16>, <4 x i16>, i32 immarg)
define <4 x i16> @smulfix(<4 x i16> %a) {
; CHECK-LABEL: smulfix:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <1,2,3,4,u,u,u,u>
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,u,u,u,u]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pmullw %xmm1, %xmm2
; CHECK-NEXT: psrlw $15, %xmm2
@@ -28,7 +28,7 @@ define <4 x i16> @smulfix(<4 x i16> %a) {
define <4 x i16> @umulfix(<4 x i16> %a) {
; CHECK-LABEL: umulfix:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <1,2,3,4,u,u,u,u>
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,u,u,u,u]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pmullw %xmm1, %xmm2
; CHECK-NEXT: psrlw $15, %xmm2
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-partial-undef.ll
@@ -137,7 +137,7 @@ define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
; SSE-LABEL: xor_undef_elts_alt:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movaps {{.*#+}} xmm2 = <u,u,44,12>
; SSE-NEXT: movaps {{.*#+}} xmm2 = [u,u,44,12]
; SSE-NEXT: xorps %xmm0, %xmm2
; SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
@@ -864,7 +864,7 @@ define i16 @test_v4i16_v4i8(<4 x i16> %a0) {
;
; SSE41-LABEL: test_v4i16_v4i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <u,32768,16384,8192,u,u,u,u>
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,32768,16384,8192,u,u,u,u]
; SSE41-NEXT: pmulhuw %xmm0, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
46 changes: 23 additions & 23 deletions llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
@@ -481,7 +481,7 @@ define void @mask_replication_factor3_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512F-ONLY-NEXT: kmovw (%rdi), %k1
; AVX512F-ONLY-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512F-ONLY-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512F-ONLY-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,0,1,1,1,u,u>
; AVX512F-ONLY-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,1,1,u,u]
; AVX512F-ONLY-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512F-ONLY-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512F-ONLY-NEXT: movb $63, %al
Expand All @@ -498,7 +498,7 @@ define void @mask_replication_factor3_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: kmovb (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %ymm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,0,1,1,1,u,u>
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,1,1,u,u]
; AVX512DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: movb $63, %al
Expand All @@ -516,7 +516,7 @@ define void @mask_replication_factor3_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512BW-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,0,1,1,1,u,u>
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,1,1,u,u]
; AVX512BW-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT: movb $63, %al
@@ -542,7 +542,7 @@ define void @mask_replication_factor3_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512F-ONLY: # %bb.0:
; AVX512F-ONLY-NEXT: kmovw (%rdi), %k1
; AVX512F-ONLY-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,1,1,1,2,2,2,3,3,3,u,u,u,u>
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,1,1,2,2,2,3,3,3,u,u,u,u]
; AVX512F-ONLY-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-ONLY-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-ONLY-NEXT: movw $4095, %ax # imm = 0xFFF
Expand All @@ -558,7 +558,7 @@ define void @mask_replication_factor3_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,1,1,1,2,2,2,3,3,3,u,u,u,u>
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,1,1,2,2,2,3,3,3,u,u,u,u]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: movw $4095, %ax # imm = 0xFFF
Expand All @@ -574,7 +574,7 @@ define void @mask_replication_factor3_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,1,1,1,2,2,2,3,3,3,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,1,1,2,2,2,3,3,3,u,u,u,u]
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512BW-NEXT: movw $4095, %ax # imm = 0xFFF
@@ -646,7 +646,7 @@ define void @mask_replication_factor3_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: movl $16777215, %eax # imm = 0xFFFFFF
@@ -3121,7 +3121,7 @@ define void @mask_replication_factor5_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512F-ONLY: # %bb.0:
; AVX512F-ONLY-NEXT: kmovw (%rdi), %k1
; AVX512F-ONLY-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,1,1,1,1,1,u,u,u,u,u,u>
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,1,1,1,1,1,u,u,u,u,u,u]
; AVX512F-ONLY-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-ONLY-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-ONLY-NEXT: movw $1023, %ax # imm = 0x3FF
Expand All @@ -3138,7 +3138,7 @@ define void @mask_replication_factor5_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,1,1,1,1,1,u,u,u,u,u,u>
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,1,1,1,1,1,u,u,u,u,u,u]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: movw $1023, %ax # imm = 0x3FF
Expand All @@ -3155,7 +3155,7 @@ define void @mask_replication_factor5_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,1,1,1,1,1,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,1,1,1,1,1,u,u,u,u,u,u]
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512BW-NEXT: movw $1023, %ax # imm = 0x3FF
@@ -3219,7 +3219,7 @@ define void @mask_replication_factor5_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovd (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: movl $1048575, %eax # imm = 0xFFFFF
@@ -3324,7 +3324,7 @@ define void @mask_replication_factor5_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512VBMI-ONLY: # %bb.0:
; AVX512VBMI-ONLY-NEXT: kmovw (%rdi), %k0
; AVX512VBMI-ONLY-NEXT: vpmovm2b %k0, %zmm0
; AVX512VBMI-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,7,7,7,7,7,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VBMI-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,7,7,7,7,7,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VBMI-ONLY-NEXT: vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-ONLY-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VBMI-ONLY-NEXT: movabsq $1099511627775, %rax # imm = 0xFFFFFFFFFF
@@ -5857,7 +5857,7 @@ define void @mask_replication_factor6_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512F-ONLY: # %bb.0:
; AVX512F-ONLY-NEXT: kmovw (%rdi), %k1
; AVX512F-ONLY-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,1,1,1,1,1,1,u,u,u,u>
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,1,1,1,1,1,u,u,u,u]
; AVX512F-ONLY-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-ONLY-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-ONLY-NEXT: movw $4095, %ax # imm = 0xFFF
@@ -5873,7 +5873,7 @@ define void @mask_replication_factor6_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,1,1,1,1,1,1,u,u,u,u>
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,1,1,1,1,1,u,u,u,u]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: movw $4095, %ax # imm = 0xFFF
@@ -5889,7 +5889,7 @@ define void @mask_replication_factor6_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,1,1,1,1,1,1,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,1,1,1,1,1,u,u,u,u]
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512BW-NEXT: movw $4095, %ax # imm = 0xFFF
@@ -5994,7 +5994,7 @@ define void @mask_replication_factor6_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovd (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: movl $16777215, %eax # imm = 0xFFFFFF
@@ -9071,7 +9071,7 @@ define void @mask_replication_factor7_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512F-ONLY: # %bb.0:
; AVX512F-ONLY-NEXT: kmovw (%rdi), %k1
; AVX512F-ONLY-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,0,1,1,1,1,1,1,1,u,u>
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,1,1,1,1,1,1,1,u,u]
; AVX512F-ONLY-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-ONLY-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-ONLY-NEXT: movw $16383, %ax # imm = 0x3FFF
@@ -9089,7 +9089,7 @@ define void @mask_replication_factor7_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,0,1,1,1,1,1,1,1,u,u>
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,1,1,1,1,1,1,1,u,u]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: movw $16383, %ax # imm = 0x3FFF
@@ -9107,7 +9107,7 @@ define void @mask_replication_factor7_vf2(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,0,1,1,1,1,1,1,1,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,1,1,1,1,1,1,1,u,u]
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512BW-NEXT: movw $16383, %ax # imm = 0x3FFF
Expand All @@ -9134,7 +9134,7 @@ define void @mask_replication_factor7_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512F-ONLY: # %bb.0:
; AVX512F-ONLY-NEXT: kmovw (%rdi), %k1
; AVX512F-ONLY-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = <2,2,2,2,2,3,3,3,3,3,3,3,u,u,u,u>
; AVX512F-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,2,2,2,3,3,3,3,3,3,3,u,u,u,u]
; AVX512F-ONLY-NEXT: vpermd %zmm0, %zmm1, %zmm1
; AVX512F-ONLY-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-ONLY-NEXT: movw $4095, %ax # imm = 0xFFF
@@ -9155,7 +9155,7 @@ define void @mask_replication_factor7_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = <2,2,2,2,2,3,3,3,3,3,3,3,u,u,u,u>
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,2,2,2,3,3,3,3,3,3,3,u,u,u,u]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm1
; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: movw $4095, %ax # imm = 0xFFF
@@ -9176,7 +9176,7 @@ define void @mask_replication_factor7_vf4(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: kmovd (%rdi), %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,u,u,u,u>
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,u,u,u,u]
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: movl $268435455, %eax # imm = 0xFFFFFFF
@@ -9362,7 +9362,7 @@ define void @mask_replication_factor7_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512VBMI-ONLY: # %bb.0:
; AVX512VBMI-ONLY-NEXT: kmovw (%rdi), %k0
; AVX512VBMI-ONLY-NEXT: vpmovm2b %k0, %zmm0
; AVX512VBMI-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,7,7,7,7,7,7,7,u,u,u,u,u,u,u,u>
; AVX512VBMI-ONLY-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,7,7,7,7,7,7,7,u,u,u,u,u,u,u,u]
; AVX512VBMI-ONLY-NEXT: vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-ONLY-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VBMI-ONLY-NEXT: movabsq $72057594037927935, %rax # imm = 0xFFFFFFFFFFFFFF
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1420,7 +1420,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; SSE41-LABEL: constant_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <u,u,16384,8192,4096,2048,1024,512>
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,u,16384,8192,4096,2048,1024,512]
; SSE41-NEXT: pmulhw %xmm0, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; SSE41-NEXT: psraw $1, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -1786,7 +1786,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
;
; SSE41-LABEL: constant_shift_v4i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <u,u,16384,8192,u,u,u,u>
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,u,16384,8192,u,u,u,u]
; SSE41-NEXT: pmulhw %xmm0, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; SSE41-NEXT: psraw $1, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -1173,7 +1173,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; SSE41-LABEL: constant_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <u,32768,16384,8192,4096,2048,1024,512>
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,32768,16384,8192,4096,2048,1024,512]
; SSE41-NEXT: pmulhuw %xmm0, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; SSE41-NEXT: retq
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -288,7 +288,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
; AVX512DQ-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -1486,7 +1486,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
;
; SSE41-LABEL: constant_shift_v4i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <u,32768,16384,8192,u,u,u,u>
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,32768,16384,8192,u,u,u,u]
; SSE41-NEXT: pmulhuw %xmm0, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; SSE41-NEXT: retq
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1003,15 +1003,15 @@ define <16 x i8> @shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31(
;
; SSSE3-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1819,7 +1819,7 @@ define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(
;
; AVX512VLVBMI-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; AVX512VLVBMI: # %bb.0: # %entry
; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0>
; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0]
; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
; AVX512VLVBMI-NEXT: retq
;
64 changes: 32 additions & 32 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -3411,7 +3411,7 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,5,0,6,u,1,u>
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,u,5,0,6,u,1,u]
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
@@ -3424,7 +3424,7 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,5,0,6,u,1,u>
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,u,5,0,6,u,1,u]
; AVX512VLBW-NEXT: vpermd %ymm0, %ymm2, %ymm2
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
@@ -3460,7 +3460,7 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
; XOPAVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,5,0,6,u,1,u>
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,u,5,0,6,u,1,u]
; XOPAVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -445,7 +445,7 @@ define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
;
; AVX2-SLOW-LABEL: shuffle_v8f32_08991abb:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
@@ -454,16 +454,16 @@ define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
;
; AVX2-FAST-ALL-LABEL: shuffle_v8f32_08991abb:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: shuffle_v8f32_08991abb:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
@@ -533,7 +533,7 @@ define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
;
; AVX2-FAST-ALL-LABEL: shuffle_v8f32_09ab1def:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-FAST-ALL-NEXT: retq
@@ -956,7 +956,7 @@ define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [7,2,7,2,7,2,7,2]
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,5,1,1,2,3,5,u]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
; AVX2-FAST-ALL-NEXT: retq
@@ -1397,7 +1397,7 @@ define <8 x float> @shuffle_v8f32_089abcde(<8 x float> %a, <8 x float> %b) {
;
; AVX2-LABEL: shuffle_v8f32_089abcde:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,2,3,4,5,6>
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,2,3,4,5,6]
; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT: retq
@@ -1452,7 +1452,7 @@ define <8 x float> @shuffle_v8f32_01289abc(<8 x float> %a, <8 x float> %b) {
;
; AVX2-LABEL: shuffle_v8f32_01289abc:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,0,1,2,3,4>
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,u,u,0,1,2,3,4]
; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX2-NEXT: retq
@@ -2095,9 +2095,9 @@ define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-FAST-ALL-LABEL: shuffle_v8i32_08991abb:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,1,u,2,3,3]
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-FAST-ALL-NEXT: retq
@@ -2174,7 +2174,7 @@ define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-FAST-ALL-LABEL: shuffle_v8i32_09ab1def:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,1,u,u,u]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX2-FAST-ALL-NEXT: retq
@@ -2526,7 +2526,7 @@ define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2OR512VL-LABEL: shuffle_v8i32_002u6u44:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,u,6,u,4,4]
; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
@@ -2541,7 +2541,7 @@ define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2OR512VL-LABEL: shuffle_v8i32_00uu66uu:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,u,u,6,6,u,u]
; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
@@ -2556,7 +2556,7 @@ define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2OR512VL-LABEL: shuffle_v8i32_103245uu:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,3,2,4,5,u,u]
; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
@@ -2571,7 +2571,7 @@ define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2OR512VL-LABEL: shuffle_v8i32_1133uu67:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,3,3,u,u,6,7]
; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
@@ -2586,7 +2586,7 @@ define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2OR512VL-LABEL: shuffle_v8i32_0uu354uu:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,u,u,3,5,4,u,u]
; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
@@ -2601,7 +2601,7 @@ define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2OR512VL-LABEL: shuffle_v8i32_uuu3uu66:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [u,u,u,3,u,u,6,6]
; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
@@ -2628,7 +2628,7 @@ define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-FAST-ALL-LABEL: shuffle_v8i32_6caa87e5:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [u,4,2,2,0,u,6,u]
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
@@ -2958,7 +2958,7 @@ define <8 x i32> @shuffle_v8i32_089abcde(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-LABEL: shuffle_v8i32_089abcde:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,2,3,4,5,6>
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,0,1,2,3,4,5,6]
; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT: retq
@@ -3031,7 +3031,7 @@ define <8 x i32> @shuffle_v8i32_01289abc(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-LABEL: shuffle_v8i32_01289abc:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,0,1,2,3,4>
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,u,u,0,1,2,3,4]
; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX2-NEXT: retq
@@ -3244,7 +3244,7 @@ define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-LABEL: shuffle_v8i32_0dcd3f14:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,3,u,1,4>
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [0,u,u,u,3,u,1,4]
; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,3,3]
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -233,7 +233,7 @@ define <16 x i32> @shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_2
define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = [2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1]
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
@@ -243,7 +243,7 @@ define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<
define <16 x i32> @shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL: # %bb.0:
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = [2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1]
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -42,20 +42,20 @@ define <32 x i16> @shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_
; KNL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[4,5,10,11,4,5,6,7,14,15,2,3,4,5,2,3,20,21,26,27,20,21,22,23,30,31,18,19,20,21,18,19]
; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[0,1,10,11,8,9,8,9,14,15,6,7,4,5,14,15,16,17,26,27,24,25,24,25,30,31,22,23,20,21,30,31]
; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = <255,255,255,255,u,u,u,u,255,255,u,u,0,0,255,255,0,0,0,0,u,u,0,0,0,0,u,u,255,255,u,u>
; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,u,u,u,u,255,255,u,u,0,0,255,255,0,0,0,0,u,u,0,0,0,0,u,u,255,255,u,u]
; KNL-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm3
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5,6],ymm0[7],ymm3[8,9,10,11,12,13,14],ymm0[15]
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,10,11,8,9,8,9,14,15,2,3,4,5,2,3,16,17,26,27,24,25,24,25,30,31,18,19,20,21,18,19]
; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = <0,0,0,0,u,u,u,u,0,0,u,u,255,255,0,0,255,255,255,255,u,u,255,255,255,255,u,u,0,0,255,255>
; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,0,0,0,u,u,u,u,0,0,u,u,255,255,0,0,255,255,255,255,u,u,255,255,255,255,u,u,0,0,255,255]
; KNL-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
; SKX: ## %bb.0:
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1,2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,31>
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1,2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,31]
; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
%c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1, i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 31>
92 changes: 46 additions & 46 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -687,13 +687,13 @@ define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_002u6u44:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,2,u,6,u,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_002u6u44:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
@@ -703,13 +703,13 @@ define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,u,u,6,6,u,u]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
@@ -1553,13 +1553,13 @@ define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_002u6u44:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,2,u,6,u,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_002u6u44:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
@@ -1569,13 +1569,13 @@ define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_00uu66uu:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,u,u,6,6,u,u]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00uu66uu:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
@@ -1585,13 +1585,13 @@ define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_103245uu:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [1,0,3,2,4,5,u,u]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_103245uu:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
@@ -1601,13 +1601,13 @@ define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_1133uu67:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [1,1,3,3,u,u,6,7]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_1133uu67:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
@@ -1617,13 +1617,13 @@ define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_0uu354uu:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [0,u,u,3,5,4,u,u]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_0uu354uu:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
@@ -1633,13 +1633,13 @@ define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_uuu3uu66:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [u,u,u,3,u,u,6,6]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_uuu3uu66:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
; AVX512F-32-NEXT: vmovaps {{.*#+}} zmm1 = [u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -362,7 +362,7 @@ define void @PR39483() {
; X86-AVX2: # %bb.0: # %entry
; X86-AVX2-NEXT: vmovups 32, %ymm0
; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [2,5,0,3,6,u,u,u]
; X86-AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,1,0,3,4,5,4,7]
; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
@@ -403,7 +403,7 @@ define void @PR39483() {
; X64-AVX2: # %bb.0: # %entry
; X64-AVX2-NEXT: vmovups 32, %ymm0
; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
; X64-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [2,5,0,3,6,u,u,u]
; X64-AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,1,0,3,4,5,4,7]
; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -128,7 +128,7 @@ define <8 x i32> @combine_as_vpermd(<8 x i32> %a0) {
define <8 x float> @combine_as_vpermps(<8 x float> %a0) {
; CHECK-LABEL: combine_as_vpermps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = <6,4,7,5,1,u,4,7>
; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [6,4,7,5,1,u,4,7]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
@@ -784,7 +784,7 @@ define <8 x float> @constant_fold_permps() {
define <32 x i8> @constant_fold_pshufb_256() {
; CHECK-LABEL: constant_fold_pshufb_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = <14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9,255,0,0,0,u,u,0,0,241,0,0,0,0,0,249,250>
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9,255,0,0,0,u,u,0,0,241,0,0,0,0,0,249,250]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15>, <32 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6, i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6>)
ret <32 x i8> %1
@@ -832,7 +832,7 @@ define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [u,u,7,2,u,u,3,2]
; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: ret{{[l|q]}}
@@ -843,7 +843,7 @@ define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1
; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7]
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = <23,18,7,2,20,u,3,2>
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [23,18,7,2,20,u,3,2]
; AVX512-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: ret{{[l|q]}}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
@@ -31,9 +31,9 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) {
; SSE-NEXT: movdqa (%rsi), %xmm4
; SSE-NEXT: movdqa 16(%rsi), %xmm5
; SSE-NEXT: movdqa 32(%rsi), %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm6 = <128,128,128,128,128,128,2,5,8,11,14,u,u,u,u,u>
; SSE-NEXT: movdqa {{.*#+}} xmm6 = [128,128,128,128,128,128,2,5,8,11,14,u,u,u,u,u]
; SSE-NEXT: pshufb %xmm6, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm7 = <0,3,6,9,12,15,128,128,128,128,128,u,u,u,u,u>
; SSE-NEXT: movdqa {{.*#+}} xmm7 = [0,3,6,9,12,15,128,128,128,128,128,u,u,u,u,u]
; SSE-NEXT: pshufb %xmm7, %xmm2
; SSE-NEXT: por %xmm0, %xmm2
; SSE-NEXT: pshufb %xmm6, %xmm5
@@ -44,9 +44,9 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) {
; SSE-NEXT: pmullw %xmm5, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm5, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm6 = <8,u,9,u,10,u,128,u,128,u,128,u,128,u,128,u>
; SSE-NEXT: movdqa {{.*#+}} xmm6 = [8,u,9,u,10,u,128,u,128,u,128,u,128,u,128,u]
; SSE-NEXT: pshufb %xmm6, %xmm4
; SSE-NEXT: movdqa {{.*#+}} xmm7 = <128,u,128,u,128,u,1,u,4,u,7,u,10,u,13,u>
; SSE-NEXT: movdqa {{.*#+}} xmm7 = [128,u,128,u,128,u,1,u,4,u,7,u,10,u,13,u]
; SSE-NEXT: pshufb %xmm7, %xmm3
; SSE-NEXT: por %xmm4, %xmm3
; SSE-NEXT: pshufb %xmm6, %xmm2
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -766,12 +766,12 @@ define <16 x i8> @combine_and_pshufb_or_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
define <16 x i8> @constant_fold_pshufb() {
; SSE-LABEL: constant_fold_pshufb:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = <14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9>
; SSE-NEXT: movaps {{.*#+}} xmm0 = [14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9]
; SSE-NEXT: retq
;
; AVX-LABEL: constant_fold_pshufb:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9>
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [14,0,0,0,u,u,0,0,0,0,0,0,0,0,8,9]
; AVX-NEXT: retq
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6>)
ret <16 x i8> %1
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1744,7 +1744,7 @@ define <4 x i8> @combine_test1c(ptr %a, ptr %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE41-NEXT: movaps {{.*#+}} xmm0 = <0,255,255,255,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
@@ -1838,7 +1838,7 @@ define <4 x i8> @combine_test4c(ptr %a, ptr %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE41-NEXT: movaps {{.*#+}} xmm0 = <255,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
@@ -2671,14 +2671,14 @@ define void @combine_scalar_load_with_blend_with_zero(ptr %a0, ptr %a1) {
define <4 x float> @combine_constant_insertion_v4f32(float %f) {
; SSE2-LABEL: combine_constant_insertion_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm1 = <u,4.0E+0,5.0E+0,3.0E+0>
; SSE2-NEXT: movaps {{.*#+}} xmm1 = [u,4.0E+0,5.0E+0,3.0E+0]
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_constant_insertion_v4f32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movaps {{.*#+}} xmm1 = <u,4.0E+0,5.0E+0,3.0E+0>
; SSSE3-NEXT: movaps {{.*#+}} xmm1 = [u,4.0E+0,5.0E+0,3.0E+0]
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
@@ -2701,26 +2701,26 @@ define <4 x i32> @combine_constant_insertion_v4i32(i32 %f) {
; SSE2-LABEL: combine_constant_insertion_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm1
; SSE2-NEXT: movaps {{.*#+}} xmm0 = <u,4,5,30>
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [u,4,5,30]
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_constant_insertion_v4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm1
; SSSE3-NEXT: movaps {{.*#+}} xmm0 = <u,4,5,30>
; SSSE3-NEXT: movaps {{.*#+}} xmm0 = [u,4,5,30]
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_constant_insertion_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <u,4,5,30>
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [u,4,5,30]
; SSE41-NEXT: pinsrd $0, %edi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_constant_insertion_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = <u,4,5,30>
; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [u,4,5,30]
; AVX-NEXT: vpinsrd $0, %edi, %xmm0, %xmm0
; AVX-NEXT: retq
%a0 = insertelement <4 x i32> undef, i32 %f, i32 0
@@ -2851,7 +2851,7 @@ define <4 x float> @PR30264(<4 x float> %x) {
;
; SSE41-LABEL: PR30264:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps {{.*#+}} xmm1 = <u,u,4.0E+0,1.0E+0>
; SSE41-NEXT: movaps {{.*#+}} xmm1 = [u,u,4.0E+0,1.0E+0]
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm0[0],zero,xmm1[2,3]
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
@@ -3298,7 +3298,7 @@ define void @PR45604(ptr %dst, ptr %src) {
; SSE41-NEXT: movdqa (%rsi), %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <u,0,11,0,u,0,11,0>
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [u,0,11,0,u,0,11,0]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
@@ -3341,7 +3341,7 @@ define void @PR45604(ptr %dst, ptr %src) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rsi), %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,2,0,2]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u>
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [11,0,0,0,11,0,0,0,11,0,0,0,11,0,0,0,11,0,0,0,11,0,0,0,11,0,0,0,11,0,0,0]
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm3[1],ymm1[2],ymm3[3],ymm1[4],ymm3[5],ymm1[6],ymm3[7]
@@ -3481,9 +3481,9 @@ define void @SpinningCube() {
; SSE2-LABEL: SpinningCube:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
; SSE2-NEXT: movaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [u,u,u,1.0E+0]
; SSE2-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; SSE2-NEXT: movapd {{.*#+}} xmm2 = <u,u,-2.0E+0,u>
; SSE2-NEXT: movapd {{.*#+}} xmm2 = [u,u,-2.0E+0,u]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: xorps %xmm3, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,0]
@@ -3500,9 +3500,9 @@ define void @SpinningCube() {
; SSSE3-LABEL: SpinningCube:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
; SSSE3-NEXT: movaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; SSSE3-NEXT: movaps {{.*#+}} xmm0 = [u,u,u,1.0E+0]
; SSSE3-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; SSSE3-NEXT: movapd {{.*#+}} xmm2 = <u,u,-2.0E+0,u>
; SSSE3-NEXT: movapd {{.*#+}} xmm2 = [u,u,-2.0E+0,u]
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT: xorps %xmm3, %xmm3
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,0]
@@ -3519,8 +3519,8 @@ define void @SpinningCube() {
; SSE41-LABEL: SpinningCube:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
; SSE41-NEXT: movaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; SSE41-NEXT: movaps {{.*#+}} xmm1 = <0.0E+0,0.0E+0,-2.0E+0,u>
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [u,u,u,1.0E+0]
; SSE41-NEXT: movaps {{.*#+}} xmm1 = [0.0E+0,0.0E+0,-2.0E+0,u]
; SSE41-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; SSE41-NEXT: movaps %xmm1, %xmm3
; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm2[0]
Expand All @@ -3539,7 +3539,7 @@ define void @SpinningCube() {
; AVX: # %bb.0: # %entry
; AVX-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = <0.0E+0,0.0E+0,-2.0E+0,u>
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [0.0E+0,0.0E+0,-2.0E+0,u]
; AVX-NEXT: vmovss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[2,3]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -542,7 +542,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,2,10,u,3,u,2,u]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0