276 changes: 139 additions & 137 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll

Large diffs are not rendered by default.

@@ -2341,8 +2341,7 @@ define void @store_i16_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
 ; AVX512F-SLOW-NEXT: vpshufb {{.*#+}} ymm10 = ymm2[u,u,u,u,u,u,u,u,u,u,14,15],zero,zero,ymm2[u,u,u,u,u,u,u,u,u,u,16,17],zero,zero,ymm2[u,u,u,u]
 ; AVX512F-SLOW-NEXT: vporq %ymm5, %ymm10, %ymm19
 ; AVX512F-SLOW-NEXT: vpbroadcastd 8(%rax), %ymm5
-; AVX512F-SLOW-NEXT: vmovdqa {{.*#+}} ymm10 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535]
-; AVX512F-SLOW-NEXT: vpandn %ymm5, %ymm10, %ymm5
+; AVX512F-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5
 ; AVX512F-SLOW-NEXT: vpshufb {{.*#+}} ymm10 = ymm13[12,13,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm13[14,15,u,u,u,u,u,u,u,u],zero,zero,zero,zero,ymm13[16,17,u,u]
 ; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm10, %zmm5, %zmm5
 ; AVX512F-SLOW-NEXT: vpshufd {{.*#+}} ymm10 = ymm6[0,1,1,3,4,5,5,7]
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll
@@ -567,8 +567,8 @@ define void @store_i8_stride3_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm3
 ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
-; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512BW-NEXT: vpshufb %ymm2, %ymm0, %ymm0
 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
 ; AVX512BW-NEXT: vpshufb %zmm2, %zmm1, %zmm1
@@ -1086,8 +1086,8 @@ define void @store_i8_stride3_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm2
-; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
+; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512BW-NEXT: vpshufb %zmm3, %zmm2, %zmm2
 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm6, %zmm5, %zmm4
 ; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm4
492 changes: 245 additions & 247 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll

Large diffs are not rendered by default.

100 changes: 48 additions & 52 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll

Large diffs are not rendered by default.

428 changes: 214 additions & 214 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll

Large diffs are not rendered by default.

52 changes: 22 additions & 30 deletions llvm/test/CodeGen/X86/vector-shuffle-v192.ll
@@ -214,23 +214,21 @@ define <64 x i8> @f2(ptr %p0) {
 ; AVX512F-NEXT: vmovdqa 128(%rdi), %ymm4
 ; AVX512F-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
-; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = [255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3]
-; AVX512F-NEXT: vpternlogq $234, %zmm2, %zmm0, %zmm4
+; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
 ; AVX512F-NEXT: vmovdqa 96(%rdi), %xmm0
 ; AVX512F-NEXT: vpshufb %xmm5, %xmm0, %xmm0
-; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm2
-; AVX512F-NEXT: vpshufb %xmm6, %xmm2, %xmm2
-; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm4
+; AVX512F-NEXT: vpshufb %xmm6, %xmm4, %xmm4
+; AVX512F-NEXT: vpor %xmm0, %xmm4, %xmm0
 ; AVX512F-NEXT: vinserti32x4 $2, %xmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa 80(%rdi), %xmm2
-; AVX512F-NEXT: vpshufb %xmm1, %xmm2, %xmm1
-; AVX512F-NEXT: vmovdqa 64(%rdi), %xmm2
-; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT: vmovdqa 80(%rdi), %xmm4
+; AVX512F-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX512F-NEXT: vmovdqa 64(%rdi), %xmm4
+; AVX512F-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1
 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm0
+; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512BW-LABEL: f2:
@@ -253,11 +251,9 @@ define <64 x i8> @f2(ptr %p0) {
 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
 ; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2
 ; AVX512BW-NEXT: vinserti64x4 $1, 128(%rdi), %zmm2, %zmm2
-; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = <u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,1,5,7,11,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,1,5,7,11,13,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512BW-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3]
 ; AVX512BW-NEXT: movabsq $8998403163813888, %rax # imm = 0x1FF800001FF800
 ; AVX512BW-NEXT: kmovq %rax, %k1
-; AVX512BW-NEXT: vpshufb %zmm4, %zmm2, %zmm0 {%k1}
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,35,37,41,43,47,49,53,55,59,61,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512BW-NEXT: vmovdqa 96(%rdi), %xmm2
 ; AVX512BW-NEXT: vpshufb %xmm5, %xmm2, %xmm2
 ; AVX512BW-NEXT: vmovdqa 112(%rdi), %xmm4
@@ -501,23 +497,21 @@ define <64 x i8> @f4(ptr %p0) {
 ; AVX512F-NEXT: vmovdqa 128(%rdi), %ymm4
 ; AVX512F-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
-; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = [255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3]
-; AVX512F-NEXT: vpternlogq $234, %zmm2, %zmm0, %zmm4
+; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
 ; AVX512F-NEXT: vmovdqa 96(%rdi), %xmm0
 ; AVX512F-NEXT: vpshufb %xmm5, %xmm0, %xmm0
-; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm2
-; AVX512F-NEXT: vpshufb %xmm6, %xmm2, %xmm2
-; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm4
+; AVX512F-NEXT: vpshufb %xmm6, %xmm4, %xmm4
+; AVX512F-NEXT: vpor %xmm0, %xmm4, %xmm0
 ; AVX512F-NEXT: vinserti32x4 $2, %xmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa 80(%rdi), %xmm2
-; AVX512F-NEXT: vpshufb %xmm1, %xmm2, %xmm1
-; AVX512F-NEXT: vmovdqa 64(%rdi), %xmm2
-; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT: vmovdqa 80(%rdi), %xmm4
+; AVX512F-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX512F-NEXT: vmovdqa 64(%rdi), %xmm4
+; AVX512F-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1
 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm0
+; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512BW-LABEL: f4:
@@ -540,11 +534,9 @@ define <64 x i8> @f4(ptr %p0) {
 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
 ; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2
 ; AVX512BW-NEXT: vinserti64x4 $1, 128(%rdi), %zmm2, %zmm2
-; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = <u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,0,4,6,10,12,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,0,4,6,10,12,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512BW-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3]
 ; AVX512BW-NEXT: movabsq $8998403163813888, %rax # imm = 0x1FF800001FF800
 ; AVX512BW-NEXT: kmovq %rax, %k1
-; AVX512BW-NEXT: vpshufb %zmm4, %zmm2, %zmm0 {%k1}
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,34,36,40,42,46,48,52,54,58,60,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512BW-NEXT: vmovdqa 96(%rdi), %xmm2
 ; AVX512BW-NEXT: vpshufb %xmm5, %xmm2, %xmm2
 ; AVX512BW-NEXT: vmovdqa 112(%rdi), %xmm4
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -1054,8 +1054,8 @@ define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x
 ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm3
 ; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
-; AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3]
+; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512-NEXT: vpshufb %ymm2, %ymm0, %ymm0
 ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
 ; AVX512-NEXT: vpshufb %zmm2, %zmm1, %zmm1
@@ -1252,8 +1252,8 @@ define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x
 ; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm2
-; AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3]
+; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512-NEXT: vpshufb %zmm3, %zmm2, %zmm2
 ; AVX512-NEXT: vinserti64x4 $1, %ymm6, %zmm5, %zmm4
 ; AVX512-NEXT: vpshufb %zmm3, %zmm4, %zmm4