diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
index fbbaf720336caf..4922af83953c36 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -842,3 +842,110 @@ define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_
   %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32>
   ret <64 x i8> %5
 }
+
+define <64 x i8> @PR54562_ref(<64 x i8> %a0) {
+; AVX512F-LABEL: PR54562_ref:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2]
+; AVX512F-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
+; AVX512F-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
+; AVX512F-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
+; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: PR54562_ref:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2]
+; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
+; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512BW-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
+; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
+; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: PR54562_ref:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2]
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
+; AVX512DQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
+; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VBMI-LABEL: PR54562_ref:
+; AVX512VBMI:       # %bb.0:
+; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,13,12,14,13,16,15,17,16,19,18,20,19,22,21,23,22,25,24,26,25,28,27,29,28,31,30,32,31,34,33,35,34,37,36,38,37,40,39,41,40,43,42,44,43,46,45,47,46]
+; AVX512VBMI-NEXT:    vpermb %zmm0, %zmm1, %zmm0
+; AVX512VBMI-NEXT:    retq
+  %shuffle1 = shufflevector <64 x i8> %a0, <64 x i8> poison, <64 x i32>
+  %shuffle2 = shufflevector <64 x i8> %shuffle1, <64 x i8> poison, <64 x i32>
+  ret <64 x i8> %shuffle2
+}
+
+define void @PR54562_mem(<64 x i8>* %src, <64 x i8>* %dst) {
+; AVX512F-LABEL: PR54562_mem:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vmovdqa 32(%rdi), %xmm0
+; AVX512F-NEXT:    vpalignr {{.*#+}} xmm1 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; AVX512F-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
+; AVX512F-NEXT:    vpermq {{.*#+}} ymm2 = mem[0,1,1,2]
+; AVX512F-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
+; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
+; AVX512F-NEXT:    vmovdqa %xmm0, 48(%rsi)
+; AVX512F-NEXT:    vmovdqa %xmm1, 32(%rsi)
+; AVX512F-NEXT:    vmovdqa %ymm2, (%rsi)
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: PR54562_mem:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpermq {{.*#+}} ymm0 = mem[0,1,1,2]
+; AVX512BW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
+; AVX512BW-NEXT:    vmovdqa 32(%rdi), %xmm1
+; AVX512BW-NEXT:    vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
+; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
+; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rsi)
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: PR54562_mem:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vmovdqa 32(%rdi), %xmm0
+; AVX512DQ-NEXT:    vpalignr {{.*#+}} xmm1 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
+; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm2 = mem[0,1,1,2]
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
+; AVX512DQ-NEXT:    vmovdqa %xmm0, 48(%rsi)
+; AVX512DQ-NEXT:    vmovdqa %xmm1, 32(%rsi)
+; AVX512DQ-NEXT:    vmovdqa %ymm2, (%rsi)
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VBMI-LABEL: PR54562_mem:
+; AVX512VBMI:       # %bb.0:
+; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,13,12,14,13,16,15,17,16,19,18,20,19,22,21,23,22,25,24,26,25,28,27,29,28,31,30,32,31,34,33,35,34,37,36,38,37,40,39,41,40,43,42,44,43,46,45,47,46]
+; AVX512VBMI-NEXT:    vpermb (%rdi), %zmm0, %zmm0
+; AVX512VBMI-NEXT:    vmovdqa64 %zmm0, (%rsi)
+; AVX512VBMI-NEXT:    vzeroupper
+; AVX512VBMI-NEXT:    retq
+  %load = load <64 x i8>, <64 x i8>* %src, align 512
+  %shuffle1 = shufflevector <64 x i8> %load, <64 x i8> poison, <64 x i32>
+  %shuffle2 = shufflevector <64 x i8> %shuffle1, <64 x i8> poison, <64 x i32>
+  store <64 x i8> %shuffle2, <64 x i8>* %dst, align 512
+  ret void
+}