diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index b2c0acdf9b2287..82d1997cddfa9f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -4944,6 +4944,66 @@ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) {
   ret <4 x i64> %3
 }
 
+define <32 x i8> @PR47262(<4 x i64> %a0) {
+; AVX1-LABEL: PR47262:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: PR47262:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u]
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512VLBW-LABEL: PR47262:
+; AVX512VLBW:       # %bb.0:
+; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31]
+; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX512VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u]
+; AVX512VLBW-NEXT:    movw $21930, %ax # imm = 0x55AA
+; AVX512VLBW-NEXT:    kmovd %eax, %k1
+; AVX512VLBW-NEXT:    vmovdqu16 %ymm0, %ymm1 {%k1}
+; AVX512VLBW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512VLBW-NEXT:    retq
+;
+; AVX512VLVBMI-LABEL: PR47262:
+; AVX512VLVBMI:       # %bb.0:
+; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,4,16,20,1,5,17,21,2,6,18,22,3,7,19,23,8,12,24,28,9,13,25,29,10,14,26,30,11,15,27,31]
+; AVX512VLVBMI-NEXT:    vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-NEXT:    retq
+;
+; XOPAVX1-LABEL: PR47262:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm2 = xmm0[8,12],xmm1[8,12],xmm0[9,13],xmm1[9,13],xmm0[10,14],xmm1[10,14],xmm0[11,15],xmm1[11,15]
+; XOPAVX1-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[0,4],xmm1[0,4],xmm0[1,5],xmm1[1,5],xmm0[2,6],xmm1[2,6],xmm0[3,7],xmm1[3,7]
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: PR47262:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31]
+; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; XOPAVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u]
+; XOPAVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
+; XOPAVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT:    retq
+  %t1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  %t2 = bitcast <4 x i64> %t1 to <32 x i8>
+  %t3 = shufflevector <32 x i8> %t2, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15, i32 16, i32 20, i32 24, i32 28, i32 17, i32 21, i32 25, i32 29, i32 18, i32 22, i32 26, i32 30, i32 19, i32 23, i32 27, i32 31>
+  ret <32 x i8> %t3
+}
+
 define <32 x i8> @insert_dup_mem_v32i8_i32(i32* %ptr) {
 ; AVX1-LABEL: insert_dup_mem_v32i8_i32:
 ; AVX1:       # %bb.0:
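
An illustrative cross-check, not part of the patch: the test's IR performs a qword swap (%t1) followed by a per-byte gather (%t3), and the AVX512VLVBMI lowering folds both into a single vpermb. The sketch below composes the two shufflevector index vectors from the test and compares the result against the vpermb index vector in the CHECK lines; only the C harness itself is new, all constants are copied from the diff above.

// Compose the two PR47262 shuffle masks and compare against the
// vpermb index vector from the AVX512VLVBMI CHECK lines.
#include <stdio.h>

int main(void) {
    // <4 x i32> mask of %t1: qword k of %t1 is qword qmask[k] of %a0.
    const int qmask[4] = {0, 2, 1, 3};
    // <32 x i32> mask of %t3: byte i of the result is byte bmask[i] of %t2.
    const int bmask[32] = {0, 4,  8, 12, 1, 5,  9, 13,
                           2, 6, 10, 14, 3, 7, 11, 15,
                           16, 20, 24, 28, 17, 21, 25, 29,
                           18, 22, 26, 30, 19, 23, 27, 31};
    // vpermb index vector taken from the AVX512VLVBMI check block.
    const int vpermb[32] = {0, 4, 16, 20, 1, 5, 17, 21,
                            2, 6, 18, 22, 3, 7, 19, 23,
                            8, 12, 24, 28, 9, 13, 25, 29,
                            10, 14, 26, 30, 11, 15, 27, 31};
    int mismatches = 0;
    for (int i = 0; i < 32; ++i) {
        // Byte j of %t2 lives in qword j/8 of %t1, which is qword
        // qmask[j/8] of %a0, so composing the shuffles gives byte
        // qmask[j/8]*8 + j%8 of %a0.
        int j = bmask[i];
        int composed = qmask[j / 8] * 8 + j % 8;
        if (composed != vpermb[i]) {
            printf("mismatch at byte %d: %d != %d\n", i, composed, vpermb[i]);
            ++mismatches;
        }
    }
    printf(mismatches ? "masks disagree\n" : "composed mask matches vpermb\n");
    return mismatches != 0;
}

Running it prints "composed mask matches vpermb", confirming that the single-instruction VBMI lowering implements the same byte permutation as the two-shuffle IR.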