# Review notes for this patch:
#
# This hunk adds one test function, @PR66150, to vector-shuffle-256-v32.ll,
# between the end of @PR55066 and the start of @insert_dup_mem_v32i8_i32.
#
# What the added IR does, as written below:
#   %tmp1  loads an i32 from %b (align 4)
#   %tmp2  inserts it into element 0 of an otherwise-undef <8 x i32>
#   %tmp3  shuffles %tmp2 (second operand poison) into an <8 x i32>
#   %tmp4  bitcasts that to <32 x i8>
#   %tmp5  shuffles %tmp4 (second operand poison) into a <32 x i8>
#   %tmp6  bitcasts to <4 x i64>, which is returned
#
# Expected codegen is checked under six prefixes: AVX1, AVX2, AVX512VLBW,
# AVX512VLVBMI, XOPAVX1 and XOPAVX2. The AVX1/XOPAVX1 bodies are the same
# instruction sequence, as are the AVX2/AVX512VLBW/XOPAVX2 bodies.
#
# NOTE(review): both shufflevector lines end right after the mask *type*
# (<8 x i32> / <32 x i32>) with no mask value. Presumably the "<i32 ...>"
# mask constants were lost when this patch text was extracted -- TODO restore
# them from the upstream commit before applying; they are deliberately NOT
# guessed here.
# NOTE(review): the CHECK lines look tool-generated; presumably they should be
# regenerated rather than hand-edited -- confirm against the file's header.
# NOTE(review): the runs of spaces inside the CHECK lines appear collapsed to
# single spaces in this copy; tokens are reproduced exactly as seen.
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 58401a2e34283..1e7745a4b8836 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -5109,6 +5109,63 @@ define <32 x i8> @PR55066(<32 x i8> %a0) {
   ret <32 x i8> %shuffle
 }
 
+define <4 x i64> @PR66150(ptr %b) {
+; AVX1-LABEL: PR66150:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR66150:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT: retq
+;
+; AVX512VLBW-LABEL: PR66150:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VLVBMI-LABEL: PR66150:
+; AVX512VLVBMI: # %bb.0:
+; AVX512VLVBMI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3]
+; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
+; AVX512VLVBMI-NEXT: retq
+;
+; XOPAVX1-LABEL: PR66150:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
+; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: PR66150:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; XOPAVX2-NEXT: retq
+  %tmp1 = load i32, ptr %b, align 4
+  %tmp2 = insertelement <8 x i32> undef, i32 %tmp1, i64 0
+  %tmp3 = shufflevector <8 x i32> %tmp2, <8 x i32> poison, <8 x i32>
+  %tmp4 = bitcast <8 x i32> %tmp3 to <32 x i8>
+  %tmp5 = shufflevector <32 x i8> %tmp4, <32 x i8> poison, <32 x i32>
+  %tmp6 = bitcast <32 x i8> %tmp5 to <4 x i64>
+  ret <4 x i64> %tmp6
+}
+
 define <32 x i8> @insert_dup_mem_v32i8_i32(ptr %ptr) {
 ; AVX1-LABEL: insert_dup_mem_v32i8_i32:
 ; AVX1: # %bb.0: