diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c908ec6fb48fab..de1c8f03b86158 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -54460,11 +54460,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, unsigned NumOps = Ops.size(); switch (Op0.getOpcode()) { case X86ISD::VBROADCAST: { - if (!IsSplat && VT == MVT::v4f64 && llvm::all_of(Ops, [](SDValue Op) { + if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) { return Op.getOperand(0).getValueType().is128BitVector(); - })) - return DAG.getNode(X86ISD::MOVDDUP, DL, VT, - ConcatSubOperand(VT, Ops, 0)); + })) { + if (VT == MVT::v4f64 || VT == MVT::v4i64) + return DAG.getNode(X86ISD::UNPCKL, DL, VT, + ConcatSubOperand(VT, Ops, 0), + ConcatSubOperand(VT, Ops, 0)); + // TODO: Add pseudo v8i32 PSHUFD handling to AVX1Only targets. + if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256())) + return DAG.getNode(VT == MVT::v8f32 ? 
X86ISD::VPERMILPI + : X86ISD::PSHUFD, + DL, VT, ConcatSubOperand(VT, Ops, 0), + getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG)); + } break; } case X86ISD::MOVDDUP: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index 5f5b799a4c6c04..d8f49797ba0f5c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -1550,16 +1550,16 @@ define <4 x i64> @shuffle_v4i64_0044_v2i64(<2 x i64> %a, <2 x i64> %b) { ; ; AVX2-LABEL: shuffle_v4i64_0044_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v4i64_0044_v2i64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; AVX512VL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] ; AVX512VL-NEXT: retq %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> %2 = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> <i32 0, i32 0> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index 6f843ab0e84785..8c196a377da648 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -639,19 +639,12 @@ define <8 x float> @shuffle_v8f32_00224466_v4f32(<4 x float> %a, <4 x float> %b) } define <8 x float> @shuffle_v8f32_00004444_v4f32(<4 x float> %a, <4 x float> %b) { -; AVX1-LABEL: shuffle_v8f32_00004444_v4f32: -; AVX1: # %bb.0: -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vpermilps 
{{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v8f32_00004444_v4f32: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0 -; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1 -; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX2OR512VL-NEXT: retq +; ALL-LABEL: shuffle_v8f32_00004444_v4f32: +; ALL: # %bb.0: +; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] +; ALL-NEXT: retq %1 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> ret <8 x float> %1 } @@ -3289,19 +3282,12 @@ define <8 x i32> @shuffle_v8i32_32107654_v4i32(<4 x i32> %a, <4 x i32> %b) { } define <8 x i32> @shuffle_v8i32_00004444_v4f32(<4 x i32> %a, <4 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_00004444_v4f32: -; AVX1: # %bb.0: -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v8i32_00004444_v4f32: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0 -; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1 -; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX2OR512VL-NEXT: retq +; ALL-LABEL: shuffle_v8i32_00004444_v4f32: +; ALL: # %bb.0: +; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] +; ALL-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> ret <8 x i32> %1 }