diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7abc854454348..38be3a82af658 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16444,15 +16444,14 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                DAG, Subtarget);
   }
 
-  // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
-    return V;
-
   if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Blend;
 
-  // Check if the blend happens to exactly fit that of SHUFPD.
+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, V1, V2, Mask, DAG))
+    return V;
+
   if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Op;
diff --git a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
index 45df725d7a78c..0cdc5458e71ca 100644
--- a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
+++ b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
@@ -7,7 +7,7 @@ define void @foo(<2 x float> %0) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
 ; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
 ; CHECK-NEXT:    vmovlps %xmm0, 0
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 76183ac5f8fa3..75333bf835f89 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1662,19 +1662,46 @@ define <4 x double> @broadcast_v4f64_v2f64_4u61(ptr %vp, <4 x double> %default)
   ret <4 x double> %res
 }
 
+; TODO: prefer vblend vs vunpckh on AVX1 targets
 define <8 x float> @broadcast_v8f32_v2f32_u1uu0uEu(ptr %vp, <8 x float> %default) {
-; X86-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vbroadcastsd (%eax), %ymm1
-; X86-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X86-NEXT:    retl
+; X86-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X86-AVX1-NEXT:    retl
 ;
-; X64-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
-; X64:       # %bb.0:
-; X64-NEXT:    vbroadcastsd (%rdi), %ymm1
-; X64-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; X64-NEXT:    retq
+; X86-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX2-NEXT:    retl
+;
+; X86-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    vbroadcastsd (%eax), %ymm1
+; X86-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vbroadcastsd (%rdi), %ymm1
+; X64-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; X64-AVX512-NEXT:    retq
   %vec = load <2 x float>, ptr %vp
   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
   %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default