[X86] combineSetCCMOVMSK: tighten the MOVMSK(CONCAT(X,Y)) fold.

Summary of the hunks below:
  * The fold on 256-bit vectors is now additionally guarded by
    `NumElts <= CmpBits`.
  * The OR/AND is built on the integer form of the concat operand type
    (`changeTypeToInteger()`), with each operand explicitly bitcast to it,
    instead of being built directly from the collected operand list.
  * Two tests are added to combine-movmsk-avx.ll: one whose expected output
    is `vorps` followed by a 128-bit `vmovmskps`, and one (result masked with
    `and i32 %2, 3`) whose expected output keeps the 256-bit `vmovmskps`.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aff72452af6c75..961c39832b6278 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44500,14 +44500,16 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   // MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
   // MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
   // MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
-  if (VecVT.is256BitVector()) {
+  if (VecVT.is256BitVector() && NumElts <= CmpBits) {
     SmallVector<SDValue> Ops;
     if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
         Ops.size() == 2) {
       SDLoc DL(EFLAGS);
-      EVT SubVT = Ops[0].getValueType();
+      EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
       APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2);
-      SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, Ops);
+      SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT,
+                              DAG.getBitcast(SubVT, Ops[0]),
+                              DAG.getBitcast(SubVT, Ops[1]));
       V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V);
       return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
                          DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V),
diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index 17d01e1d3362ca..9e393a76a5b380 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -134,3 +134,36 @@ define i32 @movmskps_sext_v8i32(<8 x i16> %a0) {
   %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
   ret i32 %3
 }
+
+define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: movmskps_concat_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vmovmskps %xmm0, %eax
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    retq
+  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %1)
+  %3 = icmp ne i32 %2, 0
+  %4 = sext i1 %3 to i32
+  ret i32 %4
+}
+
+define i32 @movmskps_demanded_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: movmskps_demanded_concat_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; CHECK-NEXT:    vmovmskps %ymm0, %eax
+; CHECK-NEXT:    andl $3, %eax
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %1)
+  %3 = and i32 %2, 3
+  %4 = icmp ne i32 %3, 0
+  %5 = sext i1 %4 to i32
+  ret i32 %5
+}