diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e91f68425522f..28b1a415b7711 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43438,19 +43438,17 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
 /// This function will also call SimplifyDemandedBits on already created
 /// BLENDV to perform additional simplifications.
 static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
-                                      TargetLowering::DAGCombinerInfo &DCI,
-                                      const X86Subtarget &Subtarget) {
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const X86Subtarget &Subtarget) {
   SDValue Cond = N->getOperand(0);
   if ((N->getOpcode() != ISD::VSELECT &&
        N->getOpcode() != X86ISD::BLENDV) ||
       ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
     return SDValue();
 
-  // Don't optimize before the condition has been transformed to a legal type
-  // and don't ever optimize vector selects that map to AVX512 mask-registers.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned BitWidth = Cond.getScalarValueSizeInBits();
-  if (BitWidth < 8 || BitWidth > 64)
-    return SDValue();
+  EVT VT = N->getValueType(0);
 
   // We can only handle the cases where VSELECT is directly legal on the
   // subtarget. We custom lower VSELECT nodes with constant conditions and
@@ -43462,8 +43460,6 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
   // Potentially, we should combine constant-condition vselect nodes
   // pre-legalization into shuffles and not mark as many types as custom
   // lowered.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT VT = N->getValueType(0);
   if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
     return SDValue();
   // FIXME: We don't support i16-element blends currently. We could and
@@ -43481,6 +43477,22 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
   if (VT.is512BitVector())
     return SDValue();
 
+  // PreAVX512, without mask-registers, attempt to sign-extend bool vectors to
+  // allow us to use BLENDV.
+  if (!Subtarget.hasAVX512() && BitWidth == 1) {
+    EVT CondVT = VT.changeVectorElementTypeToInteger();
+    if (SDValue ExtCond = combineToExtendBoolVectorInReg(
+            ISD::SIGN_EXTEND, SDLoc(N), CondVT, Cond, DAG, DCI, Subtarget)) {
+      return DAG.getNode(X86ISD::BLENDV, SDLoc(N), VT, ExtCond,
+                         N->getOperand(1), N->getOperand(2));
+    }
+  }
+
+  // Don't optimize before the condition has been transformed to a legal type
+  // and don't ever optimize vector selects that map to AVX512 mask-registers.
+  if (BitWidth < 8 || BitWidth > 64)
+    return SDValue();
+
   auto OnlyUsedAsSelectCond = [](SDValue Cond) {
     for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
          UI != UE; ++UI)
diff --git a/llvm/test/CodeGen/X86/vector-bo-select.ll b/llvm/test/CodeGen/X86/vector-bo-select.ll
index 74549f95a2625..a3ba1601c5f3f 100644
--- a/llvm/test/CodeGen/X86/vector-bo-select.ll
+++ b/llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -507,51 +507,11 @@ define <16 x float> @fdiv_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef
 define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fadd_v8f32_cast_cond:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $5, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    movl %edi, %ecx
-; AVX2-NEXT:    shrb $4, %cl
-; AVX2-NEXT:    movzbl %cl, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    negl %ecx
-; AVX2-NEXT:    vmovd %ecx, %xmm2
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $6, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $7, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vmovd %eax, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $2, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    shrb $3, %dil
-; AVX2-NEXT:    movzbl %dil, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT:    vmovd %edi, %xmm2
+; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
@@ -644,52 +604,13 @@ define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
 define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fsub_v8f32_cast_cond:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $5, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    movl %edi, %ecx
-; AVX2-NEXT:    shrb $4, %cl
-; AVX2-NEXT:    movzbl %cl, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    negl %ecx
-; AVX2-NEXT:    vmovd %ecx, %xmm2
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $6, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $7, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vmovd %eax, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $2, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    shrb $3, %dil
-; AVX2-NEXT:    movzbl %dil, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
-; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vmovd %edi, %xmm2
+; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT:    vsubps %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -780,51 +701,11 @@ define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
 define <8 x float> @fmul_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fmul_v8f32_cast_cond:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $5, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    movl %edi, %ecx
-; AVX2-NEXT:    shrb $4, %cl
-; AVX2-NEXT:    movzbl %cl, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    negl %ecx
-; AVX2-NEXT:    vmovd %ecx, %xmm2
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $6, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $7, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vmovd %eax, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $2, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    shrb $3, %dil
-; AVX2-NEXT:    movzbl %dil, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT:    vmovd %edi, %xmm2
+; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT:    vmulps %ymm1, %ymm0, %ymm0
@@ -917,51 +798,11 @@ define <8 x double> @fmul_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
 define <8 x float> @fdiv_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fdiv_v8f32_cast_cond:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $5, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    movl %edi, %ecx
-; AVX2-NEXT:    shrb $4, %cl
-; AVX2-NEXT:    movzbl %cl, %ecx
-; AVX2-NEXT:    andl $1, %ecx
-; AVX2-NEXT:    negl %ecx
-; AVX2-NEXT:    vmovd %ecx, %xmm2
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $6, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $7, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vmovd %eax, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    shrb $2, %al
-; AVX2-NEXT:    movzbl %al, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    shrb $3, %dil
-; AVX2-NEXT:    movzbl %dil, %eax
-; AVX2-NEXT:    andl $1, %eax
-; AVX2-NEXT:    negl %eax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT:    vmovd %edi, %xmm2
+; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT:    vdivps %ymm1, %ymm0, %ymm0