diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f608d8b88362e..7df01883ef90a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45564,10 +45564,11 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// TODO: Could we move this to DAGCombine?
 static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
                                                   SelectionDAG &DAG) {
-  // Take advantage of vector comparisons producing 0 or -1 in each lane to
-  // optimize away operation when it's from a constant.
+  // Take advantage of vector comparisons (etc.) producing 0 or -1 in each lane
+  // to optimize away operation when it's from a constant.
   //
   // The general transformation is:
   // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
@@ -45579,10 +45580,10 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
   // aren't the same.
   EVT VT = N->getValueType(0);
   bool IsStrict = N->isStrictFPOpcode();
+  unsigned NumEltBits = VT.getScalarSizeInBits();
   SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
-  if (!VT.isVector() || Op0->getOpcode() != ISD::AND ||
-      Op0->getOperand(0)->getOpcode() != ISD::SETCC ||
-      VT.getSizeInBits() != Op0.getValueSizeInBits())
+  if (!VT.isVector() || Op0.getOpcode() != ISD::AND ||
+      DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits)
     return SDValue();
 
   // Now check that the other operand of the AND is a constant. We could
diff --git a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
index 0274a8b45826c..365ae9a61c27d 100644
--- a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -97,17 +97,16 @@ define void @foo4(<4 x float>* noalias %result) nounwind {
   ret void
 }
 
-; TODO: Test when we're masking against a sign extended setcc.
+; Test when we're masking against a sign extended setcc.
 define <4 x float> @foo5(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: LCPI5_0:
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
 ; CHECK-NEXT: .long 0 ## 0x0
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
 ; CHECK-NEXT: .long 0 ## 0x0
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
 ; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = icmp sgt <4 x i32> %a0, %a1
   %2 = sext <4 x i1> %1 to <4 x i32>
@@ -116,12 +115,12 @@ define <4 x float> @foo5(<4 x i32> %a0, <4 x i32> %a1) {
   ret <4 x float> %4
 }
 
-; TODO: Test when we're masking against mask arithmetic, not the setcc's directly.
+; Test when we're masking against mask arithmetic, not the setcc's directly.
 define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: LCPI6_0:
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
 ; CHECK-NEXT: .long 0 ## 0x0
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
 ; CHECK-NEXT: .long 0 ## 0x0
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: movdqa %xmm0, %xmm2
@@ -130,7 +129,6 @@ define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
 ; CHECK-NEXT: pand %xmm2, %xmm0
 ; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = icmp sgt <4 x i32> %a0, %a1
   %2 = icmp sgt <4 x i32> %a0, zeroinitializer
@@ -139,4 +137,4 @@ define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
   %5 = and <4 x i32> %4, <i32 1, i32 0, i32 1, i32 0>
   %6 = uitofp <4 x i32> %5 to <4 x float>
   ret <4 x float> %6
-}
\ No newline at end of file
+}
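
For context, a minimal IR sketch of the fold this change enables, mirroring the foo5 test above (the @sketch_* names are illustrative, not part of the patch). The old guard only fired when the AND's first operand was literally an ISD::SETCC node; the new ComputeNumSignBits check accepts anything proven to be all-sign-bits in every lane, such as the sext-of-setcc here or the ANDed masks in foo6. Because each lane of the mask is 0 or -1, the AND selects between 0 and the constant lane, so the uitofp can be constant-folded into the mask constant and replaced by a bitcast:

; Hypothetical before/after sketch of the combine; names are illustrative.
define <4 x float> @sketch_before(<4 x i32> %a0, <4 x i32> %a1) {
  %cmp  = icmp sgt <4 x i32> %a0, %a1
  %mask = sext <4 x i1> %cmp to <4 x i32>   ; every lane is 0 or -1 (all sign bits)
  %sel  = and <4 x i32> %mask, <i32 1, i32 0, i32 1, i32 0>
  %cvt  = uitofp <4 x i32> %sel to <4 x float>
  ret <4 x float> %cvt
}

; After: uitofp(<1,0,1,0>) == <1.0,0.0,1.0,0.0>, whose lanes are 0x3f800000
; (1065353216) and 0x0 as raw bits, so the conversion folds into the mask
; constant and the cvtdq2ps in the checked asm disappears. This is sound
; because a zero mask lane yields the all-zero pattern, which is 0.0f.
define <4 x float> @sketch_after(<4 x i32> %a0, <4 x i32> %a1) {
  %cmp  = icmp sgt <4 x i32> %a0, %a1
  %mask = sext <4 x i1> %cmp to <4 x i32>
  %sel  = and <4 x i32> %mask, <i32 1065353216, i32 0, i32 1065353216, i32 0>
  %bits = bitcast <4 x i32> %sel to <4 x float>
  ret <4 x float> %bits
}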