diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f5b8b58c18e1e..49beadae63f03 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48860,6 +48860,26 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
     return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
   }
 
+  // Attempt to convert PTESTZ(X,SIGNMASK) -> VTESTPD/PSZ(X,X) on AVX targets.
+  if (EFLAGS.getOpcode() == X86ISD::PTEST && Subtarget.hasAVX()) {
+    KnownBits KnownOp1 = DAG.computeKnownBits(Op1);
+    assert(KnownOp1.getBitWidth() == 64 &&
+           "Illegal PTEST vector element width");
+    if (KnownOp1.isConstant()) {
+      const APInt &Mask = KnownOp1.getConstant();
+      if (Mask.isSignMask()) {
+        MVT FpVT = MVT::getVectorVT(MVT::f64, OpVT.getSizeInBits() / 64);
+        Op0 = DAG.getBitcast(FpVT, DAG.getFreeze(Op0));
+        return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Op0, Op0);
+      }
+      if (Mask.isSplat(32) && Mask.trunc(32).isSignMask()) {
+        MVT FpVT = MVT::getVectorVT(MVT::f32, OpVT.getSizeInBits() / 32);
+        Op0 = DAG.getBitcast(FpVT, DAG.getFreeze(Op0));
+        return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Op0, Op0);
+      }
+    }
+  }
+
   // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
   // TODO: Add COND_NE handling?
   if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX()) {
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index 9816fa7c83560..044327d94c0ef 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -875,28 +875,12 @@ define i1 @mask_v8i32(<8 x i32> %a0) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: mask_v8i32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
-; AVX1-NEXT:    sete %al
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: mask_v8i32:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX2-NEXT:    vptest %ymm1, %ymm0
-; AVX2-NEXT:    sete %al
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: mask_v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX512-NEXT:    vptest %ymm1, %ymm0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX-LABEL: mask_v8i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vtestps %ymm0, %ymm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
   %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   %2 = and i32 %1, 2147483648
   %3 = icmp eq i32 %2, 0
@@ -965,28 +949,12 @@ define i1 @signtest_v8i32(<8 x i32> %a0) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: signtest_v8i32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
-; AVX1-NEXT:    sete %al
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: signtest_v8i32:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX2-NEXT:    vptest %ymm1, %ymm0
-; AVX2-NEXT:    sete %al
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: signtest_v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
-; AVX512-NEXT:    vptest %ymm1, %ymm0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX-LABEL: signtest_v8i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vtestps %ymm0, %ymm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
   %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   %2 = icmp sgt i32 %1, -1
   ret i1 %2
@@ -1010,28 +978,12 @@ define i1 @signtest_v4i64(<4 x i64> %a0) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: signtest_v4i64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
-; AVX1-NEXT:    sete %al
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: signtest_v4i64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT:    vptest %ymm1, %ymm0
-; AVX2-NEXT:    sete %al
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: signtest_v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vptest %ymm1, %ymm0
-; AVX512-NEXT:    sete %al
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX-LABEL: signtest_v4i64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vtestpd %ymm0, %ymm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
   %1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
   %2 = icmp sgt i64 %1, -1
   ret i1 %2
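
Why the fold is sound: `vptest` sets ZF when `X AND Mask` is all zero, so testing against a splatted sign-mask constant (0x80000000 per i32 lane, or 0x8000000000000000 per i64 lane) asks "are all sign bits of X clear?". That is exactly the ZF semantics of `vtestps`/`vtestpd` applied to a register against itself, which lets the combine drop the constant-pool load (AVX1) or broadcast (AVX2/AVX512) entirely; `Op0` is frozen because it now feeds both operands. Below is a minimal standalone sketch (an editor's illustration, not part of the patch) checking the per-lane equivalence for the 32-bit case; the 64-bit `vtestpd` case is identical with bit 63:

```cpp
// Hypothetical sanity check, not part of the patch: per 32-bit lane, the ZF
// contribution of PTESTZ(X, SIGNMASK) equals that of VTESTPS(X, X), i.e.
// "(lane & 0x80000000) == 0" <=> "sign bit of lane is clear".
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t SignMask = 0x80000000u;
  const uint32_t Lanes[] = {0u, 1u, 0x7fffffffu, 0x80000000u, 0xdeadbeefu};
  for (uint32_t Lane : Lanes) {
    bool PTestZLane = (Lane & SignMask) == 0; // vptest: AND with mask all zero?
    bool VTestPSLane = (Lane >> 31) == 0;     // vtestps: lane sign bit clear?
    assert(PTestZLane == VTestPSLane && "fold would change observable flags");
  }
  return 0;
}
```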