diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 414725ef5307b..ac94dee5e2dc1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47433,9 +47433,7 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC, // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y) // TODO: Add COND_NE handling? - // TODO: Add TESTP handling - if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX() && - EFLAGS.getOpcode() == X86ISD::PTEST) { + if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX()) { SDValue Src0 = peekThroughBitcasts(Op0); SDValue Src1 = peekThroughBitcasts(Op1); if (Src0.getOpcode() == ISD::OR && Src1.getOpcode() == ISD::OR) { @@ -47443,10 +47441,12 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC, peekThroughBitcasts(Src0.getOperand(1)), true); Src1 = getSplitVectorSrc(peekThroughBitcasts(Src1.getOperand(0)), peekThroughBitcasts(Src1.getOperand(1)), true); - if (Src0 && Src1) + if (Src0 && Src1) { + EVT OpVT2 = OpVT.getDoubleNumVectorElementsVT(*DAG.getContext()); return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, - DAG.getBitcast(MVT::v4i64, Src0), - DAG.getBitcast(MVT::v4i64, Src1)); + DAG.getBitcast(OpVT2, Src0), + DAG.getBitcast(OpVT2, Src1)); + } } } } diff --git a/llvm/test/CodeGen/X86/combine-testpd.ll b/llvm/test/CodeGen/X86/combine-testpd.ll index 8652c591430b1..9d684907adaac 100644 --- a/llvm/test/CodeGen/X86/combine-testpd.ll +++ b/llvm/test/CodeGen/X86/combine-testpd.ll @@ -206,14 +206,11 @@ define i32 @testpdnzc_256_signbit_multiuse(<4 x double> %c, i32 %a, i32 %b) { ret i32 %t6 } -; FIXME: Foldable to vtestpd(vcmpeqpd(ymm0,ymm1),vcmpeqpd(ymm0,ymm1)) define i1 @PR62171(<4 x double> %a0, <4 x double> %a1) { ; CHECK-LABEL: PR62171: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vorpd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vtestpd %xmm0, %xmm0 +; CHECK-NEXT: vtestpd %ymm0, %ymm0 ; CHECK-NEXT: sete %al ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/combine-testps.ll b/llvm/test/CodeGen/X86/combine-testps.ll index 43f99470cccd5..9b07d55675092 100644 --- a/llvm/test/CodeGen/X86/combine-testps.ll +++ b/llvm/test/CodeGen/X86/combine-testps.ll @@ -205,14 +205,11 @@ define i32 @testpsc_256_signbit_multiuse(<8 x float> %c, i32 %a, i32 %b) { ret i32 %t5 } -; FIXME: Foldable to vtestps(vcmpeqps(ymm0,ymm1),vcmpeqps(ymm0,ymm1)) define i1 @PR62171(<8 x float> %a0, <8 x float> %a1) { ; CHECK-LABEL: PR62171: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vtestps %xmm0, %xmm0 +; CHECK-NEXT: vtestps %ymm0, %ymm0 ; CHECK-NEXT: sete %al ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq