diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0c408273dac7c2..b6f7b92426973b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51134,6 +51134,9 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   MVT VT = N->getSimpleValueType(0);
+  MVT SVT = VT.getScalarType();
+  int NumElts = VT.getVectorNumElements();
+  unsigned EltSizeInBits = VT.getScalarSizeInBits();
 
   // ANDNP(undef, x) -> 0
   // ANDNP(x, undef) -> 0
@@ -51152,6 +51155,19 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
   if (SDValue Not = IsNOT(N0, DAG))
     return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
 
+  // Constant Folding
+  APInt Undefs0, Undefs1;
+  SmallVector<APInt> EltBits0, EltBits1;
+  if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0) &&
+      getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) {
+    SDLoc DL(N);
+    SmallVector<APInt> ResultBits;
+    for (int I = 0; I != NumElts; ++I)
+      ResultBits.push_back(~EltBits0[I] & EltBits1[I]);
+    APInt ResultUndefs = APInt::getZero(NumElts);
+    return getConstVector(ResultBits, ResultUndefs, VT, DAG, DL);
+  }
+
   // TODO: Constant fold NOT(N0) to allow us to use AND.
   // TODO: Do this in IsNOT with suitable oneuse checks?
 
@@ -51166,8 +51182,6 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
   auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
     APInt UndefElts;
     SmallVector<APInt> EltBits;
-    int NumElts = VT.getVectorNumElements();
-    int EltSizeInBits = VT.getScalarSizeInBits();
     APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
     APInt DemandedElts = APInt::getAllOnes(NumElts);
     if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
diff --git a/llvm/test/CodeGen/X86/fshl-splat-undef.ll b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
index dcbd3a56a2ba7a..d6090d20cf9613 100644
--- a/llvm/test/CodeGen/X86/fshl-splat-undef.ll
+++ b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
@@ -22,12 +22,11 @@ define void @test_fshl(<8 x i64> %lo, <8 x i64> %hi, <8 x i64>* %arr) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
-; CHECK-NEXT:    vpandnq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm2, %zmm2
-; CHECK-NEXT:    vpsrlq $1, %zmm0, %zmm0
-; CHECK-NEXT:    vpsrlvq %zmm2, %zmm0, %zmm0
-; CHECK-NEXT:    vpsllq $12, %zmm1, %zmm1
-; CHECK-NEXT:    vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
-; CHECK-NEXT:    vmovdqa64 %zmm1, (%eax)
+; CHECK-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm2, %zmm2
+; CHECK-NEXT:    vpsllvq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vpsrlq $52, %zmm0, %zmm0
+; CHECK-NEXT:    vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
+; CHECK-NEXT:    vmovdqa64 %zmm0, (%eax)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retl
 entry: