diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index ae08d6e9313d6..ba5a5d6e87519 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -598,6 +598,7 @@ enum NodeType { CTLZ, CTPOP, BITREVERSE, + PARITY, /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eaa70444578a4..3aaf5e01d26a4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5574,6 +5574,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) return V; + // fold (and (ctpop X), 1) -> parity X + // Only do this before op legalization as it might be turned back into ctpop. + // TODO: Support vectors? + if (!LegalOperations && isOneConstant(N1) && N0.hasOneUse()) { + SDValue Tmp = N0; + + // It's possible the ctpop has been truncated, but since we only care about + // the LSB we can look through it. 
+ if (Tmp.getOpcode() == ISD::TRUNCATE && Tmp.getOperand(0).hasOneUse()) + Tmp = Tmp.getOperand(0); + + if (Tmp.getOpcode() == ISD::CTPOP) { + SDLoc dl(N); + SDValue Parity = + DAG.getNode(ISD::PARITY, dl, Tmp.getValueType(), Tmp.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Parity); + } + } + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7751ebb7705a3..71ba228b53f6f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -181,6 +181,7 @@ class SelectionDAGLegalize { SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); + SDValue ExpandPARITY(SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -2785,6 +2786,28 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { } } +/// Open code the operations for PARITY of the specified operation. +SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + // If CTPOP is legal, use it. Otherwise use shifts and xor. 
+ SDValue Result; + if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); + } else { + Result = Op; + for (unsigned i = Log2_32_Ceil(Sz); i != 0;) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result, + DAG.getConstant(1 << (--i), dl, ShVT)); + Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift); + } + } + + return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT)); +} + bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector Results; @@ -2816,6 +2839,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; + case ISD::PARITY: + Results.push_back(ExpandPARITY(Node->getOperand(0), dl)); + break; case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index bfe1b365efc4d..0000fcb1dde1b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -62,7 +62,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; - case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::PARITY: + case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: @@ -503,10 +504,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { NVT)); } -SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { - // Zero extend to the promoted type and do the count there. +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { + // Zero extend to the promoted type and do the count or parity there. 
SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -1980,6 +1981,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -2772,6 +2774,17 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); } +void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + // parity(HiLo) -> parity(Lo^Hi) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = + DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi)); + Hi = DAG.getConstant(0, dl, NVT); +} + void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 34c563672753d..86f4fcc023dd9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -311,7 +311,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CTLZ(SDNode *N); - SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue 
PromoteIntRes_FP_TO_XINT(SDNode *N); @@ -431,6 +431,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcd09b6141677..f854a4f4d35f8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -412,6 +412,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + case ISD::PARITY: return "parity"; // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 958bb7939046b..7ef37db68a28b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -692,6 +692,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::BITREVERSE, VT, Expand); + setOperationAction(ISD::PARITY, VT, Expand); // These library functions default to expand. 
setOperationAction(ISD::FROUND, VT, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8913dff47df42..5f7721267db0e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -385,6 +385,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::f80, MVT::f16, Expand); setTruncStoreAction(MVT::f128, MVT::f16, Expand); + setOperationAction(ISD::PARITY, MVT::i8, Custom); if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); } else { @@ -395,6 +396,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP , MVT::i64 , Expand); else setOperationAction(ISD::CTPOP , MVT::i64 , Custom); + + setOperationAction(ISD::PARITY, MVT::i16, Custom); + setOperationAction(ISD::PARITY, MVT::i32, Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::PARITY, MVT::i64, Custom); } setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); @@ -28865,6 +28871,58 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::OR, DL, VT, Lo, Hi); } +static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + SDValue X = Op.getOperand(0); + MVT VT = Op.getSimpleValueType(); + + // Special case. If the input fits in 8-bits we can use a single 8-bit TEST. + if (VT == MVT::i8 || + DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { + X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); + SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, + DAG.getConstant(0, DL, MVT::i8)); + // Copy the inverse of the parity flag into a register with setcc. + SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); + // Extend to the original type. 
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); + } + + if (VT == MVT::i64) { + // Xor the high and low 32-bit halves together using a 32-bit operation. + SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, + DAG.getNode(ISD::SRL, DL, MVT::i64, X, + DAG.getConstant(32, DL, MVT::i8))); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); + X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); + } + + if (VT != MVT::i16) { + // Xor the high and low 16-bits together using a 32-bit operation. + SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X, + DAG.getConstant(16, DL, MVT::i8)); + X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16); + } else { + // If the input is 16-bits, we need to extend to use an i32 shift below. + X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X); + } + + // Finally xor the low 2 bytes together and use an 8-bit flag setting xor. + // This should allow an h-reg to be used to save a shift. + SDValue Hi = DAG.getNode( + ISD::TRUNCATE, DL, MVT::i8, + DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8))); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); + SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); + SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); + + // Copy the inverse of the parity flag into a register with setcc. + SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); + // Extend to the original type. 
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); +} + static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { unsigned NewOpc = 0; @@ -29483,6 +29541,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget); case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG); + case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG); @@ -43285,89 +43344,6 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, return SDValue(); } -// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity. -// Turn it into series of XORs and a setnp. -static SDValue combineParity(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - // RHS needs to be 1. - if (!isOneConstant(N1)) - return SDValue(); - - // Popcnt may be truncated. - if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) - N0 = N0.getOperand(0); - - // LHS needs to be a single use CTPOP. - if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse()) - return SDValue(); - - EVT VT = N0.getValueType(); - - // We only support 64-bit and 32-bit. 64-bit requires special handling - // unless the 64-bit popcnt instruction is legal. - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT)) - return SDValue(); - - SDLoc DL(N); - SDValue X = N0.getOperand(0); - - // Special case. If the input fits in 8-bits we can use a single 8-bit TEST. 
- if (DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { - X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); - SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, - DAG.getConstant(0, DL, MVT::i8)); - // Copy the inverse of the parity flag into a register with setcc. - SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); - // Extend or truncate to the original type. - return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); - } - - // If this is 64-bit, its always best to xor the two 32-bit pieces together - // even if we have popcnt. - if (VT == MVT::i64) { - SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(32, DL, MVT::i8))); - SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); - // Generate a 32-bit parity idiom. This will bring us back here if we need - // to expand it too. - SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32, - DAG.getNode(ISD::CTPOP, DL, MVT::i32, X), - DAG.getConstant(1, DL, MVT::i32)); - return DAG.getZExtOrTrunc(Parity, DL, N->getValueType(0)); - } - assert(VT == MVT::i32 && "Unexpected VT!"); - - // Xor the high and low 16-bits together using a 32-bit operation. - SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(16, DL, MVT::i8)); - X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16); - - // Finally xor the low 2 bytes together and use a 8-bit flag setting xor. - // This should allow an h-reg to be used to save a shift. - // FIXME: We only get an h-reg in 32-bit mode. - SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(8, DL, MVT::i8))); - SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); - SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); - SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); - - // Copy the inverse of the parity flag into a register with setcc. 
- SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); - // Extend or truncate to the original type. - return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); -} - - // Look for (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef,))), C) // Where C is a mask containing the same number of bits as the setcc and // where the setcc will freely 0 upper bits of k-register. We can replace the @@ -43459,10 +43435,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } } - // This must be done before legalization has expanded the ctpop. - if (SDValue V = combineParity(N, DAG, Subtarget)) - return V; - // Match all-of bool scalar reductions into a bitcast/movmsk + cmp. // TODO: Support multiple SrcOps. if (VT == MVT::i1) { diff --git a/llvm/test/CodeGen/AArch64/parity.ll b/llvm/test/CodeGen/AArch64/parity.ll new file mode 100644 index 0000000000000..bdddb6f1069ce --- /dev/null +++ b/llvm/test/CodeGen/AArch64/parity.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s + +define i4 @parity_4(i4 %x) { +; CHECK-LABEL: parity_4: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xf +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; CHECK-LABEL: parity_8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 %x) { +; CHECK-LABEL: parity_16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor 
w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i17 @parity_17(i17 %x) { +; CHECK-LABEL: parity_17: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0x1ffff +; CHECK-NEXT: eor w8, w8, w8, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + +define i32 @parity_32(i32 %x) { +; CHECK-LABEL: parity_32: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w0, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = and i32 %1, 1 + ret i32 %2 +} + +define i64 @parity_64(i64 %x) { +; CHECK-LABEL: parity_64: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x0, lsr #32 +; CHECK-NEXT: eor x8, x8, x8, lsr #16 +; CHECK-NEXT: eor x8, x8, x8, lsr #8 +; CHECK-NEXT: eor x8, x8, x8, lsr #4 +; CHECK-NEXT: eor x8, x8, x8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and x0, x8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = and i64 %1, 1 + ret i64 %2 +} + +define i32 @parity_64_trunc(i64 %x) { +; CHECK-LABEL: parity_64_trunc: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x0, lsr #32 +; CHECK-NEXT: eor x8, x8, x8, lsr #16 +; CHECK-NEXT: eor x8, x8, x8, lsr #8 +; CHECK-NEXT: eor x8, x8, x8, lsr #4 +; CHECK-NEXT: eor x8, x8, x8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = trunc i64 %1 to i32 + %3 = and i32 %2, 1 + ret i32 %3 +} + +define i8 
@parity_32_trunc(i32 %x) { +; CHECK-LABEL: parity_32_trunc: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w0, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = trunc i32 %1 to i8 + %3 = and i8 %2, 1 + ret i8 %3 +} + +define i32 @parity_8_zext(i8 %x) { +; CHECK-LABEL: parity_8_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %a = zext i8 %x to i32 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +define i32 @parity_8_mask(i32 %x) { +; CHECK-LABEL: parity_8_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %a = and i32 %x, 255 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) +declare i32 @llvm.ctpop.i32(i32 %x) +declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/ARM/parity.ll b/llvm/test/CodeGen/ARM/parity.ll new file mode 100644 index 0000000000000..40c0d7bd32f11 --- /dev/null +++ b/llvm/test/CodeGen/ARM/parity.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s + +define i4 @parity_4(i4 %x) { +; CHECK-LABEL: parity_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; 
CHECK-NEXT: bx lr + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; CHECK-LABEL: parity_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 %x) { +; CHECK-LABEL: parity_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i17 @parity_17(i17 %x) { +; CHECK-LABEL: parity_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: bfc r0, #17, #15 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + +define i32 @parity_32(i32 %x) { +; CHECK-LABEL: parity_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = and i32 %1, 1 + ret i32 %2 +} + +define i64 @parity_64(i64 %x) { +; CHECK-LABEL: parity_64: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, 
lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = and i64 %1, 1 + ret i64 %2 +} + +define i32 @parity_64_trunc(i64 %x) { +; CHECK-LABEL: parity_64_trunc: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = trunc i64 %1 to i32 + %3 = and i32 %2, 1 + ret i32 %3 +} + +define i8 @parity_32_trunc(i32 %x) { +; CHECK-LABEL: parity_32_trunc: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = trunc i32 %1 to i8 + %3 = and i8 %2, 1 + ret i8 %3 +} + +define i32 @parity_8_zext(i8 %x) { +; CHECK-LABEL: parity_8_zext: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %a = zext i8 %x to i32 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +define i32 @parity_8_mask(i32 %x) { +; CHECK-LABEL: parity_8_mask: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %a = and i32 %x, 255 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) +declare i32 
@llvm.ctpop.i32(i32 %x) +declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 6289ab482426c..d7344a4a2ed78 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -4,6 +4,187 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT +define i4 @parity_4(i4 %x) { +; X86-NOPOPCNT-LABEL: parity_4: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: testb $15, {{[0-9]+}}(%esp) +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_4: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: testb $15, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_4: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: testb $15, {{[0-9]+}}(%esp) +; X86-POPCNT-NEXT: setnp %al +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_4: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: testb $15, %dil +; X64-POPCNT-NEXT: setnp %al +; X64-POPCNT-NEXT: retq + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; X86-NOPOPCNT-LABEL: parity_8: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_8: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: testb %dil, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_8: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-POPCNT-NEXT: setnp %al +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_8: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: testb %dil, %dil +; X64-POPCNT-NEXT: setnp %al +; X64-POPCNT-NEXT: retq + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 
%x) { +; X86-NOPOPCNT-LABEL: parity_16: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_16: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_16: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_16: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-POPCNT-NEXT: retq + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i16 @parity_16_load(i16* %x) { +; X86-NOPOPCNT-LABEL: parity_16_load: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movzwl (%eax), %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_16_load: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movzwl (%rdi), %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_16_load: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntw (%eax), %ax +; X86-POPCNT-NEXT: 
andl $1, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_16_load: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntw (%rdi), %ax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-POPCNT-NEXT: retq + %1 = load i16, i16* %x + %2 = tail call i16 @llvm.ctpop.i16(i16 %1) + %3 = and i16 %2, 1 + ret i16 %3 +} + +define i17 @parity_17(i17 %x) { +; X86-NOPOPCNT-LABEL: parity_17: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %eax +; X86-NOPOPCNT-NEXT: andl $131071, %eax # imm = 0x1FFFF +; X86-NOPOPCNT-NEXT: movl %eax, %edx +; X86-NOPOPCNT-NEXT: shrl $16, %edx +; X86-NOPOPCNT-NEXT: xorl %eax, %edx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %dl, %ch +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_17: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: andl $131071, %eax # imm = 0x1FFFF +; X64-NOPOPCNT-NEXT: movl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $16, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $8, %edi +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %cl, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_17: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movl $131071, %eax # imm = 0x1FFFF +; X86-POPCNT-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_17: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: andl $131071, %edi # imm = 0x1FFFF +; X64-POPCNT-NEXT: popcntl %edi, %eax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: retq + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + define i32 @parity_32(i32 %x) { ; X86-NOPOPCNT-LABEL: parity_32: ; X86-NOPOPCNT: # %bb.0: 
@@ -157,14 +338,14 @@ define i8 @parity_32_trunc(i32 %x) { ; X86-POPCNT-LABEL: parity_32_trunc: ; X86-POPCNT: # %bb.0: ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: andb $1, %al +; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_32_trunc: ; X64-POPCNT: # %bb.0: ; X64-POPCNT-NEXT: popcntl %edi, %eax -; X64-POPCNT-NEXT: andb $1, %al +; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax ; X64-POPCNT-NEXT: retq %1 = tail call i32 @llvm.ctpop.i32(i32 %x) @@ -241,5 +422,9 @@ define i32 @parity_8_mask(i32 %x) { ret i32 %c } +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) declare i32 @llvm.ctpop.i32(i32 %x) declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll index fb019ffd99e9b..06a428c514a78 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -53,7 +53,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $3, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <2 x i64> %0 to <2 x i1> @@ -103,7 +103,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <4 x i32> %0 to <4 x i1> @@ -251,7 +251,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0 ; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; 
AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -974,7 +974,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $3, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <2 x i64> %0, zeroinitializer @@ -1025,7 +1025,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <4 x i32> %0, zeroinitializer @@ -1214,7 +1214,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq