diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index ae08d6e9313d6..ba5a5d6e87519 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -598,6 +598,7 @@ enum NodeType { CTLZ, CTPOP, BITREVERSE, + PARITY, /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eaa70444578a4..3aaf5e01d26a4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5574,6 +5574,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) return V; + // fold (and (ctpop X), 1) -> parity X + // Only do this before op legalization as it might be turned back into ctpop. + // TODO: Support vectors? + if (!LegalOperations && isOneConstant(N1) && N0.hasOneUse()) { + SDValue Tmp = N0; + + // It's possible the ctpop has been truncated, but since we only care about + // the LSB we can look through it. 
+ if (Tmp.getOpcode() == ISD::TRUNCATE && Tmp.getOperand(0).hasOneUse()) + Tmp = Tmp.getOperand(0); + + if (Tmp.getOpcode() == ISD::CTPOP) { + SDLoc dl(N); + SDValue Parity = + DAG.getNode(ISD::PARITY, dl, Tmp.getValueType(), Tmp.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Parity); + } + } + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7751ebb7705a3..71ba228b53f6f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -181,6 +181,7 @@ class SelectionDAGLegalize { SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); + SDValue ExpandPARITY(SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -2785,6 +2786,28 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { } } +/// Open code the operations for PARITY of the specified operation. +SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + // If CTPOP is legal, use it. Otherwise use shifts and xor. 
+ SDValue Result; + if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); + } else { + Result = Op; + for (unsigned i = Log2_32_Ceil(Sz); i != 0;) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result, + DAG.getConstant(1 << (--i), dl, ShVT)); + Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift); + } + } + + return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT)); +} + bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector Results; @@ -2816,6 +2839,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; + case ISD::PARITY: + Results.push_back(ExpandPARITY(Node->getOperand(0), dl)); + break; case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index bfe1b365efc4d..0000fcb1dde1b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -62,7 +62,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; - case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::PARITY: + case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: @@ -503,10 +504,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { NVT)); } -SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { - // Zero extend to the promoted type and do the count there. +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { + // Zero extend to the promoted type and do the count or parity there. 
SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -1980,6 +1981,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -2772,6 +2774,17 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); } +void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + // parity(HiLo) -> parity(Lo^Hi) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = + DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi)); + Hi = DAG.getConstant(0, dl, NVT); +} + void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 34c563672753d..86f4fcc023dd9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -311,7 +311,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CTLZ(SDNode *N); - SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue 
PromoteIntRes_FP_TO_XINT(SDNode *N); @@ -431,6 +431,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcd09b6141677..f854a4f4d35f8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -412,6 +412,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + case ISD::PARITY: return "parity"; // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 958bb7939046b..7ef37db68a28b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -692,6 +692,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::BITREVERSE, VT, Expand); + setOperationAction(ISD::PARITY, VT, Expand); // These library functions default to expand. 
setOperationAction(ISD::FROUND, VT, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8913dff47df42..5f7721267db0e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -385,6 +385,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::f80, MVT::f16, Expand); setTruncStoreAction(MVT::f128, MVT::f16, Expand); + setOperationAction(ISD::PARITY, MVT::i8, Custom); if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); } else { @@ -395,6 +396,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP , MVT::i64 , Expand); else setOperationAction(ISD::CTPOP , MVT::i64 , Custom); + + setOperationAction(ISD::PARITY, MVT::i16, Custom); + setOperationAction(ISD::PARITY, MVT::i32, Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::PARITY, MVT::i64, Custom); } setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); @@ -28865,6 +28871,58 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::OR, DL, VT, Lo, Hi); } +static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + SDValue X = Op.getOperand(0); + MVT VT = Op.getSimpleValueType(); + + // Special case. If the input fits in 8-bits we can use a single 8-bit TEST. + if (VT == MVT::i8 || + DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { + X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); + SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, + DAG.getConstant(0, DL, MVT::i8)); + // Copy the inverse of the parity flag into a register with setcc. + SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); + // Extend to the original type. 
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); + } + + if (VT == MVT::i64) { + // Xor the high and low 32-bit halves together using a 32-bit operation. + SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, + DAG.getNode(ISD::SRL, DL, MVT::i64, X, + DAG.getConstant(32, DL, MVT::i8))); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); + X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); + } + + if (VT != MVT::i16) { + // Xor the high and low 16-bits together using a 32-bit operation. + SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X, + DAG.getConstant(16, DL, MVT::i8)); + X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16); + } else { + // If the input is 16-bits, we need to extend to use an i32 shift below. + X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X); + } + + // Finally xor the low 2 bytes together and use an 8-bit flag setting xor. + // This should allow an h-reg to be used to save a shift. + SDValue Hi = DAG.getNode( + ISD::TRUNCATE, DL, MVT::i8, + DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8))); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); + SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); + SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); + + // Copy the inverse of the parity flag into a register with setcc. + SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); + // Extend to the original type. 
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); +} + static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { unsigned NewOpc = 0; @@ -29483,6 +29541,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget); case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG); + case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG); @@ -43285,89 +43344,6 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, return SDValue(); } -// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity. -// Turn it into series of XORs and a setnp. -static SDValue combineParity(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - // RHS needs to be 1. - if (!isOneConstant(N1)) - return SDValue(); - - // Popcnt may be truncated. - if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) - N0 = N0.getOperand(0); - - // LHS needs to be a single use CTPOP. - if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse()) - return SDValue(); - - EVT VT = N0.getValueType(); - - // We only support 64-bit and 32-bit. 64-bit requires special handling - // unless the 64-bit popcnt instruction is legal. - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT)) - return SDValue(); - - SDLoc DL(N); - SDValue X = N0.getOperand(0); - - // Special case. If the input fits in 8-bits we can use a single 8-bit TEST. 
- if (DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { - X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); - SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, - DAG.getConstant(0, DL, MVT::i8)); - // Copy the inverse of the parity flag into a register with setcc. - SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); - // Extend or truncate to the original type. - return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); - } - - // If this is 64-bit, its always best to xor the two 32-bit pieces together - // even if we have popcnt. - if (VT == MVT::i64) { - SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(32, DL, MVT::i8))); - SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); - // Generate a 32-bit parity idiom. This will bring us back here if we need - // to expand it too. - SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32, - DAG.getNode(ISD::CTPOP, DL, MVT::i32, X), - DAG.getConstant(1, DL, MVT::i32)); - return DAG.getZExtOrTrunc(Parity, DL, N->getValueType(0)); - } - assert(VT == MVT::i32 && "Unexpected VT!"); - - // Xor the high and low 16-bits together using a 32-bit operation. - SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(16, DL, MVT::i8)); - X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16); - - // Finally xor the low 2 bytes together and use a 8-bit flag setting xor. - // This should allow an h-reg to be used to save a shift. - // FIXME: We only get an h-reg in 32-bit mode. - SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - DAG.getNode(ISD::SRL, DL, VT, X, - DAG.getConstant(8, DL, MVT::i8))); - SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); - SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); - SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); - - // Copy the inverse of the parity flag into a register with setcc. 
- SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); - // Extend or truncate to the original type. - return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); -} - - // Look for (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef,))), C) // Where C is a mask containing the same number of bits as the setcc and // where the setcc will freely 0 upper bits of k-register. We can replace the @@ -43459,10 +43435,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } } - // This must be done before legalization has expanded the ctpop. - if (SDValue V = combineParity(N, DAG, Subtarget)) - return V; - // Match all-of bool scalar reductions into a bitcast/movmsk + cmp. // TODO: Support multiple SrcOps. if (VT == MVT::i1) { diff --git a/llvm/test/CodeGen/AArch64/parity.ll b/llvm/test/CodeGen/AArch64/parity.ll new file mode 100644 index 0000000000000..bdddb6f1069ce --- /dev/null +++ b/llvm/test/CodeGen/AArch64/parity.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s + +define i4 @parity_4(i4 %x) { +; CHECK-LABEL: parity_4: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xf +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; CHECK-LABEL: parity_8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 %x) { +; CHECK-LABEL: parity_16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor 
w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i17 @parity_17(i17 %x) { +; CHECK-LABEL: parity_17: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0x1ffff +; CHECK-NEXT: eor w8, w8, w8, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + +define i32 @parity_32(i32 %x) { +; CHECK-LABEL: parity_32: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w0, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = and i32 %1, 1 + ret i32 %2 +} + +define i64 @parity_64(i64 %x) { +; CHECK-LABEL: parity_64: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x0, lsr #32 +; CHECK-NEXT: eor x8, x8, x8, lsr #16 +; CHECK-NEXT: eor x8, x8, x8, lsr #8 +; CHECK-NEXT: eor x8, x8, x8, lsr #4 +; CHECK-NEXT: eor x8, x8, x8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and x0, x8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = and i64 %1, 1 + ret i64 %2 +} + +define i32 @parity_64_trunc(i64 %x) { +; CHECK-LABEL: parity_64_trunc: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x0, lsr #32 +; CHECK-NEXT: eor x8, x8, x8, lsr #16 +; CHECK-NEXT: eor x8, x8, x8, lsr #8 +; CHECK-NEXT: eor x8, x8, x8, lsr #4 +; CHECK-NEXT: eor x8, x8, x8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = trunc i64 %1 to i32 + %3 = and i32 %2, 1 + ret i32 %3 +} + +define i8 
@parity_32_trunc(i32 %x) { +; CHECK-LABEL: parity_32_trunc: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w0, lsr #16 +; CHECK-NEXT: eor w8, w8, w8, lsr #8 +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = trunc i32 %1 to i8 + %3 = and i8 %2, 1 + ret i8 %3 +} + +define i32 @parity_8_zext(i8 %x) { +; CHECK-LABEL: parity_8_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %a = zext i8 %x to i32 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +define i32 @parity_8_mask(i32 %x) { +; CHECK-LABEL: parity_8_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w8, w8, lsr #4 +; CHECK-NEXT: eor w8, w8, w8, lsr #2 +; CHECK-NEXT: eor w8, w8, w8, lsr #1 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %a = and i32 %x, 255 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) +declare i32 @llvm.ctpop.i32(i32 %x) +declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/ARM/parity.ll b/llvm/test/CodeGen/ARM/parity.ll new file mode 100644 index 0000000000000..40c0d7bd32f11 --- /dev/null +++ b/llvm/test/CodeGen/ARM/parity.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s + +define i4 @parity_4(i4 %x) { +; CHECK-LABEL: parity_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; 
CHECK-NEXT: bx lr + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; CHECK-LABEL: parity_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 %x) { +; CHECK-LABEL: parity_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i17 @parity_17(i17 %x) { +; CHECK-LABEL: parity_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: bfc r0, #17, #15 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + +define i32 @parity_32(i32 %x) { +; CHECK-LABEL: parity_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = and i32 %1, 1 + ret i32 %2 +} + +define i64 @parity_64(i64 %x) { +; CHECK-LABEL: parity_64: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, 
lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = and i64 %1, 1 + ret i64 %2 +} + +define i32 @parity_64_trunc(i64 %x) { +; CHECK-LABEL: parity_64_trunc: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = trunc i64 %1 to i32 + %3 = and i32 %2, 1 + ret i32 %3 +} + +define i8 @parity_32_trunc(i32 %x) { +; CHECK-LABEL: parity_32_trunc: +; CHECK: @ %bb.0: +; CHECK-NEXT: eor r0, r0, r0, lsr #16 +; CHECK-NEXT: eor r0, r0, r0, lsr #8 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = trunc i32 %1 to i8 + %3 = and i8 %2, 1 + ret i8 %3 +} + +define i32 @parity_8_zext(i8 %x) { +; CHECK-LABEL: parity_8_zext: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %a = zext i8 %x to i32 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +define i32 @parity_8_mask(i32 %x) { +; CHECK-LABEL: parity_8_mask: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0 +; CHECK-NEXT: eor r0, r0, r0, lsr #4 +; CHECK-NEXT: eor r0, r0, r0, lsr #2 +; CHECK-NEXT: eor r0, r0, r0, lsr #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: bx lr + %a = and i32 %x, 255 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) +declare i32 
@llvm.ctpop.i32(i32 %x) +declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 6289ab482426c..d7344a4a2ed78 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -4,6 +4,187 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT +define i4 @parity_4(i4 %x) { +; X86-NOPOPCNT-LABEL: parity_4: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: testb $15, {{[0-9]+}}(%esp) +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_4: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: testb $15, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_4: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: testb $15, {{[0-9]+}}(%esp) +; X86-POPCNT-NEXT: setnp %al +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_4: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: testb $15, %dil +; X64-POPCNT-NEXT: setnp %al +; X64-POPCNT-NEXT: retq + %1 = tail call i4 @llvm.ctpop.i4(i4 %x) + %2 = and i4 %1, 1 + ret i4 %2 +} + +define i8 @parity_8(i8 %x) { +; X86-NOPOPCNT-LABEL: parity_8: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_8: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: testb %dil, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_8: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; X86-POPCNT-NEXT: setnp %al +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_8: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: testb %dil, %dil +; X64-POPCNT-NEXT: setnp %al +; X64-POPCNT-NEXT: retq + %1 = tail call i8 @llvm.ctpop.i8(i8 %x) + %2 = and i8 %1, 1 + ret i8 %2 +} + +define i16 @parity_16(i16 
%x) { +; X86-NOPOPCNT-LABEL: parity_16: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_16: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_16: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_16: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-POPCNT-NEXT: retq + %1 = tail call i16 @llvm.ctpop.i16(i16 %x) + %2 = and i16 %1, 1 + ret i16 %2 +} + +define i16 @parity_16_load(i16* %x) { +; X86-NOPOPCNT-LABEL: parity_16_load: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movzwl (%eax), %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_16_load: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movzwl (%rdi), %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %ch, %cl +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_16_load: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntw (%eax), %ax +; X86-POPCNT-NEXT: 
andl $1, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_16_load: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntw (%rdi), %ax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax +; X64-POPCNT-NEXT: retq + %1 = load i16, i16* %x + %2 = tail call i16 @llvm.ctpop.i16(i16 %1) + %3 = and i16 %2, 1 + ret i16 %3 +} + +define i17 @parity_17(i17 %x) { +; X86-NOPOPCNT-LABEL: parity_17: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %eax +; X86-NOPOPCNT-NEXT: andl $131071, %eax # imm = 0x1FFFF +; X86-NOPOPCNT-NEXT: movl %eax, %edx +; X86-NOPOPCNT-NEXT: shrl $16, %edx +; X86-NOPOPCNT-NEXT: xorl %eax, %edx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %dl, %ch +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_17: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: andl $131071, %eax # imm = 0x1FFFF +; X64-NOPOPCNT-NEXT: movl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $16, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $8, %edi +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %cl, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_17: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movl $131071, %eax # imm = 0x1FFFF +; X86-POPCNT-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_17: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: andl $131071, %edi # imm = 0x1FFFF +; X64-POPCNT-NEXT: popcntl %edi, %eax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: retq + %1 = tail call i17 @llvm.ctpop.i17(i17 %x) + %2 = and i17 %1, 1 + ret i17 %2 +} + define i32 @parity_32(i32 %x) { ; X86-NOPOPCNT-LABEL: parity_32: ; X86-NOPOPCNT: # %bb.0: 
@@ -157,14 +338,14 @@ define i8 @parity_32_trunc(i32 %x) { ; X86-POPCNT-LABEL: parity_32_trunc: ; X86-POPCNT: # %bb.0: ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: andb $1, %al +; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_32_trunc: ; X64-POPCNT: # %bb.0: ; X64-POPCNT-NEXT: popcntl %edi, %eax -; X64-POPCNT-NEXT: andb $1, %al +; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax ; X64-POPCNT-NEXT: retq %1 = tail call i32 @llvm.ctpop.i32(i32 %x) @@ -241,5 +422,9 @@ define i32 @parity_8_mask(i32 %x) { ret i32 %c } +declare i4 @llvm.ctpop.i4(i4 %x) +declare i8 @llvm.ctpop.i8(i8 %x) +declare i16 @llvm.ctpop.i16(i16 %x) +declare i17 @llvm.ctpop.i17(i17 %x) declare i32 @llvm.ctpop.i32(i32 %x) declare i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll index fb019ffd99e9b..06a428c514a78 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -53,7 +53,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $3, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <2 x i64> %0 to <2 x i1> @@ -103,7 +103,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <4 x i32> %0 to <4 x i1> @@ -251,7 +251,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0 ; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; 
AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -974,7 +974,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $3, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <2 x i64> %0, zeroinitializer @@ -1025,7 +1025,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <4 x i32> %0, zeroinitializer @@ -1214,7 +1214,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: testb %al, %al +; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq