[X86] Use parity flag from byte test/cmp instruction for __builtin_parity when input fits in 8 bits.

If the upper bits of the __builtin_parity idiom are known to be 0, we were previously emitting an xor with 0 to get the parity flag. But we can use cmp/test instead, which may expose opportunities for load folding or combining an AND.
llvm · Aug 2, 2020 · 64516ec · 64516ec
1 parent a258338
commit 64516ec
Show file tree

Hide file tree

Showing 3 changed files with 87 additions and 96 deletions.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42773,6 +42773,17 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
   SDLoc DL(N);
   SDValue X = N0.getOperand(0);
 
+  // Special case. If the input fits in 8-bits we can use a single 8-bit TEST.
+  if (DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) {
+    X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
+    SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X,
+                                DAG.getConstant(0, DL, MVT::i8));
+    // Copy the inverse of the parity flag into a register with setcc.
+    SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
+    // Extend or truncate to the original type.
+    return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0));
+  }
+
   // If this is 64-bit, its always best to xor the two 32-bit pieces together
   // even if we have popcnt.
   if (VT == MVT::i64) {

diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll
@@ -184,16 +184,15 @@ define i8 @parity_32_trunc(i32 %x) {
 define i32 @parity_8_zext(i8 %x) {
 ; X86-NOPOPCNT-LABEL: parity_8_zext:
 ; X86-NOPOPCNT:       # %bb.0:
-; X86-NOPOPCNT-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOPOPCNT-NEXT:    xorl %eax, %eax
-; X86-NOPOPCNT-NEXT:    xorb $0, %cl
+; X86-NOPOPCNT-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
 ; X86-NOPOPCNT-NEXT:    setnp %al
 ; X86-NOPOPCNT-NEXT:    retl
 ;
 ; X64-NOPOPCNT-LABEL: parity_8_zext:
 ; X64-NOPOPCNT:       # %bb.0:
 ; X64-NOPOPCNT-NEXT:    xorl %eax, %eax
-; X64-NOPOPCNT-NEXT:    xorb $0, %dil
+; X64-NOPOPCNT-NEXT:    testb %dil, %dil
 ; X64-NOPOPCNT-NEXT:    setnp %al
 ; X64-NOPOPCNT-NEXT:    retq
 ;
@@ -219,16 +218,15 @@ define i32 @parity_8_zext(i8 %x) {
 define i32 @parity_8_mask(i32 %x) {
 ; X86-NOPOPCNT-LABEL: parity_8_mask:
 ; X86-NOPOPCNT:       # %bb.0:
-; X86-NOPOPCNT-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOPOPCNT-NEXT:    xorl %eax, %eax
-; X86-NOPOPCNT-NEXT:    xorb $0, %cl
+; X86-NOPOPCNT-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
 ; X86-NOPOPCNT-NEXT:    setnp %al
 ; X86-NOPOPCNT-NEXT:    retl
 ;
 ; X64-NOPOPCNT-LABEL: parity_8_mask:
 ; X64-NOPOPCNT:       # %bb.0:
 ; X64-NOPOPCNT-NEXT:    xorl %eax, %eax
-; X64-NOPOPCNT-NEXT:    xorb $0, %dil
+; X64-NOPOPCNT-NEXT:    testb %dil, %dil
 ; X64-NOPOPCNT-NEXT:    setnp %al
 ; X64-NOPOPCNT-NEXT:    retq
 ;