diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 0824e11574009..f2163373ccaed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2662,7 +2662,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { // If CTPOP is legal, use it. Otherwise use shifts and xor. SDValue Result; - if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) { Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); } else { Result = Op; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fc6bde29b50fc..2a012666b4719 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -431,6 +431,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::PARITY, MVT::i64, Custom); if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); + // popcntw is longer to encode than popcntl and also has a false dependency + // on the dest that popcntl hasn't had since Cannon Lake. + setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32); } else { setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll index 0d5f8f4fd0c53..e52a32d261ed2 100644 --- a/llvm/test/CodeGen/X86/parity-vec.ll +++ b/llvm/test/CodeGen/X86/parity-vec.ll @@ -18,7 +18,7 @@ define i1 @canonical_parity(<16 x i1> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax +; CHECK-NEXT: popcntl %eax, %eax ; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq @@ -33,8 +33,8 @@ define i1 @canonical_parity_noncanonical_pred(<16 x i1> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax -; CHECK-NEXT: # kill: def $al killed $al killed $ax +; CHECK-NEXT: popcntl %eax, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) @@ -62,7 +62,7 @@ define i1 @canonical_nonparity(<16 x i1> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax +; CHECK-NEXT: popcntl %eax, %eax ; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq @@ -77,7 +77,7 @@ define i1 @canonical_nonparity_noncanonical_pred(<16 x i1> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax +; CHECK-NEXT: popcntl %eax, %eax ; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: xorb $1, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index ce403c6e8e7fd..5f33db141b542 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -59,14 +59,16 @@ define i16 @parity_16(i16 %x) { ; ; X86-POPCNT-LABEL: parity_16: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax ; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_16: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: movzwl %di, %eax +; X64-POPCNT-NEXT: popcntl %eax, %eax ; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-POPCNT-NEXT: retq @@ -98,14 +100,16 @@ define i16 @parity_16_load(i16* %x) { ; X86-POPCNT-LABEL: parity_16_load: ; X86-POPCNT: # %bb.0: ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: popcntw (%eax), %ax +; X86-POPCNT-NEXT: movzwl (%eax), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax ; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_16_load: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw (%rdi), %ax +; X64-POPCNT-NEXT: movzwl (%rdi), %eax +; X64-POPCNT-NEXT: popcntl %eax, %eax ; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-POPCNT-NEXT: retq @@ -420,7 +424,8 @@ define i16 @parity_16_shift(i16 %0) { ; ; X86-POPCNT-LABEL: parity_16_shift: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax ; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: addl %eax, %eax ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax @@ -428,7 +433,8 @@ define i16 @parity_16_shift(i16 %0) { ; ; X64-POPCNT-LABEL: parity_16_shift: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: movzwl %di, %eax +; X64-POPCNT-NEXT: popcntl %eax, %eax ; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: addl %eax, %eax ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll index bc24b41ff3d9f..ad0fb4ccfd85b 100644 --- a/llvm/test/CodeGen/X86/popcnt.ll +++ b/llvm/test/CodeGen/X86/popcnt.ll @@ -107,12 +107,16 @@ define i16 @cnt16(i16 %x) nounwind readnone { ; ; X86-POPCNT-LABEL: cnt16: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: cnt16: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: movzwl %di, %eax +; X64-POPCNT-NEXT: popcntl %eax, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-POPCNT-NEXT: retq %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) ret i16 %cnt @@ -1564,14 +1568,13 @@ define i32 @popcount_i16_zext(i16 zeroext %x) { ; ; X86-POPCNT-LABEL: popcount_i16_zext: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax -; X86-POPCNT-NEXT: movzwl %ax, %eax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: popcount_i16_zext: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax -; X64-POPCNT-NEXT: movzwl %ax, %eax +; X64-POPCNT-NEXT: popcntl %edi, %eax ; X64-POPCNT-NEXT: retq %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) %z = zext i16 %cnt to i32