diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 834b470a4a867..a86f13135173b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31065,6 +31065,38 @@ static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget, return DAG.getZExtOrTrunc(Op, DL, VT); } + // i3 CTPOP - perform LUT into i32 integer. + if (ShiftedActiveBits <= 3) { + if (ActiveBits > 3) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getShiftAmountConstant(TZ, VT, DL)); + Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32); + Op = DAG.getNode(ISD::SHL, DL, MVT::i32, Op, + DAG.getShiftAmountConstant(1, VT, DL)); + Op = DAG.getNode(ISD::SRL, DL, MVT::i32, + DAG.getConstant(0b1110100110010100U, DL, MVT::i32), Op); + Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op, + DAG.getConstant(0x3, DL, MVT::i32)); + return DAG.getZExtOrTrunc(Op, DL, VT); + } + + // i4 CTPOP - perform LUT into i64 integer. + if (ShiftedActiveBits <= 4 && + DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64)) { + SDValue LUT = DAG.getConstant(0x4332322132212110ULL, DL, MVT::i64); + if (ActiveBits > 4) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getShiftAmountConstant(TZ, VT, DL)); + Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32); + Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op, + DAG.getConstant(4, DL, MVT::i32)); + Op = DAG.getNode(ISD::SRL, DL, MVT::i64, LUT, + DAG.getShiftAmountOperand(MVT::i64, Op)); + Op = DAG.getNode(ISD::AND, DL, MVT::i64, Op, + DAG.getConstant(0x7, DL, MVT::i64)); + return DAG.getZExtOrTrunc(Op, DL, VT); + } + // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply. if (ShiftedActiveBits <= 8) { SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32); diff --git a/llvm/test/CodeGen/X86/ctpop-mask.ll b/llvm/test/CodeGen/X86/ctpop-mask.ll index 4b03563fd9924..a43dba94d30c7 100644 --- a/llvm/test/CodeGen/X86/ctpop-mask.ll +++ b/llvm/test/CodeGen/X86/ctpop-mask.ll @@ -106,23 +106,24 @@ define i32 @ctpop_mask3(i32 %x) nounwind readnone { ; ; X86-NO-POPCOUNT-LABEL: ctpop_mask3: ; X86-NO-POPCOUNT: # %bb.0: -; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NO-POPCOUNT-NEXT: andl $5, %eax -; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201 -; X86-NO-POPCOUNT-NEXT: shrl $3, %eax -; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111 -; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 -; X86-NO-POPCOUNT-NEXT: shrl $28, %eax +; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-POPCOUNT-NEXT: andl $5, %ecx +; X86-NO-POPCOUNT-NEXT: addl %ecx, %ecx +; X86-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994 +; X86-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NO-POPCOUNT-NEXT: shrl %cl, %eax +; X86-NO-POPCOUNT-NEXT: andl $3, %eax ; X86-NO-POPCOUNT-NEXT: retl ; ; X64-NO-POPCOUNT-LABEL: ctpop_mask3: ; X64-NO-POPCOUNT: # %bb.0: +; X64-NO-POPCOUNT-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NO-POPCOUNT-NEXT: andl $5, %edi -; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201 -; X64-NO-POPCOUNT-NEXT: shrl $3, %eax -; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111 -; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 -; X64-NO-POPCOUNT-NEXT: shrl $28, %eax +; X64-NO-POPCOUNT-NEXT: leal (%rdi,%rdi), %ecx +; X64-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994 +; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-POPCOUNT-NEXT: shrl %cl, %eax +; X64-NO-POPCOUNT-NEXT: andl $3, %eax ; X64-NO-POPCOUNT-NEXT: retq %mask = and i32 %x, 5 ; 0b101 %count = tail call i32 @llvm.ctpop.i32(i32 %mask) @@ -147,24 +148,23 @@ define i16 @ctpop_shifted_mask3(i16 %x) nounwind readnone { ; ; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask3: ; X86-NO-POPCOUNT: # %bb.0: -; X86-NO-POPCOUNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NO-POPCOUNT-NEXT: andl $14, %eax -; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201 -; X86-NO-POPCOUNT-NEXT: shrl $3, %eax -; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111 -; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 -; X86-NO-POPCOUNT-NEXT: shrl $28, %eax +; X86-NO-POPCOUNT-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NO-POPCOUNT-NEXT: andl $14, %ecx +; X86-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994 +; X86-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NO-POPCOUNT-NEXT: shrl %cl, %eax +; X86-NO-POPCOUNT-NEXT: andl $3, %eax ; X86-NO-POPCOUNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NO-POPCOUNT-NEXT: retl ; ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask3: ; X64-NO-POPCOUNT: # %bb.0: -; X64-NO-POPCOUNT-NEXT: andl $14, %edi -; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201 -; X64-NO-POPCOUNT-NEXT: shrl $3, %eax -; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111 -; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 -; X64-NO-POPCOUNT-NEXT: shrl $28, %eax +; X64-NO-POPCOUNT-NEXT: movl %edi, %ecx +; X64-NO-POPCOUNT-NEXT: andl $14, %ecx +; X64-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994 +; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-POPCOUNT-NEXT: shrl %cl, %eax +; X64-NO-POPCOUNT-NEXT: andl $3, %eax ; X64-NO-POPCOUNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NO-POPCOUNT-NEXT: retq %mask = and i16 %x, 14 ; 7 << 1 @@ -202,11 +202,11 @@ define i64 @ctpop_mask4(i64 %x) nounwind readnone { ; X64-NO-POPCOUNT-LABEL: ctpop_mask4: ; X64-NO-POPCOUNT: # %bb.0: ; X64-NO-POPCOUNT-NEXT: andl $15, %edi -; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201 -; X64-NO-POPCOUNT-NEXT: shrl $3, %eax -; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111 -; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 -; X64-NO-POPCOUNT-NEXT: shrl $28, %eax +; X64-NO-POPCOUNT-NEXT: leal (,%rdi,4), %ecx +; X64-NO-POPCOUNT-NEXT: movabsq $4841987667533046032, %rax # imm = 0x4332322132212110 +; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-POPCOUNT-NEXT: shrq %cl, %rax +; X64-NO-POPCOUNT-NEXT: andl $7, %eax ; X64-NO-POPCOUNT-NEXT: retq %mask = and i64 %x, 15 %count = tail call i64 @llvm.ctpop.i64(i64 %mask) @@ -241,13 +241,14 @@ define i32 @ctpop_shifted_mask4(i32 %x) nounwind readnone { ; ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask4: ; X64-NO-POPCOUNT: # %bb.0: -; X64-NO-POPCOUNT-NEXT: shrl $9, %edi -; X64-NO-POPCOUNT-NEXT: andl $15, %edi -; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201 -; X64-NO-POPCOUNT-NEXT: shrl $3, %eax -; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111 -; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 -; X64-NO-POPCOUNT-NEXT: shrl $28, %eax +; X64-NO-POPCOUNT-NEXT: movl %edi, %ecx +; X64-NO-POPCOUNT-NEXT: shrl $7, %ecx +; X64-NO-POPCOUNT-NEXT: andl $60, %ecx +; X64-NO-POPCOUNT-NEXT: movabsq $4841987667533046032, %rax # imm = 0x4332322132212110 +; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NO-POPCOUNT-NEXT: shrq %cl, %rax +; X64-NO-POPCOUNT-NEXT: andl $7, %eax +; X64-NO-POPCOUNT-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NO-POPCOUNT-NEXT: retq %mask = and i32 %x, 7680 ; 15 << 9 %count = tail call i32 @llvm.ctpop.i32(i32 %mask)