Skip to content

Commit

Permalink
[X86] LowerCTPOP - add i3 and i4 LUT 'shift+mask' expansions
Browse files Browse the repository at this point in the history
Use the 3 or 4 active bits as a shift amount into an i32/i64 constant that serves as a lookup table holding the number of set bits for each possible value.

In the future, it might be worthwhile to move these expansions into a generic location in case other targets want to make use of them.

Another expansion pulled from #79823
  • Loading branch information
RKSimon committed Feb 21, 2024
1 parent 98a07f7 commit b8c9b06
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 37 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31065,6 +31065,38 @@ static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget,
return DAG.getZExtOrTrunc(Op, DL, VT);
}

// i3 CTPOP - perform LUT into i32 integer.
// The constant 0b1110100110010100 packs eight 2-bit entries: bits [2k+1:2k]
// hold popcount(k) for k = 0..7. The 3-bit value is doubled to form the bit
// offset of its entry, the table is shifted right by that offset, and the
// low 2 bits (maximum count is 3) are kept.
if (ShiftedActiveBits <= 3) {
// Shift the source right by TZ when the active bits extend above bit 2.
// NOTE(review): TZ is defined outside this hunk - presumably the count of
// known-zero trailing bits; confirm against the enclosing function.
if (ActiveBits > 3)
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
DAG.getShiftAmountConstant(TZ, VT, DL));
// All LUT arithmetic below is done in i32, regardless of VT.
Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
// Scale the index by 2 because each table entry is 2 bits wide.
// NOTE(review): the shift-amount constant is requested for VT although the
// SHL node itself is MVT::i32 - confirm this mismatch is intentional.
Op = DAG.getNode(ISD::SHL, DL, MVT::i32, Op,
DAG.getShiftAmountConstant(1, VT, DL));
// Shift the table so the selected entry lands in the low bits.
// NOTE(review): the i32 index is passed directly as the shift amount here,
// whereas the i4 path wraps it in getShiftAmountOperand - verify.
Op = DAG.getNode(ISD::SRL, DL, MVT::i32,
DAG.getConstant(0b1110100110010100U, DL, MVT::i32), Op);
// Keep only the 2-bit entry.
Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
DAG.getConstant(0x3, DL, MVT::i32));
// Convert the i32 result back to the original value type.
return DAG.getZExtOrTrunc(Op, DL, VT);
}

// i4 CTPOP - perform LUT into i64 integer.
// The constant 0x4332322132212110 packs sixteen 4-bit entries: nibble k
// holds popcount(k) for k = 0..15. The 4-bit value is multiplied by 4 to
// form the bit offset of its nibble, the table is shifted right by that
// offset, and the low 3 bits (maximum count is 4) are kept.
// This path is only taken when i64 is a legal type for the target.
if (ShiftedActiveBits <= 4 &&
DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64)) {
SDValue LUT = DAG.getConstant(0x4332322132212110ULL, DL, MVT::i64);
// Shift the source right by TZ when the active bits extend above bit 3.
// NOTE(review): TZ is defined outside this hunk - presumably the count of
// known-zero trailing bits; confirm against the enclosing function.
if (ActiveBits > 4)
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
DAG.getShiftAmountConstant(TZ, VT, DL));
// The index computation is done in i32; only the table shift is i64.
Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
// Scale the index by 4 because each table entry is one nibble wide.
Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op,
DAG.getConstant(4, DL, MVT::i32));
// Shift the table so the selected nibble lands in the low bits; the index
// is passed through getShiftAmountOperand for the i64 shift.
Op = DAG.getNode(ISD::SRL, DL, MVT::i64, LUT,
DAG.getShiftAmountOperand(MVT::i64, Op));
// Keep only the low 3 bits of the selected nibble.
Op = DAG.getNode(ISD::AND, DL, MVT::i64, Op,
DAG.getConstant(0x7, DL, MVT::i64));
// Convert the i64 result back to the original value type.
return DAG.getZExtOrTrunc(Op, DL, VT);
}

// i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
if (ShiftedActiveBits <= 8) {
SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);
Expand Down
75 changes: 38 additions & 37 deletions llvm/test/CodeGen/X86/ctpop-mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -106,23 +106,24 @@ define i32 @ctpop_mask3(i32 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_mask3:
; X86-NO-POPCOUNT: # %bb.0:
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-POPCOUNT-NEXT: andl $5, %eax
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-POPCOUNT-NEXT: andl $5, %ecx
; X86-NO-POPCOUNT-NEXT: addl %ecx, %ecx
; X86-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
; X86-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NO-POPCOUNT-NEXT: shrl %cl, %eax
; X86-NO-POPCOUNT-NEXT: andl $3, %eax
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_mask3:
; X64-NO-POPCOUNT: # %bb.0:
; X64-NO-POPCOUNT-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NO-POPCOUNT-NEXT: andl $5, %edi
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: leal (%rdi,%rdi), %ecx
; X64-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-POPCOUNT-NEXT: shrl %cl, %eax
; X64-NO-POPCOUNT-NEXT: andl $3, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i32 %x, 5 ; 0b101
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
Expand All @@ -147,24 +148,23 @@ define i16 @ctpop_shifted_mask3(i16 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask3:
; X86-NO-POPCOUNT: # %bb.0:
; X86-NO-POPCOUNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NO-POPCOUNT-NEXT: andl $14, %eax
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NO-POPCOUNT-NEXT: andl $14, %ecx
; X86-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
; X86-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NO-POPCOUNT-NEXT: shrl %cl, %eax
; X86-NO-POPCOUNT-NEXT: andl $3, %eax
; X86-NO-POPCOUNT-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask3:
; X64-NO-POPCOUNT: # %bb.0:
; X64-NO-POPCOUNT-NEXT: andl $14, %edi
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: movl %edi, %ecx
; X64-NO-POPCOUNT-NEXT: andl $14, %ecx
; X64-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-POPCOUNT-NEXT: shrl %cl, %eax
; X64-NO-POPCOUNT-NEXT: andl $3, %eax
; X64-NO-POPCOUNT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i16 %x, 14 ; 7 << 1
Expand Down Expand Up @@ -202,11 +202,11 @@ define i64 @ctpop_mask4(i64 %x) nounwind readnone {
; X64-NO-POPCOUNT-LABEL: ctpop_mask4:
; X64-NO-POPCOUNT: # %bb.0:
; X64-NO-POPCOUNT-NEXT: andl $15, %edi
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: leal (,%rdi,4), %ecx
; X64-NO-POPCOUNT-NEXT: movabsq $4841987667533046032, %rax # imm = 0x4332322132212110
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-POPCOUNT-NEXT: shrq %cl, %rax
; X64-NO-POPCOUNT-NEXT: andl $7, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i64 %x, 15
%count = tail call i64 @llvm.ctpop.i64(i64 %mask)
Expand Down Expand Up @@ -241,13 +241,14 @@ define i32 @ctpop_shifted_mask4(i32 %x) nounwind readnone {
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask4:
; X64-NO-POPCOUNT: # %bb.0:
; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
; X64-NO-POPCOUNT-NEXT: andl $15, %edi
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: movl %edi, %ecx
; X64-NO-POPCOUNT-NEXT: shrl $7, %ecx
; X64-NO-POPCOUNT-NEXT: andl $60, %ecx
; X64-NO-POPCOUNT-NEXT: movabsq $4841987667533046032, %rax # imm = 0x4332322132212110
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-POPCOUNT-NEXT: shrq %cl, %rax
; X64-NO-POPCOUNT-NEXT: andl $7, %eax
; X64-NO-POPCOUNT-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i32 %x, 7680 ; 15 << 9
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
Expand Down

0 comments on commit b8c9b06

Please sign in to comment.