4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7678,6 +7678,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Size = Ty.getSizeInBits();
MachineIRBuilder &B = MIRBuilder;

// Bail out on irregular type sizes: wider than 128 bits or not a whole number of bytes.
if (Size > 128 || Size % 8 != 0)
return UnableToLegalize;

// Count set bits in blocks of 2 bits. The default approach would be
// B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
// but we use the following formula instead:
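
The rest of that comment is collapsed in this diff view. For orientation, here is a minimal C++ sketch of the classic SWAR population count that this lowering emits in generic MIR; `popcount32` is an illustrative name, not part of the patch, and the constants scale to the other power-of-two widths the new guard admits:

```cpp
#include <cstdint>

// Illustrative only: the classic SWAR popcount the lowering mirrors.
uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                 // 2-bit block sums
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit block sums
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 8-bit block sums
  return (v * 0x01010101u) >> 24;                   // horizontal add via multiply
}
```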
5 changes: 4 additions & 1 deletion llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -238,7 +238,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, sXLen, sXLen)
.scalarSameSizeAs(1, 0);
} else {
CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
CTPOPActions.widenScalarToNextPow2(0, /*Min*/ 8)
.clampScalar(0, s8, sXLen)
.scalarSameSizeAs(1, 0)
.lower();
}
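
Reading the new rule chain: `widenScalarToNextPow2(0, /*Min*/ 8)` first widens the result type to the next power of two, never below 8 bits; `clampScalar(0, s8, sXLen)` then bounds it to [s8, sXLen]; `scalarSameSizeAs(1, 0)` keeps the source the same width as the result; and `lower()` hands the widened G_CTPOP to the generic bit-count lowering above. A tiny sketch of the resulting width mapping (`legalizedCtpopWidth` is a hypothetical helper, not LLVM API):

```cpp
#include <algorithm>

// Hypothetical model of widenScalarToNextPow2(/*Min=*/8) + clampScalar(s8, sXLen).
unsigned legalizedCtpopWidth(unsigned Bits, unsigned XLen) {
  unsigned Pow2 = 8;            // the Min parameter: never narrower than s8
  while (Pow2 < Bits)
    Pow2 *= 2;                  // next power of two >= Bits
  return std::min(Pow2, XLen);  // clampScalar caps the width at sXLen
}
// e.g. legalizedCtpopWidth(2, 64) == 8 and legalizedCtpopWidth(11, 64) == 16,
// matching the i2 and i11 tests below.
```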

getActionDefinitionsBuilder(G_CONSTANT)
140 changes: 140 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
@@ -205,3 +205,143 @@ define i24 @bitreverse_i24(i24 %x) {
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
ret i24 %rev
}

define i2 @test_ctpop_i2(i2 %a) {
; RV32-LABEL: test_ctpop_i2:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: andi a0, a0, 3
; RV32-NEXT: srli a1, a0, 1
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: zext.b a1, a0
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: andi a1, a1, 51
; RV32-NEXT: andi a0, a0, 51
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: srli a1, a0, 4
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: andi a0, a0, 15
; RV32-NEXT: li a1, 1
; RV32-NEXT: call __mulsi3
; RV32-NEXT: zext.b a0, a0
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_ctpop_i2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: andi a0, a0, 3
; RV64-NEXT: srli a1, a0, 1
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: zext.b a1, a0
; RV64-NEXT: srli a1, a1, 2
; RV64-NEXT: andi a1, a1, 51
; RV64-NEXT: andi a0, a0, 51
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: srli a1, a0, 4
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: andi a0, a0, 15
; RV64-NEXT: li a1, 1
; RV64-NEXT: call __muldi3
; RV64-NEXT: zext.b a0, a0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%1 = call i2 @llvm.ctpop.i2(i2 %a)
ret i2 %1
}
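
The i2 case widens to 8 bits, so the masked value runs through the byte-wide SWAR sequence; the `li a1, 1` / `call __mulsi3` pair is the horizontal-add multiply by 0x01, emitted as a libcall, presumably because these RUN configurations lack a hardware multiplier. In plain C++ (`popcount_i2` is an illustrative name):

```cpp
#include <cstdint>

// Illustrative model of the widened i2 path shown above.
uint8_t popcount_i2(uint8_t v) {
  v &= 0x3;                           // mask the i2 payload (andi a0, a0, 3)
  v = v - ((v >> 1) & 0x55);          // 2-bit sums
  v = (v & 0x33) + ((v >> 2) & 0x33); // 4-bit sums (andi ..., 51)
  v = (v + (v >> 4)) & 0x0F;          // fold the two nibbles (andi ..., 15)
  return v * 0x01;                    // horizontal add; trivial at 8 bits
}
```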

define i11 @test_ctpop_i11(i11 %a) {
; RV32-LABEL: test_ctpop_i11:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: andi a0, a0, 2047
; RV32-NEXT: lui a1, 5
; RV32-NEXT: lui a2, 16
; RV32-NEXT: srli a3, a0, 1
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: lui a3, 3
; RV32-NEXT: addi s0, a2, -1
; RV32-NEXT: addi a2, a3, 819
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: and a1, a0, s0
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: lui a2, 1
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: srli a1, a0, 4
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: addi a1, a2, -241
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: li a1, 257
; RV32-NEXT: call __mulsi3
; RV32-NEXT: and a0, a0, s0
; RV32-NEXT: srli a0, a0, 8
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_ctpop_i11:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: lui a1, 5
; RV64-NEXT: lui a2, 16
; RV64-NEXT: srli a3, a0, 1
; RV64-NEXT: addi a1, a1, 1365
; RV64-NEXT: and a1, a3, a1
; RV64-NEXT: lui a3, 3
; RV64-NEXT: addi s0, a2, -1
; RV64-NEXT: addi a2, a3, 819
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: and a1, a0, s0
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: srli a1, a1, 2
; RV64-NEXT: and a1, a1, a2
; RV64-NEXT: lui a2, 1
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: srli a1, a0, 4
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: addi a1, a2, -241
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: li a1, 257
; RV64-NEXT: call __muldi3
; RV64-NEXT: and a0, a0, s0
; RV64-NEXT: srli a0, a0, 8
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%1 = call i11 @llvm.ctpop.i11(i11 %a)
ret i11 %1
}
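
The i11 case widens to 16 bits, which is where the constants in the assembly come from: 21845 = 0x5555, 13107 = 0x3333, 3855 = 0x0F0F, and the multiplier 257 = 0x0101, followed by masking to 16 bits and a shift by 8 to extract the byte sum. Equivalent C++ (`popcount_i11` is an illustrative name):

```cpp
#include <cstdint>

// Illustrative model of the widened i11 path shown above.
uint16_t popcount_i11(uint16_t v) {
  v &= 0x7FF;                             // mask the i11 payload (2047)
  v = v - ((v >> 1) & 0x5555);            // 2-bit sums
  v = (v & 0x3333) + ((v >> 2) & 0x3333); // 4-bit sums
  v = (v + (v >> 4)) & 0x0F0F;            // 8-bit sums
  return (uint16_t)(v * 0x0101) >> 8;     // byte-sum multiply, take high byte
}
```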
112 changes: 112 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
@@ -216,3 +216,115 @@ body: |
PseudoRET implicit $x10

...
---
name: ctpop_i2
body: |
bb.1:
liveins: $x10

; RV64I-LABEL: name: ctpop_i2
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85
; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51
; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]]
; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
;
; RV64ZBB-LABEL: name: ctpop_i2
; RV64ZBB: liveins: $x10
; RV64ZBB-NEXT: {{ $}}
; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
; RV64ZBB-NEXT: PseudoRET implicit $x10
%1:_(s64) = COPY $x10
%0:_(s2) = G_TRUNC %1(s64)
%2:_(s2) = G_CTPOP %0(s2)
%3:_(s64) = G_ANYEXT %2(s2)
$x10 = COPY %3(s64)
PseudoRET implicit $x10

...
---
name: ctpop_i11
body: |
bb.1:
liveins: $x10

; RV64I-LABEL: name: ctpop_i11
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047
; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 21845
; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 13107
; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3855
; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 257
; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C8]]
; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C4]]
; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[AND6]], [[C9]](s64)
; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
;
; RV64ZBB-LABEL: name: ctpop_i11
; RV64ZBB: liveins: $x10
; RV64ZBB-NEXT: {{ $}}
; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047
; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
; RV64ZBB-NEXT: PseudoRET implicit $x10
%1:_(s64) = COPY $x10
%0:_(s11) = G_TRUNC %1(s64)
%2:_(s11) = G_CTPOP %0(s11)
%3:_(s64) = G_ANYEXT %2(s11)
$x10 = COPY %3(s64)
PseudoRET implicit $x10

...
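
On the RV64ZBB paths above, the legalizer keeps a single G_CTPOP on the widened value (which instruction selection can then turn into `cpop`), so the only extra work for a narrow type is the input mask. Roughly, in C++ (`ctpop_i11_zbb` is an illustrative name):

```cpp
#include <cstdint>

// Illustrative: with Zbb, counting an i11 reduces to mask + one popcount.
uint64_t ctpop_i11_zbb(uint64_t x) {
  return __builtin_popcountll(x & 0x7FF); // a single cpop after masking
}
```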