From f9a30e695a74dd7be4b3f167d3f82d8569c3a98d Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 18 Nov 2025 23:57:00 +0800 Subject: [PATCH 1/4] [RISCV][GISel] Compute CTPOP of small odd-size integer correctly --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 12 ++++ .../test/CodeGen/RISCV/GlobalISel/bitmanip.ll | 56 ++++++++++++++++++ .../legalizer/legalize-ctpop-rv64.mir | 57 +++++++++++++++++++ 3 files changed, 125 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index d02f097fef829..e18fe67a82a17 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7678,6 +7678,18 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; + // Lift small odd-size integer to 8-bit integer. + if (Size < 8) { + LLT NewTy = LLT::scalar(8); + auto ZExt = B.buildZExt(NewTy, SrcReg); + auto NewCTPOP = B.buildCTPOP(NewTy, ZExt); + Observer.changingInstr(MI); + MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); + MI.getOperand(1).setReg(NewCTPOP.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + // Count set bits in blocks of 2 bits. Default approach would be // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 } // We use following formula instead: diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll index 68bc1e5db6095..52d96dd265899 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll @@ -205,3 +205,59 @@ define i24 @bitreverse_i24(i24 %x) { %rev = call i24 @llvm.bitreverse.i24(i24 %x) ret i24 %rev } + +define i2 @test_ctpop_i2(i2 %a) { +; RV32-LABEL: test_ctpop_i2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: andi a0, a0, 3 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: zext.b a1, a0 +; RV32-NEXT: srli a1, a1, 2 +; RV32-NEXT: andi a1, a1, 51 +; RV32-NEXT: andi a0, a0, 51 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: andi a0, a0, 15 +; RV32-NEXT: li a1, 1 +; RV32-NEXT: call __mulsi3 +; RV32-NEXT: zext.b a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: test_ctpop_i2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: andi a0, a0, 3 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: zext.b a1, a0 +; RV64-NEXT: srli a1, a1, 2 +; RV64-NEXT: andi a1, a1, 51 +; RV64-NEXT: andi a0, a0, 51 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: andi a0, a0, 15 +; RV64-NEXT: li a1, 1 +; RV64-NEXT: call __muldi3 +; RV64-NEXT: zext.b a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %1 = call i2 @llvm.ctpop.i2(i2 %a) + ret i2 %1 +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index c61c46df0a434..720417211385e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -216,3 +216,60 @@ body: | PseudoRET implicit $x10 ... + +... +--- +name: ctpop_i2 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: ctpop_i2 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64) + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]] + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64) + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]] + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64) + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51 + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]] + ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]] + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]] + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]] + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]] + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64) + ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: ctpop_i2 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s2) = G_TRUNC %1(s64) + %2:_(s2) = G_CTPOP %0(s2) + %3:_(s64) = G_ANYEXT %2(s2) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... From f3a2955ae2d9e5ead340010b7613aea47ac19754 Mon Sep 17 00:00:00 2001 From: XChy Date: Wed, 19 Nov 2025 01:24:24 +0800 Subject: [PATCH 2/4] resolve comment --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 12 ++- .../test/CodeGen/RISCV/GlobalISel/bitmanip.ll | 84 +++++++++++++++++++ .../legalizer/legalize-ctpop-rv64.mir | 59 ++++++++++++- 3 files changed, 146 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e18fe67a82a17..8fdce782aaaa9 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7678,14 +7678,12 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; - // Lift small odd-size integer to 8-bit integer. - if (Size < 8) { - LLT NewTy = LLT::scalar(8); - auto ZExt = B.buildZExt(NewTy, SrcReg); - auto NewCTPOP = B.buildCTPOP(NewTy, ZExt); + // Lift odd-size integer to multiple of 8 bit. + if (Size % 8 != 0) { + LLT NewTy = LLT::scalar(alignTo(Size, 8)); Observer.changingInstr(MI); - MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); - MI.getOperand(1).setReg(NewCTPOP.getReg(0)); + widenScalarSrc(MI, NewTy, 1, TargetOpcode::G_ZEXT); + widenScalarDst(MI, NewTy, 0); Observer.changedInstr(MI); return Legalized; } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll index 52d96dd265899..cb12ef20b0994 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll @@ -261,3 +261,87 @@ define i2 @test_ctpop_i2(i2 %a) { %1 = call i2 @llvm.ctpop.i2(i2 %a) ret i2 %1 } + +define i11 @test_ctpop_i11(i11 %a) { +; RV32-LABEL: test_ctpop_i11: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: andi a0, a0, 2047 +; RV32-NEXT: lui a1, 5 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: srli a3, a0, 1 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: lui a3, 3 +; RV32-NEXT: addi s0, a2, -1 +; RV32-NEXT: addi a2, a3, 819 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: srli a1, a1, 2 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: lui a2, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: addi a1, a2, -241 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: li a1, 257 +; RV32-NEXT: call __mulsi3 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: srli a0, a0, 8 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: test_ctpop_i11: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: andi a0, a0, 2047 +; RV64-NEXT: lui a1, 5 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: srli a3, a0, 1 +; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: and a1, a3, a1 +; RV64-NEXT: lui a3, 3 +; RV64-NEXT: addi s0, a2, -1 +; RV64-NEXT: addi a2, a3, 819 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: srli a1, a1, 2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: lui a2, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: addi a1, a2, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: li a1, 257 +; RV64-NEXT: call __muldi3 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: srli a0, a0, 8 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %1 = call i11 @llvm.ctpop.i11(i11 %a) + ret i11 %1 +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index 720417211385e..aae48cf7df6d0 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -215,8 +215,6 @@ body: | $x10 = COPY %1(s64) PseudoRET implicit $x10 -... - ... --- name: ctpop_i2 @@ -273,3 +271,60 @@ body: | PseudoRET implicit $x10 ... +--- +name: ctpop_i11 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: ctpop_i11 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047 + ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64) + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 21845 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]] + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64) + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]] + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64) + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 13107 + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]] + ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]] + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]] + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3855 + ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]] + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 257 + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C8]] + ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C4]] + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[AND6]], [[C9]](s64) + ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: ctpop_i11 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s11) = G_TRUNC %1(s64) + %2:_(s11) = G_CTPOP %0(s11) + %3:_(s64) = G_ANYEXT %2(s11) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... From 89d87397f4165b166b221f58cf258a7f0af35cbb Mon Sep 17 00:00:00 2001 From: XChy Date: Wed, 19 Nov 2025 02:03:02 +0800 Subject: [PATCH 3/4] resolve comment --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 12 +++--------- llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 5 ++++- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 8fdce782aaaa9..7b0336ff83003 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7678,15 +7678,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; - // Lift odd-size integer to multiple of 8 bit. - if (Size % 8 != 0) { - LLT NewTy = LLT::scalar(alignTo(Size, 8)); - Observer.changingInstr(MI); - widenScalarSrc(MI, NewTy, 1, TargetOpcode::G_ZEXT); - widenScalarDst(MI, NewTy, 0); - Observer.changedInstr(MI); - return Legalized; - } + // Bail out on irregular type lengths. + if (Size >= 128 || Size % 8 != 0) + return UnableToLegalize; // Count set bits in blocks of 2 bits. Default approach would be // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 } diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index b1794b78a3e2a..1fba16d3d51c2 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -238,7 +238,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .clampScalar(0, sXLen, sXLen) .scalarSameSizeAs(1, 0); } else { - CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); + CTPOPActions.widenScalarToNextPow2(0, /*Min*/ 8) + .clampScalar(0, s8, sXLen) + .scalarSameSizeAs(1, 0) + .lower(); } getActionDefinitionsBuilder(G_CONSTANT) From 267cf466b56ad3d994662ede0a6538bc0334eba5 Mon Sep 17 00:00:00 2001 From: XChy Date: Wed, 19 Nov 2025 02:11:36 +0800 Subject: [PATCH 4/4] fix nit --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 7b0336ff83003..9c4e49852be2b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7679,7 +7679,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { MachineIRBuilder &B = MIRBuilder; // Bail out on irregular type lengths. - if (Size >= 128 || Size % 8 != 0) + if (Size > 128 || Size % 8 != 0) return UnableToLegalize; // Count set bits in blocks of 2 bits. Default approach would be