Skip to content

Conversation

@XChy
Copy link
Member

@XChy XChy commented Nov 18, 2025

Fixes the assertion in #168523
This patch lifts the small, odd-sized integer to 8 bits, ensuring that the following lowering code behaves correctly.

@llvmbot
Copy link
Member

llvmbot commented Nov 18, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Hongyu Chen (XChy)

Changes

Fixes the assertion in #168523
This patch lifts the small, odd-sized integer to 8 bits, ensuring that the following lowering code behaves correctly.


Full diff: https://github.com/llvm/llvm-project/pull/168559.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+12)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll (+56)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir (+57)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d02f097fef829..e18fe67a82a17 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7678,6 +7678,18 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
     unsigned Size = Ty.getSizeInBits();
     MachineIRBuilder &B = MIRBuilder;
 
+    // Lift small odd-size integer to 8-bit integer.
+    if (Size < 8) {
+      LLT NewTy = LLT::scalar(8);
+      auto ZExt = B.buildZExt(NewTy, SrcReg);
+      auto NewCTPOP = B.buildCTPOP(NewTy, ZExt);
+      Observer.changingInstr(MI);
+      MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
+      MI.getOperand(1).setReg(NewCTPOP.getReg(0));
+      Observer.changedInstr(MI);
+      return Legalized;
+    }
+
     // Count set bits in blocks of 2 bits. Default approach would be
     // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
     // We use following formula instead:
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
index 68bc1e5db6095..52d96dd265899 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
@@ -205,3 +205,59 @@ define i24 @bitreverse_i24(i24 %x) {
   %rev = call i24 @llvm.bitreverse.i24(i24 %x)
   ret i24 %rev
 }
+
+define i2 @test_ctpop_i2(i2 %a) {
+; RV32-LABEL: test_ctpop_i2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    andi a0, a0, 3
+; RV32-NEXT:    srli a1, a0, 1
+; RV32-NEXT:    sub a0, a0, a1
+; RV32-NEXT:    zext.b a1, a0
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    andi a1, a1, 51
+; RV32-NEXT:    andi a0, a0, 51
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    srli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    andi a0, a0, 15
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    call __mulsi3
+; RV32-NEXT:    zext.b a0, a0
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_ctpop_i2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    andi a0, a0, 3
+; RV64-NEXT:    srli a1, a0, 1
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    zext.b a1, a0
+; RV64-NEXT:    srli a1, a1, 2
+; RV64-NEXT:    andi a1, a1, 51
+; RV64-NEXT:    andi a0, a0, 51
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    srli a1, a0, 4
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    andi a0, a0, 15
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    call __muldi3
+; RV64-NEXT:    zext.b a0, a0
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    .cfi_def_cfa_offset 0
+; RV64-NEXT:    ret
+  %1 = call i2 @llvm.ctpop.i2(i2 %a)
+  ret i2 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
index c61c46df0a434..720417211385e 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
@@ -216,3 +216,60 @@ body:             |
     PseudoRET implicit $x10
 
 ...
+
+...
+---
+name:            ctpop_i2
+body:             |
+  bb.1:
+    liveins: $x10
+
+    ; RV64I-LABEL: name: ctpop_i2
+    ; RV64I: liveins: $x10
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
+    ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
+    ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85
+    ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
+    ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
+    ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
+    ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+    ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
+    ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
+    ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51
+    ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
+    ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
+    ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
+    ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
+    ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
+    ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+    ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
+    ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]]
+    ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
+    ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
+    ; RV64I-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64ZBB-LABEL: name: ctpop_i2
+    ; RV64ZBB: liveins: $x10
+    ; RV64ZBB-NEXT: {{  $}}
+    ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
+    ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
+    ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64ZBB-NEXT: PseudoRET implicit $x10
+    %1:_(s64) = COPY $x10
+    %0:_(s2) = G_TRUNC %1(s64)
+    %2:_(s2) = G_CTPOP %0(s2)
+    %3:_(s64) = G_ANYEXT %2(s2)
+    $x10 = COPY %3(s64)
+    PseudoRET implicit $x10
+
+...

@llvmbot
Copy link
Member

llvmbot commented Nov 18, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Hongyu Chen (XChy)

Changes

Fixes the assertion in #168523
This patch lifts the small, odd-sized integer to 8 bits, ensuring that the following lowering code behaves correctly.


Full diff: https://github.com/llvm/llvm-project/pull/168559.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+12)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll (+56)
  • (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir (+57)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d02f097fef829..e18fe67a82a17 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7678,6 +7678,18 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
     unsigned Size = Ty.getSizeInBits();
     MachineIRBuilder &B = MIRBuilder;
 
+    // Lift small odd-size integer to 8-bit integer.
+    if (Size < 8) {
+      LLT NewTy = LLT::scalar(8);
+      auto ZExt = B.buildZExt(NewTy, SrcReg);
+      auto NewCTPOP = B.buildCTPOP(NewTy, ZExt);
+      Observer.changingInstr(MI);
+      MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
+      MI.getOperand(1).setReg(NewCTPOP.getReg(0));
+      Observer.changedInstr(MI);
+      return Legalized;
+    }
+
     // Count set bits in blocks of 2 bits. Default approach would be
     // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
     // We use following formula instead:
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
index 68bc1e5db6095..52d96dd265899 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
@@ -205,3 +205,59 @@ define i24 @bitreverse_i24(i24 %x) {
   %rev = call i24 @llvm.bitreverse.i24(i24 %x)
   ret i24 %rev
 }
+
+define i2 @test_ctpop_i2(i2 %a) {
+; RV32-LABEL: test_ctpop_i2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    andi a0, a0, 3
+; RV32-NEXT:    srli a1, a0, 1
+; RV32-NEXT:    sub a0, a0, a1
+; RV32-NEXT:    zext.b a1, a0
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    andi a1, a1, 51
+; RV32-NEXT:    andi a0, a0, 51
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    srli a1, a0, 4
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    andi a0, a0, 15
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    call __mulsi3
+; RV32-NEXT:    zext.b a0, a0
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_ctpop_i2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    andi a0, a0, 3
+; RV64-NEXT:    srli a1, a0, 1
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    zext.b a1, a0
+; RV64-NEXT:    srli a1, a1, 2
+; RV64-NEXT:    andi a1, a1, 51
+; RV64-NEXT:    andi a0, a0, 51
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    srli a1, a0, 4
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    andi a0, a0, 15
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    call __muldi3
+; RV64-NEXT:    zext.b a0, a0
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    .cfi_def_cfa_offset 0
+; RV64-NEXT:    ret
+  %1 = call i2 @llvm.ctpop.i2(i2 %a)
+  ret i2 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
index c61c46df0a434..720417211385e 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
@@ -216,3 +216,60 @@ body:             |
     PseudoRET implicit $x10
 
 ...
+
+...
+---
+name:            ctpop_i2
+body:             |
+  bb.1:
+    liveins: $x10
+
+    ; RV64I-LABEL: name: ctpop_i2
+    ; RV64I: liveins: $x10
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
+    ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
+    ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85
+    ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
+    ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
+    ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
+    ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+    ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
+    ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
+    ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51
+    ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
+    ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
+    ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
+    ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
+    ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
+    ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+    ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
+    ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]]
+    ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
+    ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
+    ; RV64I-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64ZBB-LABEL: name: ctpop_i2
+    ; RV64ZBB: liveins: $x10
+    ; RV64ZBB-NEXT: {{  $}}
+    ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
+    ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
+    ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64ZBB-NEXT: PseudoRET implicit $x10
+    %1:_(s64) = COPY $x10
+    %0:_(s2) = G_TRUNC %1(s64)
+    %2:_(s2) = G_CTPOP %0(s2)
+    %3:_(s64) = G_ANYEXT %2(s2)
+    $x10 = COPY %3(s64)
+    PseudoRET implicit $x10
+
+...

@github-actions
Copy link

🐧 Linux x64 Test Results

  • 186304 tests passed
  • 4857 tests skipped

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@XChy XChy enabled auto-merge (squash) November 18, 2025 18:20
@XChy XChy merged commit 523bd2d into llvm:main Nov 18, 2025
7 of 9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants