[GISel][RISCV] Compute CTPOP of small odd-sized integer correctly #168559
@llvm/pr-subscribers-llvm-globalisel, @llvm/pr-subscribers-backend-risc-v

Author: Hongyu Chen (XChy)

Changes: Fixes the assertion in #168523

Full diff: https://github.com/llvm/llvm-project/pull/168559.diff

3 files affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d02f097fef829..e18fe67a82a17 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7678,6 +7678,18 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Size = Ty.getSizeInBits();
MachineIRBuilder &B = MIRBuilder;
+ // Lift small odd-size integer to 8-bit integer.
+ if (Size < 8) {
+ LLT NewTy = LLT::scalar(8);
+ auto ZExt = B.buildZExt(NewTy, SrcReg);
+ auto NewCTPOP = B.buildCTPOP(NewTy, ZExt);
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
+ MI.getOperand(1).setReg(NewCTPOP.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
// Count set bits in blocks of 2 bits. Default approach would be
// B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
// We use following formula instead:
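
For context on the hunk above: the lowering that the new early-exit skips to for wider types is the standard SWAR population count. A minimal C++ sketch of that technique follows; this is my illustration of the idea, not code from the patch (the legalizer builds the equivalent G_AND/G_LSHR/G_ADD/G_MUL instructions rather than executing anything like this).

```cpp
#include <cstdint>

// Classic SWAR popcount for a plain 32-bit value, for illustration only.
static uint32_t popcount32(uint32_t Val) {
  // Count bits in 2-bit blocks; this is the subtraction form of the
  // masked-add shown in the comment above.
  Val = Val - ((Val >> 1) & 0x55555555u);
  // Count bits in 4-bit blocks.
  Val = (Val & 0x33333333u) + ((Val >> 2) & 0x33333333u);
  // Count bits in 8-bit blocks.
  Val = (Val + (Val >> 4)) & 0x0F0F0F0Fu;
  // The multiply sums all bytes into the top byte; shift it down.
  return (Val * 0x01010101u) >> 24;
}
```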
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
index 68bc1e5db6095..52d96dd265899 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
@@ -205,3 +205,59 @@ define i24 @bitreverse_i24(i24 %x) {
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
ret i24 %rev
}
+
+define i2 @test_ctpop_i2(i2 %a) {
+; RV32-LABEL: test_ctpop_i2:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: andi a0, a0, 3
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: zext.b a1, a0
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: andi a1, a1, 51
+; RV32-NEXT: andi a0, a0, 51
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: andi a0, a0, 15
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: call __mulsi3
+; RV32-NEXT: zext.b a0, a0
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_ctpop_i2:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: andi a0, a0, 3
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: zext.b a1, a0
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: andi a1, a1, 51
+; RV64-NEXT: andi a0, a0, 51
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: andi a0, a0, 15
+; RV64-NEXT: li a1, 1
+; RV64-NEXT: call __muldi3
+; RV64-NEXT: zext.b a0, a0
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore ra
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
+ %1 = call i2 @llvm.ctpop.i2(i2 %a)
+ ret i2 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
index c61c46df0a434..720417211385e 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir
@@ -216,3 +216,60 @@ body: |
PseudoRET implicit $x10
...
+
+...
+---
+name: ctpop_i2
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; RV64I-LABEL: name: ctpop_i2
+ ; RV64I: liveins: $x10
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
+ ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64)
+ ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85
+ ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]]
+ ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
+ ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]]
+ ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+ ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]]
+ ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64)
+ ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51
+ ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]]
+ ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]]
+ ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]]
+ ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64)
+ ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]]
+ ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+ ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]]
+ ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]]
+ ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64)
+ ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64)
+ ; RV64I-NEXT: PseudoRET implicit $x10
+ ;
+ ; RV64ZBB-LABEL: name: ctpop_i2
+ ; RV64ZBB: liveins: $x10
+ ; RV64ZBB-NEXT: {{ $}}
+ ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+ ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64)
+ ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64)
+ ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64)
+ ; RV64ZBB-NEXT: PseudoRET implicit $x10
+ %1:_(s64) = COPY $x10
+ %0:_(s2) = G_TRUNC %1(s64)
+ %2:_(s2) = G_CTPOP %0(s2)
+ %3:_(s64) = G_ANYEXT %2(s2)
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+
+...
topperc left a comment:
LGTM
Fixes the assertion failure in #168523.
This patch promotes small odd-sized integers (narrower than 8 bits) to 8-bit integers before lowering G_CTPOP, so that the bit-counting lowering code that follows behaves correctly.
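
As a rough sketch of what that promotion amounts to on plain integers (the function name and structure below are illustrative, not taken from the patch): the narrow value is zero-extended to 8 bits, the 8-bit popcount is computed by the existing lowering, and the result is truncated back to the original width.

```cpp
#include <cstdint>

// Illustration of the new path for an s2 G_CTPOP.
static uint8_t ctpopS2ViaS8(uint8_t A /* only bits [1:0] are meaningful */) {
  uint8_t Z = A & 0x3;        // G_ZEXT s2 -> s8: mask off the high bits
  uint8_t Pop = 0;
  for (int I = 0; I < 8; ++I) // stand-in for the s8 G_CTPOP lowering
    Pop += (Z >> I) & 1;
  return Pop & 0x3;           // G_TRUNC s8 -> s2: a 2-bit popcount is at most 2
}
```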