-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[RISCV] Use default promotion for i32 CTLZ on RV64 with XTHeadBb. #157994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The existing isel pattern felt like it was emitting more instructions than an isel pattern probably should. We were also missing opportunities to fold the innermost instructions with surrounding instructions. I tried to move the expansion to lowering, but we got a little too aggressive folding the (not (slli (not))) with other operations in some tests and created code with constants that are hard to materialize and missed using TH_FF0. We could probably have fixed that with a RISCVISD::TH_FF0 node. While investigating, I tried using the default promotion. The results aren't obviously worse than the previous codegen. And in some cases they are obviously better.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) ChangesThe existing isel pattern felt like it was emitting more instructions than an isel pattern probably should. We were also missing opportunities to fold the innermost instructions with surrounding instructions. I tried to move the expansion to lowering, but we got a little too aggressive folding the (not (slli (not))) with other operations in some tests and created code with constants that are hard to materialize and missed using TH_FF0. We could probably have fixed that with a RISCVISD::TH_FF0 node. While investigating, I tried using the default promotion. The results aren't obviously worse than the previous codegen. And in some cases they are obviously better. Full diff: https://github.com/llvm/llvm-project/pull/157994.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ae9e2fef88673..1a7402aa3ed20 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -421,12 +421,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
(Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
// We need the custom lowering to make sure that the resulting sequence
// for the 32bit case is efficient on 64bit targets.
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CTLZ, MVT::i32, Custom);
- // Use default promotion for XTHeadBb.
- if (Subtarget.hasStdExtZbb())
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
- }
+ // Use default promotion for i32 without Zbb.
+ if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTLZ, XLenVT, Expand);
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index 4c31ce43c2e5e..49c9bdd83d3f6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -594,8 +594,6 @@ def : Pat<(i64 (sra (bswap GPR:$rs1), (i64 32))),
(TH_REVW GPR:$rs1)>;
def : Pat<(binop_allwusers<srl> (bswap GPR:$rs1), (i64 32)),
(TH_REVW GPR:$rs1)>;
-def : Pat<(riscv_clzw GPR:$rs1),
- (TH_FF0 (i64 (SLLI (i64 (XORI GPR:$rs1, -1)), 32)))>;
} // Predicates = [HasVendorXTHeadBb, IsRV64]
let Predicates = [HasVendorXTHeadBs] in {
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 99ed0a355b909..e6b22b2b9deea 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1375,9 +1375,9 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
;
; RV64XTHEADBB-LABEL: test_ctlz_i32:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: not a0, a0
-; RV64XTHEADBB-NEXT: slli a0, a0, 32
-; RV64XTHEADBB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -32
; RV64XTHEADBB-NEXT: ret
%tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
ret i32 %tmp
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index f20ebca314b35..c5707987408f7 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -52,9 +52,9 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
;
; RV64XTHEADBB-NOB-LABEL: ctlz_i32:
; RV64XTHEADBB-NOB: # %bb.0:
-; RV64XTHEADBB-NOB-NEXT: not a0, a0
-; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
-; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NOB-NEXT: th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NOB-NEXT: addi a0, a0, -32
; RV64XTHEADBB-NOB-NEXT: ret
;
; RV64XTHEADBB-B-LABEL: ctlz_i32:
@@ -112,10 +112,9 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
;
; RV64XTHEADBB-NOB-LABEL: log2_i32:
; RV64XTHEADBB-NOB: # %bb.0:
-; RV64XTHEADBB-NOB-NEXT: not a0, a0
-; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
-; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
-; RV64XTHEADBB-NOB-NEXT: li a1, 31
+; RV64XTHEADBB-NOB-NEXT: th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NOB-NEXT: li a1, 63
; RV64XTHEADBB-NOB-NEXT: sub a0, a1, a0
; RV64XTHEADBB-NOB-NEXT: ret
;
@@ -177,10 +176,9 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64XTHEADBB-NOB-LABEL: log2_ceil_i32:
; RV64XTHEADBB-NOB: # %bb.0:
; RV64XTHEADBB-NOB-NEXT: addi a0, a0, -1
-; RV64XTHEADBB-NOB-NEXT: not a0, a0
-; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
-; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
-; RV64XTHEADBB-NOB-NEXT: li a1, 32
+; RV64XTHEADBB-NOB-NEXT: th.extu a0, a0, 31, 0
+; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NOB-NEXT: li a1, 64
; RV64XTHEADBB-NOB-NEXT: sub a0, a1, a0
; RV64XTHEADBB-NOB-NEXT: ret
;
@@ -309,9 +307,8 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64XTHEADBB-NOB-LABEL: ctlz_lshr_i32:
; RV64XTHEADBB-NOB: # %bb.0:
; RV64XTHEADBB-NOB-NEXT: srliw a0, a0, 1
-; RV64XTHEADBB-NOB-NEXT: not a0, a0
-; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
-; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
+; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NOB-NEXT: addi a0, a0, -32
; RV64XTHEADBB-NOB-NEXT: ret
;
; RV64XTHEADBB-B-LABEL: ctlz_lshr_i32:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks for doing this!
The existing isel pattern felt like it was emitting more instructions than an isel pattern probably should. We were also missing opportunities to fold the innermost instructions with surrounding instructions.
I tried to move the expansion to lowering, but we got a little too aggressive folding the (not (slli (not))) with other operations in some tests and created code with constants that are hard to materialize and missed using TH_FF0. We could probably have fixed that with a RISCVISD::TH_FF0 node.
While investigating, I tried using the default promotion. The results aren't obviously worse than the previous codegen. And in some cases they are obviously better.