-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Improve constant materialization by using a sequence that end… #66943
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-risc-v Changes…s with 2 addis in some cases. If the lower 13 bits are something like 0x17ff, we can first materialize it as 0x1800 followed by an addi to subtract a small offset. This might be cheaper to materialize since the constant ending in 0x1800 can use a simm12 immediate for its final addi. Full diff: https://github.com/llvm/llvm-project/pull/66943.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index f659779e9772055..b78067d3881da53 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -206,10 +206,25 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
assert(ActiveFeatures[RISCV::Feature64Bit] &&
"Expected RV32 to only need 2 instructions");
+ // If the lower 13 bits are something like 0x17ff, try to turn it into 0x1800
+ // and use a final addi to correct it back to 0x17ff. This will create a
+ // sequence ending in 2 addis.
+ if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) {
+ int64_t Imm12 = -(0x800 - (Val & 0xfff));
+ int64_t AdjustedVal = Val - Imm12;
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(AdjustedVal, ActiveFeatures, TmpSeq);
+
+ // Keep the new sequence if it is an improvement.
+ if ((TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::ADDI, Imm12);
+ Res = TmpSeq;
+ }
+ }
+
// If the constant is positive we might be able to generate a shifted constant
// with no leading zeros and use a final SRLI to restore them.
- if (Val > 0) {
- assert(Res.size() > 2 && "Expected longer sequence");
+ if (Val > 0 && Res.size() > 2) {
unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
// Fill in the bits that will be shifted out with 1s. An example where this
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 4f9cf1d947d5c35..2f272cad60f9e18 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1117,46 +1117,41 @@ define i64 @imm_end_2addi_1() nounwind {
; RV64I-LABEL: imm_end_2addi_1:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, -2047
-; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: slli a0, a0, 39
+; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addi a0, a0, -1
-; RV64I-NEXT: slli a0, a0, 12
-; RV64I-NEXT: addi a0, a0, 2047
; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm_end_2addi_1:
; RV64IZBA: # %bb.0:
; RV64IZBA-NEXT: li a0, -2047
-; RV64IZBA-NEXT: slli a0, a0, 27
+; RV64IZBA-NEXT: slli a0, a0, 39
+; RV64IZBA-NEXT: addi a0, a0, -2048
; RV64IZBA-NEXT: addi a0, a0, -1
-; RV64IZBA-NEXT: slli a0, a0, 12
-; RV64IZBA-NEXT: addi a0, a0, 2047
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_end_2addi_1:
; RV64IZBB: # %bb.0:
; RV64IZBB-NEXT: li a0, -2047
-; RV64IZBB-NEXT: slli a0, a0, 27
+; RV64IZBB-NEXT: slli a0, a0, 39
+; RV64IZBB-NEXT: addi a0, a0, -2048
; RV64IZBB-NEXT: addi a0, a0, -1
-; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, 2047
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_end_2addi_1:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: li a0, -2047
-; RV64IZBS-NEXT: slli a0, a0, 27
+; RV64IZBS-NEXT: slli a0, a0, 39
+; RV64IZBS-NEXT: addi a0, a0, -2048
; RV64IZBS-NEXT: addi a0, a0, -1
-; RV64IZBS-NEXT: slli a0, a0, 12
-; RV64IZBS-NEXT: addi a0, a0, 2047
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm_end_2addi_1:
; RV64IXTHEADBB: # %bb.0:
; RV64IXTHEADBB-NEXT: li a0, -2047
-; RV64IXTHEADBB-NEXT: slli a0, a0, 27
+; RV64IXTHEADBB-NEXT: slli a0, a0, 39
+; RV64IXTHEADBB-NEXT: addi a0, a0, -2048
; RV64IXTHEADBB-NEXT: addi a0, a0, -1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, 2047
; RV64IXTHEADBB-NEXT: ret
ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF
}
@@ -2453,21 +2448,14 @@ define i64 @imm_12900925247761() {
; RV32I-NEXT: addi a1, a1, -1093
; RV32I-NEXT: ret
;
-; RV64-NOPOOL-LABEL: imm_12900925247761:
-; RV64-NOPOOL: # %bb.0:
-; RV64-NOPOOL-NEXT: lui a0, 188
-; RV64-NOPOOL-NEXT: addiw a0, a0, -1093
-; RV64-NOPOOL-NEXT: slli a0, a0, 12
-; RV64-NOPOOL-NEXT: addi a0, a0, 273
-; RV64-NOPOOL-NEXT: slli a0, a0, 12
-; RV64-NOPOOL-NEXT: addi a0, a0, 273
-; RV64-NOPOOL-NEXT: ret
-;
-; RV64I-POOL-LABEL: imm_12900925247761:
-; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI52_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI52_0)(a0)
-; RV64I-POOL-NEXT: ret
+; RV64I-LABEL: imm_12900925247761:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a0, 384478
+; RV64I-NEXT: addiw a0, a0, -1911
+; RV64I-NEXT: slli a0, a0, 13
+; RV64I-NEXT: addi a0, a0, -2048
+; RV64I-NEXT: addi a0, a0, -1775
+; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm_12900925247761:
; RV64IZBA: # %bb.0:
@@ -2479,32 +2467,29 @@ define i64 @imm_12900925247761() {
;
; RV64IZBB-LABEL: imm_12900925247761:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: lui a0, 188
-; RV64IZBB-NEXT: addiw a0, a0, -1093
-; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, 273
-; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, 273
+; RV64IZBB-NEXT: lui a0, 384478
+; RV64IZBB-NEXT: addiw a0, a0, -1911
+; RV64IZBB-NEXT: slli a0, a0, 13
+; RV64IZBB-NEXT: addi a0, a0, -2048
+; RV64IZBB-NEXT: addi a0, a0, -1775
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_12900925247761:
; RV64IZBS: # %bb.0:
-; RV64IZBS-NEXT: lui a0, 188
-; RV64IZBS-NEXT: addiw a0, a0, -1093
-; RV64IZBS-NEXT: slli a0, a0, 12
-; RV64IZBS-NEXT: addi a0, a0, 273
-; RV64IZBS-NEXT: slli a0, a0, 12
-; RV64IZBS-NEXT: addi a0, a0, 273
+; RV64IZBS-NEXT: lui a0, 384478
+; RV64IZBS-NEXT: addiw a0, a0, -1911
+; RV64IZBS-NEXT: slli a0, a0, 13
+; RV64IZBS-NEXT: addi a0, a0, -2048
+; RV64IZBS-NEXT: addi a0, a0, -1775
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm_12900925247761:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: lui a0, 188
-; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
-; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, 273
-; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, 273
+; RV64IXTHEADBB-NEXT: lui a0, 384478
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1911
+; RV64IXTHEADBB-NEXT: slli a0, a0, 13
+; RV64IXTHEADBB-NEXT: addi a0, a0, -2048
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1775
; RV64IXTHEADBB-NEXT: ret
ret i64 12900925247761
}
|
// If the lower 13 bits are something like 0x17ff, try to turn it into 0x1800 | ||
// and use a final addi to correct it back to 0x17ff. This will create a | ||
// sequence ending in 2 addis. | ||
if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'm missing something here. It looks like you're checking that bit 12 is clear, and that bit 13 is set. I think I get the bit 12 being clear part - that's required for the negative offset we're creating to not cascade - but why check that bit 13 is set?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the example cases I had, it looked like what I wanted to do was make bits 12 and 13 both set so that the lower bits were at least the start of a simm12. Once we have 0x800 in the lower 12 bits, the first thing generateInstSeqImpl is going to do is try to clear the low 12 bits by adding 0x800 (or subtracting 0xfffffffffffff800) to make an ADDI. If bit 13 is set, this will propagate the carry from bit 12 to bit 14. If bit 13 is 0, the add would set bit 13 and stop.
I guess doesn't hurt anything to try it, I was just trying to limit compile time a little.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm suspecting a typo here. I can't line your words up with the code.
Specifically, this check: (Val & 0x1800) == 0x1000). This is checking that the 13th bit is set, and the 12th bit is not set. This seems to differ from your response where you say that you're making sure both bits 12 and 13 are set. If you were comparing to 0x1800 (not 0x1000), that would be the effect.
However, the line int64_t Imm12 = -(0x800 - (Val & 0xfff)); requires bit 12 of Val to be clear to produce a uint11_t, which can then be negated into a sint12_t without corner cases. So maybe it's the code that's correct and your comment that is off?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This statement from my response, "it looked like what I wanted to do was make bits 12 and 13 both set so that the lower bits were at least the start of a simm12," was referring to the change done by this line: AdjustedVal = Val - Imm12
. We want AdjustedVal
to have bits 13 and 12 set so that when we call generateInstSeqImpl
on AdjustedVal
, it will subtract a simm12 of 0xfffffffffffff800 and create more than 12 trailing zeros for the next recursive call to generateInstSeqImpl
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, okay. That finally makes sense.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…s with 2 addis in some cases. If the lower 13 bits are something like 0x17ff, we can first materialize it as 0x1800 followed by an addi to subtract a small offset. This might be cheaper to materialize since the constant ending in 0x1800 can use a simm12 immediate for its final addi.
59f8fe8
to
d419466
Compare
llvm#66943) …s with 2 addis in some cases. If the lower 13 bits are something like 0x17ff, we can first materialize it as 0x1800 followed by an addi to subtract a small offset. This might be cheaper to materialize since the constant ending in 0x1800 can use a simm12 immediate for its final addi.
…s with 2 addis in some cases.
If the lower 13 bits are something like 0x17ff, we can first materialize it as 0x1800 followed by an addi to subtract a small offset. This might be cheaper to materialize since the constant ending in 0x1800 can use a simm12 immediate for its final addi.