[RISCV] Improve constant materialisation for stores of i8 negative constants #92131
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Alex Bradbury (asb)

Changes

This follows the same pattern as 20e6265. Although we can't reduce the number of instructions used, if we are able to use a sign-extended 6-bit immediate then the 16-bit c.li instruction can be selected (thus saving code size). Although this could be gated so it only happens if C is enabled, I've opted not to because at worst it's neutral and it doesn't seem helpful to add unnecessary divergence between the RVC and non-RVC paths.

Full diff: https://github.com/llvm/llvm-project/pull/92131.diff

8 Files Affected:
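Before the per-file diff, a quick illustration of the narrowing rule this patch adds. The sketch below is standalone and hedged: `signExtend64`, `isUIntB`, and `isIntB` are local stand-ins for LLVM's `SignExtend64`, `isUInt`, and `isInt` from `llvm/Support/MathExtras.h`, and the `hasAllBUsers` check is modelled as trivially true (we assume every user reads only the low 8 bits, as a store of an i8 does).

```cpp
#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Stand-in for llvm::SignExtend64<B>: sign-extend the low B bits of X.
template <unsigned B> int64_t signExtend64(uint64_t X) {
  return static_cast<int64_t>(X << (64 - B)) >> (64 - B);
}

// Stand-in for llvm::isUInt<B>: X fits in B bits as an unsigned value.
template <unsigned B> bool isUIntB(int64_t X) {
  return static_cast<uint64_t>(X) < (uint64_t(1) << B);
}

// Stand-in for llvm::isInt<B>: X fits in B bits as a signed value.
template <unsigned B> bool isIntB(int64_t X) {
  return X >= -(int64_t(1) << (B - 1)) && X < (int64_t(1) << (B - 1));
}

int main() {
  // u8 immediates taken from the tests updated by this PR.
  for (int64_t Imm : {255, 254, 253, 236, 234, 224}) {
    // The guard added to RISCVDAGToDAGISel::Select (hasAllBUsers assumed
    // true here): the u8 value, sign-extended from bit 7, must fit simm6.
    if (isUIntB<8>(Imm) && isIntB<6>(signExtend64<8>(Imm))) {
      int64_t Narrowed = signExtend64<6>(Imm);
      // The low byte is unchanged, so an sb of either value stores the
      // same thing; the narrowed form fits c.li's simm6 immediate.
      printf("li %3lld -> li %3lld (low byte 0x%02llx either way)\n",
             (long long)Imm, (long long)Narrowed,
             (unsigned long long)(Narrowed & 0xff));
    }
  }
  return 0;
}
```

Each printed pair corresponds to a `li` change in the test diffs below (e.g. 224 becomes -32, 254 becomes -2): the materialised value differs, but the byte that reaches memory does not.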
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 3c4646b95715d..92014032a0f77 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -902,6 +902,11 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
int64_t Imm = ConstNode->getSExtValue();
+ // If only the lower 8 bits are used, try to convert this to a simm6 by
+ // sign-extending bit 7. This is neutral without the C extension, and
+ // allows C.LI to be used if C is present.
+ if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
+ Imm = SignExtend64<6>(Imm);
// If the upper XLen-16 bits are not used, try to convert this to a simm12
// by sign extending bit 15.
if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 7d4aec2dfdc98..ece04dd7f4b75 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -121,6 +121,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool hasAllNBitUsers(SDNode *Node, unsigned Bits,
const unsigned Depth = 0) const;
+ bool hasAllBUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 8); }
bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index c5c1657b526a6..5fd25ab60db01 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1558,6 +1558,58 @@ define i64 @imm_2reg_1() nounwind {
ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
}
+define void @imm_store_i8_neg1(ptr %p) nounwind {
+; RV32I-LABEL: imm_store_i8_neg1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a1, -1
+; RV32I-NEXT: sb a1, 0(a0)
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: imm_store_i8_neg1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: sb a1, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64IZBA-LABEL: imm_store_i8_neg1:
+; RV64IZBA: # %bb.0:
+; RV64IZBA-NEXT: li a1, -1
+; RV64IZBA-NEXT: sb a1, 0(a0)
+; RV64IZBA-NEXT: ret
+;
+; RV64IZBB-LABEL: imm_store_i8_neg1:
+; RV64IZBB: # %bb.0:
+; RV64IZBB-NEXT: li a1, -1
+; RV64IZBB-NEXT: sb a1, 0(a0)
+; RV64IZBB-NEXT: ret
+;
+; RV64IZBS-LABEL: imm_store_i8_neg1:
+; RV64IZBS: # %bb.0:
+; RV64IZBS-NEXT: li a1, -1
+; RV64IZBS-NEXT: sb a1, 0(a0)
+; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_store_i8_neg1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a1, -1
+; RV64IXTHEADBB-NEXT: sb a1, 0(a0)
+; RV64IXTHEADBB-NEXT: ret
+;
+; RV32-REMAT-LABEL: imm_store_i8_neg1:
+; RV32-REMAT: # %bb.0:
+; RV32-REMAT-NEXT: li a1, -1
+; RV32-REMAT-NEXT: sb a1, 0(a0)
+; RV32-REMAT-NEXT: ret
+;
+; RV64-REMAT-LABEL: imm_store_i8_neg1:
+; RV64-REMAT: # %bb.0:
+; RV64-REMAT-NEXT: li a1, -1
+; RV64-REMAT-NEXT: sb a1, 0(a0)
+; RV64-REMAT-NEXT: ret
+ store i8 -1, ptr %p
+ ret void
+}
+
define void @imm_store_i16_neg1(ptr %p) nounwind {
; RV32I-LABEL: imm_store_i16_neg1:
; RV32I: # %bb.0:
@@ -2121,8 +2173,8 @@ define i64 @imm_70370820078523() {
;
; RV64I-POOL-LABEL: imm_70370820078523:
; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI37_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI37_0)(a0)
+; RV64I-POOL-NEXT: lui a0, %hi(.LCPI38_0)
+; RV64I-POOL-NEXT: ld a0, %lo(.LCPI38_0)(a0)
; RV64I-POOL-NEXT: ret
;
; RV64IZBA-LABEL: imm_70370820078523:
@@ -2266,8 +2318,8 @@ define i64 @imm_neg_9223301666034697285() {
;
; RV64I-POOL-LABEL: imm_neg_9223301666034697285:
; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI39_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI39_0)(a0)
+; RV64I-POOL-NEXT: lui a0, %hi(.LCPI40_0)
+; RV64I-POOL-NEXT: ld a0, %lo(.LCPI40_0)(a0)
; RV64I-POOL-NEXT: ret
;
; RV64IZBA-LABEL: imm_neg_9223301666034697285:
@@ -2544,8 +2596,8 @@ define i64 @imm_neg_9223354442718100411() {
;
; RV64I-POOL-LABEL: imm_neg_9223354442718100411:
; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI43_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI43_0)(a0)
+; RV64I-POOL-NEXT: lui a0, %hi(.LCPI44_0)
+; RV64I-POOL-NEXT: ld a0, %lo(.LCPI44_0)(a0)
; RV64I-POOL-NEXT: ret
;
; RV64IZBA-LABEL: imm_neg_9223354442718100411:
@@ -3855,8 +3907,8 @@ define i64 @imm64_same_lo_hi_optsize() nounwind optsize {
;
; RV64I-POOL-LABEL: imm64_same_lo_hi_optsize:
; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI64_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI64_0)(a0)
+; RV64I-POOL-NEXT: lui a0, %hi(.LCPI65_0)
+; RV64I-POOL-NEXT: ld a0, %lo(.LCPI65_0)(a0)
; RV64I-POOL-NEXT: ret
;
; RV64IZBA-LABEL: imm64_same_lo_hi_optsize:
@@ -3930,8 +3982,8 @@ define i64 @imm64_same_lo_hi_negative() nounwind {
;
; RV64I-POOL-LABEL: imm64_same_lo_hi_negative:
; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI65_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI65_0)(a0)
+; RV64I-POOL-NEXT: lui a0, %hi(.LCPI66_0)
+; RV64I-POOL-NEXT: ld a0, %lo(.LCPI66_0)(a0)
; RV64I-POOL-NEXT: ret
;
; RV64IZBA-LABEL: imm64_same_lo_hi_negative:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index b0f6bebea0381..8dc32d13e4a34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -17,7 +17,7 @@ define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: shuffle_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 236
+; CHECK-NEXT: li a0, -20
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index ed6c01aaf7fe1..592ce6fc5be0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -260,7 +260,7 @@ define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: li a0, 253
+; CHECK-NEXT: li a0, -3
; CHECK-NEXT: vmadd.vx v8, a0, v9
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 58af6ac246d16..aba69dc846201 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -611,7 +611,7 @@ define <8 x i8> @concat_4xi8_start_undef(<8 x i8> %v, <8 x i8> %w) {
define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: concat_4xi8_start_undef_at_start:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 224
+; CHECK-NEXT: li a0, -32
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vslideup.vi v8, v9, 4, v0.t
@@ -682,7 +682,7 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: li a0, 234
+; CHECK-NEXT: li a0, -22
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
@@ -699,7 +699,7 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: li a0, 234
+; CHECK-NEXT: li a0, -22
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 5f456c7824316..03624113a8262 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -160,7 +160,7 @@ define i32 @reduce_sum_16xi32_prefix4(ptr %p) {
define i32 @reduce_sum_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 224
+; CHECK-NEXT: li a1, -32
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.v.i v8, -1
@@ -532,7 +532,7 @@ define i32 @reduce_xor_16xi32_prefix2(ptr %p) {
define i32 @reduce_xor_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_xor_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 224
+; CHECK-NEXT: li a1, -32
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.v.i v8, -1
@@ -620,7 +620,7 @@ define i32 @reduce_or_16xi32_prefix2(ptr %p) {
define i32 @reduce_or_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_or_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 224
+; CHECK-NEXT: li a1, -32
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.v.i v8, -1
@@ -757,7 +757,7 @@ define i32 @reduce_umax_16xi32_prefix2(ptr %p) {
define i32 @reduce_umax_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_umax_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 224
+; CHECK-NEXT: li a1, -32
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vmv.v.i v8, -1
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index ce0d8fedbfb88..10497db6edc49 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -419,7 +419,7 @@ define void @merge_stores_i32_i64(ptr %p) {
define void @store_large_constant(ptr %x) {
; SLOW-LABEL: store_large_constant:
; SLOW: # %bb.0:
-; SLOW-NEXT: li a1, 254
+; SLOW-NEXT: li a1, -2
; SLOW-NEXT: sb a1, 7(a0)
; SLOW-NEXT: li a1, 220
; SLOW-NEXT: sb a1, 6(a0)
LGTM
LGTM w/minor comment
Force-pushed from ed69fa6 to fd9a006.