-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Lower the alignment requirement for a GPR pair spill for Zdinx on RV32. #85871
Conversation
…x on RV32. I believe we can use XLen alignment as long as eliminateFrameIndex limits the maximum folded offset to 2043. This way, when we split the load/store into two instructions, we'll be able to add 4 without overflowing simm12. The test is long to make sure we generate enough spills to have a large offset. I'm open to suggestions on ways to shorten it.
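@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: I believe we can use XLen alignment as long as eliminateFrameIndex limits the maximum folded offset to 2043. This way, when we split the load/store into two instructions, we'll be able to add 4 without overflowing simm12. The test is long to make sure we generate enough spills to have a large offset. I'm open to suggestions on ways to shorten it. Stacked on a minor refactor #85847. Patch is 106.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85871.diff 3 Files Affected: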
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index a68674b221d38e..881aab955f7d0b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -431,29 +431,35 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
if (!IsRVVSpill) {
- if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) {
+ int64_t Val = Offset.getFixed();
+ int64_t Lo12 = SignExtend64<12>(Val);
+ unsigned Opc = MI.getOpcode();
+ if (Opc == RISCV::ADDI && !isInt<12>(Val)) {
// We chose to emit the canonical immediate sequence rather than folding
// the offset into the using add under the theory that doing so doesn't
// save dynamic instruction count and some target may fuse the canonical
// 32 bit immediate sequence. We still need to clear the portion of the
// offset encoded in the immediate.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+ } else if ((Opc == RISCV::PREFETCH_I || Opc == RISCV::PREFETCH_R ||
+ Opc == RISCV::PREFETCH_W) &&
+ (Lo12 & 0b11111) != 0) {
+ // Prefetch instructions require the offset to be 32 byte aligned.
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+ } else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
+ Opc == RISCV::PseudoRV32ZdinxSD) &&
+ Lo12 >= 2044) {
+ // This instruction will be split into 2 instructions. The second
+ // instruction will add 4 to the immediate. If that would overflow 12
+ // bits, we can't fold the offset.
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
} else {
// We can encode an add with 12 bit signed immediate in the immediate
// operand of our user instruction. As a result, the remaining
// offset can by construction, at worst, a LUI and a ADD.
- int64_t Val = Offset.getFixed();
- int64_t Lo12 = SignExtend64<12>(Val);
- if ((MI.getOpcode() == RISCV::PREFETCH_I ||
- MI.getOpcode() == RISCV::PREFETCH_R ||
- MI.getOpcode() == RISCV::PREFETCH_W) &&
- (Lo12 & 0b11111) != 0)
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
- else {
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
- Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
- Offset.getScalable());
- }
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
+ Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
+ Offset.getScalable());
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 225b57554c1dc0..9da1f73681c68c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -573,7 +573,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
}
let RegInfos = RegInfoByHwMode<[RV32, RV64],
- [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>,
+ [RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>,
DecoderMethod = "DecodeGPRPairRegisterClass" in
def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add
X10_X11, X12_X13, X14_X15, X16_X17,
diff --git a/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll b/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll
new file mode 100644
index 00000000000000..d9856478b19053
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zdinx-large-spill.ll
@@ -0,0 +1,2873 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=riscv32 -mattr=+zdinx | FileCheck %s
+
+; Generate over 2048 bytes of spills by loading a bunch of values and then forcing
+; all GPRs to be spilled via inline assembly that clobbers all registers. We
+; want to make sure eliminateFrameIndex doesn't fold sp+2044 as an offset in a
+; GPR pair spill instruction. When we split the pair spill, we would be unable
+; to add 4 to the immediate without overflowing simm12.
+
+; 2040(sp) should be the largest offset we have.
+
+define void @foo(ptr nocapture noundef %0) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -2032
+; CHECK-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s1, 2020(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s2, 2016(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s3, 2012(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s4, 2008(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s5, 2004(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s6, 2000(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s7, 1996(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s8, 1992(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s9, 1988(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s10, 1984(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s11, 1980(sp) # 4-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -80
+; CHECK-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a2, 0(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lui a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: sw a2, -2044(a1)
+; CHECK-NEXT: sw a3, -2040(a1)
+; CHECK-NEXT: lw a2, 8(a0)
+; CHECK-NEXT: lw a3, 12(a0)
+; CHECK-NEXT: addi a1, sp, 2044
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: sw a3, 4(a1)
+; CHECK-NEXT: lw a2, 16(a0)
+; CHECK-NEXT: lw a3, 20(a0)
+; CHECK-NEXT: sw a2, 2036(sp)
+; CHECK-NEXT: sw a3, 2040(sp)
+; CHECK-NEXT: lw a2, 24(a0)
+; CHECK-NEXT: lw a3, 28(a0)
+; CHECK-NEXT: sw a2, 2028(sp)
+; CHECK-NEXT: sw a3, 2032(sp)
+; CHECK-NEXT: lw a2, 32(a0)
+; CHECK-NEXT: lw a3, 36(a0)
+; CHECK-NEXT: sw a2, 2020(sp)
+; CHECK-NEXT: sw a3, 2024(sp)
+; CHECK-NEXT: lw a2, 40(a0)
+; CHECK-NEXT: lw a3, 44(a0)
+; CHECK-NEXT: sw a2, 2012(sp)
+; CHECK-NEXT: sw a3, 2016(sp)
+; CHECK-NEXT: lw a2, 48(a0)
+; CHECK-NEXT: lw a3, 52(a0)
+; CHECK-NEXT: sw a2, 2004(sp)
+; CHECK-NEXT: sw a3, 2008(sp)
+; CHECK-NEXT: lw a2, 56(a0)
+; CHECK-NEXT: lw a3, 60(a0)
+; CHECK-NEXT: sw a2, 1996(sp)
+; CHECK-NEXT: sw a3, 2000(sp)
+; CHECK-NEXT: lw a2, 64(a0)
+; CHECK-NEXT: lw a3, 68(a0)
+; CHECK-NEXT: sw a2, 1988(sp)
+; CHECK-NEXT: sw a3, 1992(sp)
+; CHECK-NEXT: lw a2, 72(a0)
+; CHECK-NEXT: lw a3, 76(a0)
+; CHECK-NEXT: sw a2, 1980(sp)
+; CHECK-NEXT: sw a3, 1984(sp)
+; CHECK-NEXT: lw a2, 80(a0)
+; CHECK-NEXT: lw a3, 84(a0)
+; CHECK-NEXT: sw a2, 1972(sp)
+; CHECK-NEXT: sw a3, 1976(sp)
+; CHECK-NEXT: lw a2, 88(a0)
+; CHECK-NEXT: lw a3, 92(a0)
+; CHECK-NEXT: sw a2, 1964(sp)
+; CHECK-NEXT: sw a3, 1968(sp)
+; CHECK-NEXT: lw a2, 96(a0)
+; CHECK-NEXT: lw a3, 100(a0)
+; CHECK-NEXT: sw a2, 1956(sp)
+; CHECK-NEXT: sw a3, 1960(sp)
+; CHECK-NEXT: lw a2, 104(a0)
+; CHECK-NEXT: lw a3, 108(a0)
+; CHECK-NEXT: sw a2, 1948(sp)
+; CHECK-NEXT: sw a3, 1952(sp)
+; CHECK-NEXT: lw a2, 112(a0)
+; CHECK-NEXT: lw a3, 116(a0)
+; CHECK-NEXT: sw a2, 1940(sp)
+; CHECK-NEXT: sw a3, 1944(sp)
+; CHECK-NEXT: lw a2, 120(a0)
+; CHECK-NEXT: lw a3, 124(a0)
+; CHECK-NEXT: sw a2, 1932(sp)
+; CHECK-NEXT: sw a3, 1936(sp)
+; CHECK-NEXT: lw a2, 128(a0)
+; CHECK-NEXT: lw a3, 132(a0)
+; CHECK-NEXT: sw a2, 1924(sp)
+; CHECK-NEXT: sw a3, 1928(sp)
+; CHECK-NEXT: lw a2, 136(a0)
+; CHECK-NEXT: lw a3, 140(a0)
+; CHECK-NEXT: sw a2, 1916(sp)
+; CHECK-NEXT: sw a3, 1920(sp)
+; CHECK-NEXT: lw a2, 144(a0)
+; CHECK-NEXT: lw a3, 148(a0)
+; CHECK-NEXT: sw a2, 1908(sp)
+; CHECK-NEXT: sw a3, 1912(sp)
+; CHECK-NEXT: lw a2, 152(a0)
+; CHECK-NEXT: lw a3, 156(a0)
+; CHECK-NEXT: sw a2, 1900(sp)
+; CHECK-NEXT: sw a3, 1904(sp)
+; CHECK-NEXT: lw a2, 160(a0)
+; CHECK-NEXT: lw a3, 164(a0)
+; CHECK-NEXT: sw a2, 1892(sp)
+; CHECK-NEXT: sw a3, 1896(sp)
+; CHECK-NEXT: lw a2, 168(a0)
+; CHECK-NEXT: lw a3, 172(a0)
+; CHECK-NEXT: sw a2, 1884(sp)
+; CHECK-NEXT: sw a3, 1888(sp)
+; CHECK-NEXT: lw a2, 176(a0)
+; CHECK-NEXT: lw a3, 180(a0)
+; CHECK-NEXT: sw a2, 1876(sp)
+; CHECK-NEXT: sw a3, 1880(sp)
+; CHECK-NEXT: lw a2, 184(a0)
+; CHECK-NEXT: lw a3, 188(a0)
+; CHECK-NEXT: sw a2, 1868(sp)
+; CHECK-NEXT: sw a3, 1872(sp)
+; CHECK-NEXT: lw a2, 192(a0)
+; CHECK-NEXT: lw a3, 196(a0)
+; CHECK-NEXT: sw a2, 1860(sp)
+; CHECK-NEXT: sw a3, 1864(sp)
+; CHECK-NEXT: lw a2, 200(a0)
+; CHECK-NEXT: lw a3, 204(a0)
+; CHECK-NEXT: sw a2, 1852(sp)
+; CHECK-NEXT: sw a3, 1856(sp)
+; CHECK-NEXT: lw a2, 208(a0)
+; CHECK-NEXT: lw a3, 212(a0)
+; CHECK-NEXT: sw a2, 1844(sp)
+; CHECK-NEXT: sw a3, 1848(sp)
+; CHECK-NEXT: lw a2, 216(a0)
+; CHECK-NEXT: lw a3, 220(a0)
+; CHECK-NEXT: sw a2, 1836(sp)
+; CHECK-NEXT: sw a3, 1840(sp)
+; CHECK-NEXT: lw a2, 224(a0)
+; CHECK-NEXT: lw a3, 228(a0)
+; CHECK-NEXT: sw a2, 1828(sp)
+; CHECK-NEXT: sw a3, 1832(sp)
+; CHECK-NEXT: lw a2, 232(a0)
+; CHECK-NEXT: lw a3, 236(a0)
+; CHECK-NEXT: sw a2, 1820(sp)
+; CHECK-NEXT: sw a3, 1824(sp)
+; CHECK-NEXT: lw a2, 240(a0)
+; CHECK-NEXT: lw a3, 244(a0)
+; CHECK-NEXT: sw a2, 1812(sp)
+; CHECK-NEXT: sw a3, 1816(sp)
+; CHECK-NEXT: lw a2, 248(a0)
+; CHECK-NEXT: lw a3, 252(a0)
+; CHECK-NEXT: sw a2, 1804(sp)
+; CHECK-NEXT: sw a3, 1808(sp)
+; CHECK-NEXT: lw a2, 256(a0)
+; CHECK-NEXT: lw a3, 260(a0)
+; CHECK-NEXT: sw a2, 1796(sp)
+; CHECK-NEXT: sw a3, 1800(sp)
+; CHECK-NEXT: lw a2, 264(a0)
+; CHECK-NEXT: lw a3, 268(a0)
+; CHECK-NEXT: sw a2, 1788(sp)
+; CHECK-NEXT: sw a3, 1792(sp)
+; CHECK-NEXT: lw a2, 272(a0)
+; CHECK-NEXT: lw a3, 276(a0)
+; CHECK-NEXT: sw a2, 1780(sp)
+; CHECK-NEXT: sw a3, 1784(sp)
+; CHECK-NEXT: lw a2, 280(a0)
+; CHECK-NEXT: lw a3, 284(a0)
+; CHECK-NEXT: sw a2, 1772(sp)
+; CHECK-NEXT: sw a3, 1776(sp)
+; CHECK-NEXT: lw a2, 288(a0)
+; CHECK-NEXT: lw a3, 292(a0)
+; CHECK-NEXT: sw a2, 1764(sp)
+; CHECK-NEXT: sw a3, 1768(sp)
+; CHECK-NEXT: lw a2, 296(a0)
+; CHECK-NEXT: lw a3, 300(a0)
+; CHECK-NEXT: sw a2, 1756(sp)
+; CHECK-NEXT: sw a3, 1760(sp)
+; CHECK-NEXT: lw a2, 304(a0)
+; CHECK-NEXT: lw a3, 308(a0)
+; CHECK-NEXT: sw a2, 1748(sp)
+; CHECK-NEXT: sw a3, 1752(sp)
+; CHECK-NEXT: lw a2, 312(a0)
+; CHECK-NEXT: lw a3, 316(a0)
+; CHECK-NEXT: sw a2, 1740(sp)
+; CHECK-NEXT: sw a3, 1744(sp)
+; CHECK-NEXT: lw a2, 320(a0)
+; CHECK-NEXT: lw a3, 324(a0)
+; CHECK-NEXT: sw a2, 1732(sp)
+; CHECK-NEXT: sw a3, 1736(sp)
+; CHECK-NEXT: lw a2, 328(a0)
+; CHECK-NEXT: lw a3, 332(a0)
+; CHECK-NEXT: sw a2, 1724(sp)
+; CHECK-NEXT: sw a3, 1728(sp)
+; CHECK-NEXT: lw a2, 336(a0)
+; CHECK-NEXT: lw a3, 340(a0)
+; CHECK-NEXT: sw a2, 1716(sp)
+; CHECK-NEXT: sw a3, 1720(sp)
+; CHECK-NEXT: lw a2, 344(a0)
+; CHECK-NEXT: lw a3, 348(a0)
+; CHECK-NEXT: sw a2, 1708(sp)
+; CHECK-NEXT: sw a3, 1712(sp)
+; CHECK-NEXT: lw a2, 352(a0)
+; CHECK-NEXT: lw a3, 356(a0)
+; CHECK-NEXT: sw a2, 1700(sp)
+; CHECK-NEXT: sw a3, 1704(sp)
+; CHECK-NEXT: lw a2, 360(a0)
+; CHECK-NEXT: lw a3, 364(a0)
+; CHECK-NEXT: sw a2, 1692(sp)
+; CHECK-NEXT: sw a3, 1696(sp)
+; CHECK-NEXT: lw a2, 368(a0)
+; CHECK-NEXT: lw a3, 372(a0)
+; CHECK-NEXT: sw a2, 1684(sp)
+; CHECK-NEXT: sw a3, 1688(sp)
+; CHECK-NEXT: lw a2, 376(a0)
+; CHECK-NEXT: lw a3, 380(a0)
+; CHECK-NEXT: sw a2, 1676(sp)
+; CHECK-NEXT: sw a3, 1680(sp)
+; CHECK-NEXT: lw a2, 384(a0)
+; CHECK-NEXT: lw a3, 388(a0)
+; CHECK-NEXT: sw a2, 1668(sp)
+; CHECK-NEXT: sw a3, 1672(sp)
+; CHECK-NEXT: lw a2, 392(a0)
+; CHECK-NEXT: lw a3, 396(a0)
+; CHECK-NEXT: sw a2, 1660(sp)
+; CHECK-NEXT: sw a3, 1664(sp)
+; CHECK-NEXT: lw a2, 400(a0)
+; CHECK-NEXT: lw a3, 404(a0)
+; CHECK-NEXT: sw a2, 1652(sp)
+; CHECK-NEXT: sw a3, 1656(sp)
+; CHECK-NEXT: lw a2, 408(a0)
+; CHECK-NEXT: lw a3, 412(a0)
+; CHECK-NEXT: sw a2, 1644(sp)
+; CHECK-NEXT: sw a3, 1648(sp)
+; CHECK-NEXT: lw a2, 416(a0)
+; CHECK-NEXT: lw a3, 420(a0)
+; CHECK-NEXT: sw a2, 1636(sp)
+; CHECK-NEXT: sw a3, 1640(sp)
+; CHECK-NEXT: lw a2, 424(a0)
+; CHECK-NEXT: lw a3, 428(a0)
+; CHECK-NEXT: sw a2, 1628(sp)
+; CHECK-NEXT: sw a3, 1632(sp)
+; CHECK-NEXT: lw a2, 432(a0)
+; CHECK-NEXT: lw a3, 436(a0)
+; CHECK-NEXT: sw a2, 1620(sp)
+; CHECK-NEXT: sw a3, 1624(sp)
+; CHECK-NEXT: lw a2, 440(a0)
+; CHECK-NEXT: lw a3, 444(a0)
+; CHECK-NEXT: sw a2, 1612(sp)
+; CHECK-NEXT: sw a3, 1616(sp)
+; CHECK-NEXT: lw a2, 448(a0)
+; CHECK-NEXT: lw a3, 452(a0)
+; CHECK-NEXT: sw a2, 1604(sp)
+; CHECK-NEXT: sw a3, 1608(sp)
+; CHECK-NEXT: lw a2, 456(a0)
+; CHECK-NEXT: lw a3, 460(a0)
+; CHECK-NEXT: sw a2, 1596(sp)
+; CHECK-NEXT: sw a3, 1600(sp)
+; CHECK-NEXT: lw a2, 464(a0)
+; CHECK-NEXT: lw a3, 468(a0)
+; CHECK-NEXT: sw a2, 1588(sp)
+; CHECK-NEXT: sw a3, 1592(sp)
+; CHECK-NEXT: lw a2, 472(a0)
+; CHECK-NEXT: lw a3, 476(a0)
+; CHECK-NEXT: sw a2, 1580(sp)
+; CHECK-NEXT: sw a3, 1584(sp)
+; CHECK-NEXT: lw a2, 480(a0)
+; CHECK-NEXT: lw a3, 484(a0)
+; CHECK-NEXT: sw a2, 1572(sp)
+; CHECK-NEXT: sw a3, 1576(sp)
+; CHECK-NEXT: lw a2, 488(a0)
+; CHECK-NEXT: lw a3, 492(a0)
+; CHECK-NEXT: sw a2, 1564(sp)
+; CHECK-NEXT: sw a3, 1568(sp)
+; CHECK-NEXT: lw a2, 496(a0)
+; CHECK-NEXT: lw a3, 500(a0)
+; CHECK-NEXT: sw a2, 1556(sp)
+; CHECK-NEXT: sw a3, 1560(sp)
+; CHECK-NEXT: lw a2, 504(a0)
+; CHECK-NEXT: lw a3, 508(a0)
+; CHECK-NEXT: sw a2, 1548(sp)
+; CHECK-NEXT: sw a3, 1552(sp)
+; CHECK-NEXT: lw a2, 512(a0)
+; CHECK-NEXT: lw a3, 516(a0)
+; CHECK-NEXT: sw a2, 1540(sp)
+; CHECK-NEXT: sw a3, 1544(sp)
+; CHECK-NEXT: lw a2, 520(a0)
+; CHECK-NEXT: lw a3, 524(a0)
+; CHECK-NEXT: sw a2, 1532(sp)
+; CHECK-NEXT: sw a3, 1536(sp)
+; CHECK-NEXT: lw a2, 528(a0)
+; CHECK-NEXT: lw a3, 532(a0)
+; CHECK-NEXT: sw a2, 1524(sp)
+; CHECK-NEXT: sw a3, 1528(sp)
+; CHECK-NEXT: lw a2, 536(a0)
+; CHECK-NEXT: lw a3, 540(a0)
+; CHECK-NEXT: sw a2, 1516(sp)
+; CHECK-NEXT: sw a3, 1520(sp)
+; CHECK-NEXT: lw a2, 544(a0)
+; CHECK-NEXT: lw a3, 548(a0)
+; CHECK-NEXT: sw a2, 1508(sp)
+; CHECK-NEXT: sw a3, 1512(sp)
+; CHECK-NEXT: lw a2, 552(a0)
+; CHECK-NEXT: lw a3, 556(a0)
+; CHECK-NEXT: sw a2, 1500(sp)
+; CHECK-NEXT: sw a3, 1504(sp)
+; CHECK-NEXT: lw a2, 560(a0)
+; CHECK-NEXT: lw a3, 564(a0)
+; CHECK-NEXT: sw a2, 1492(sp)
+; CHECK-NEXT: sw a3, 1496(sp)
+; CHECK-NEXT: lw a2, 568(a0)
+; CHECK-NEXT: lw a3, 572(a0)
+; CHECK-NEXT: sw a2, 1484(sp)
+; CHECK-NEXT: sw a3, 1488(sp)
+; CHECK-NEXT: lw a2, 576(a0)
+; CHECK-NEXT: lw a3, 580(a0)
+; CHECK-NEXT: sw a2, 1476(sp)
+; CHECK-NEXT: sw a3, 1480(sp)
+; CHECK-NEXT: lw a2, 584(a0)
+; CHECK-NEXT: lw a3, 588(a0)
+; CHECK-NEXT: sw a2, 1468(sp)
+; CHECK-NEXT: sw a3, 1472(sp)
+; CHECK-NEXT: lw a2, 592(a0)
+; CHECK-NEXT: lw a3, 596(a0)
+; CHECK-NEXT: sw a2, 1460(sp)
+; CHECK-NEXT: sw a3, 1464(sp)
+; CHECK-NEXT: lw a2, 600(a0)
+; CHECK-NEXT: lw a3, 604(a0)
+; CHECK-NEXT: sw a2, 1452(sp)
+; CHECK-NEXT: sw a3, 1456(sp)
+; CHECK-NEXT: lw a2, 608(a0)
+; CHECK-NEXT: lw a3, 612(a0)
+; CHECK-NEXT: sw a2, 1444(sp)
+; CHECK-NEXT: sw a3, 1448(sp)
+; CHECK-NEXT: lw a2, 616(a0)
+; CHECK-NEXT: lw a3, 620(a0)
+; CHECK-NEXT: sw a2, 1436(sp)
+; CHECK-NEXT: sw a3, 1440(sp)
+; CHECK-NEXT: lw a2, 624(a0)
+; CHECK-NEXT: lw a3, 628(a0)
+; CHECK-NEXT: sw a2, 1428(sp)
+; CHECK-NEXT: sw a3, 1432(sp)
+; CHECK-NEXT: lw a2, 632(a0)
+; CHECK-NEXT: lw a3, 636(a0)
+; CHECK-NEXT: sw a2, 1420(sp)
+; CHECK-NEXT: sw a3, 1424(sp)
+; CHECK-NEXT: lw a2, 640(a0)
+; CHECK-NEXT: lw a3, 644(a0)
+; CHECK-NEXT: sw a2, 1412(sp)
+; CHECK-NEXT: sw a3, 1416(sp)
+; CHECK-NEXT: lw a2, 648(a0)
+; CHECK-NEXT: lw a3, 652(a0)
+; CHECK-NEXT: sw a2, 1404(sp)
+; CHECK-NEXT: sw a3, 1408(sp)
+; CHECK-NEXT: lw a2, 656(a0)
+; CHECK-NEXT: lw a3, 660(a0)
+; CHECK-NEXT: sw a2, 1396(sp)
+; CHECK-NEXT: sw a3, 1400(sp)
+; CHECK-NEXT: lw a2, 664(a0)
+; CHECK-NEXT: lw a3, 668(a0)
+; CHECK-NEXT: sw a2, 1388(sp)
+; CHECK-NEXT: sw a3, 1392(sp)
+; CHECK-NEXT: lw a2, 672(a0)
+; CHECK-NEXT: lw a3, 676(a0)
+; CHECK-NEXT: sw a2, 1380(sp)
+; CHECK-NEXT: sw a3, 1384(sp)
+; CHECK-NEXT: lw a2, 680(a0)
+; CHECK-NEXT: lw a3, 684(a0)
+; CHECK-NEXT: sw a2, 1372(sp)
+; CHECK-NEXT: sw a3, 1376(sp)
+; CHECK-NEXT: lw a2, 688(a0)
+; CHECK-NEXT: lw a3, 692(a0)
+; CHECK-NEXT: sw a2, 1364(sp)
+; CHECK-NEXT: sw a3, 1368(sp)
+; CHECK-NEXT: lw a2, 696(a0)
+; CHECK-NEXT: lw a3, 700(a0)
+; CHECK-NEXT: sw a2, 1356(sp)
+; CHECK-NEXT: sw a3, 1360(sp)
+; CHECK-NEXT: lw a2, 704(a0)
+; CHECK-NEXT: lw a3, 708(a0)
+; CHECK-NEXT: sw a2, 1348(sp)
+; CHECK-NEXT: sw a3, 1352(sp)
+; CHECK-NEXT: lw a2, 712(a0)
+; CHECK-NEXT: lw a3, 716(a0)
+; CHECK-NEXT: sw a2, 1340(sp)
+; CHECK-NEXT: sw a3, 1344(sp)
+; CHECK-NEXT: lw a2, 720(a0)
+; CHECK-NEXT: lw a3, 724(a0)
+; CHECK-NEXT: sw a2, 1332(sp)
+; CHECK-NEXT: sw a3, 1336(sp)
+; CHECK-NEXT: lw a2, 728(a0)
+; CHECK-NEXT: lw a3, 732(a0)
+; CHECK-NEXT: sw a2, 1324(sp)
+; CHECK-NEXT: sw a3, 1328(sp)
+; CHECK-NEXT: lw a2, 736(a0)
+; CHECK-NEXT: lw a3, 740(a0)
+; CHECK-NEXT: sw a2, 1316(sp)
+; CHECK-NEXT: sw a3, 1320(sp)
+; CHECK-NEXT: lw a2, 744(a0)
+; CHECK-NEXT: lw a3, 748(a0)
+; CHECK-NEXT: sw a2, 1308(sp)
+; CHECK-NEXT: sw a3, 1312(sp)
+; CHECK-NEXT: lw a2, 752(a0)
+; CHECK-NEXT: lw a3, 756(a0)
+; CHECK-NEXT: sw a2, 1300(sp)
+; CHECK-NEXT: sw a3, 1304(sp)
+; CHECK-NEXT: lw a2, 760(a0)
+; CHECK-NEXT: lw a3, 764(a0)
+; CHECK-NEXT: sw a2, 1292(sp)
+; CHECK-NEXT: sw a3, 1296(sp)
+; CHECK-NEXT: lw a2, 768(a0)
+; CHECK-NEXT: lw a3, 772(a0)
+; CHECK-NEXT: sw a2, 1284(sp)
+; CHECK-NEXT: sw a3, 1288(sp)
+; CHECK-NEXT: lw a2, 776(a0)
+; CHECK-NEXT: lw a3, 780(a0)
+; CHECK-NEXT: sw a2, 1276(sp)
+; CHECK-NEXT: sw a3, 1280(sp)
+; CHECK-NEXT: lw a2, 784(a0)
+; CHECK-NEXT: lw a3, 788(a0)
+; CHECK-NEXT: sw a2, 1268(sp)
+; CHECK-NEXT: sw a3, 1272(sp)
+; CHECK-NEXT: lw a2, 792(a0)
+; CHECK-NEXT: lw a3, 796(a0)
+; CHECK-NEXT: sw a2, 1260(sp)
+; CHECK-NEXT: sw a3, 1264(sp)
+; CHECK-NEXT: lw a2, 800(a0)
+; CHECK-NEXT: lw a3, 804(a0)
+; CHECK-NEXT: sw a2, 1252(sp)
+; CHECK-NEXT: sw a3, 1256(sp)
+; CHECK-NEXT: lw a2, 808(a0)
+; CHECK-NEXT: lw a3, 812(a0)
+; CHECK-NEXT: sw a2, 1244(sp)
+; CHECK-NEXT: sw a3, 1248(sp)
+; CHECK-NEXT: lw a2, 816(a0)
+; CHECK-NEXT: lw a3, 820(a0)
+; CHECK-NEXT: sw a2, 1236(sp)
+; CHECK-NEXT: sw a3, 1240(sp)
+; CHECK-NEXT: lw a2, 824(a0)
+; CHECK-NEXT: lw a3, 828(a0)
+; CHECK-NEXT: sw a2, 1228(sp)
+; CHECK-NEXT: sw a3, 1232(sp)
+; CHECK-NEXT: lw a2, 832(a0)
+; CHECK-NEXT: lw a3, 836(a0)
+; CHECK-NEXT: sw a2, 1220(sp)
+; CHECK-NEXT: sw a3, 1224(sp)
+; CHECK-NEXT: lw a2, 840(a0)
+; CHECK-NEXT: lw a3, 844(a0)
+; CHECK-NEXT: sw a2, 1212(sp)
+; CHECK-NEXT: sw a3, 1216(sp)
+; CHECK-NEXT: lw a2, 848(a0)
+; CHECK-NEXT: lw a3, 852(a0)
+; CHECK-NEXT: sw a2, 1204(sp)
+; CHECK-NEXT: sw a3, 1208(sp)
+; CHECK-NEXT: lw a2, 856(a0)
+; CHECK-NEXT: lw a3, 860(a0)
+; CHECK-NEXT: sw a2, 1196(sp)
+; CHECK-NEXT: sw a3, 1200(sp)
+; CHECK-NEXT: lw a2, 864(a0)
+; CHECK-NEXT: lw a3, 868(a0)
+; CHECK-NEXT: sw a2, 1188(sp)
+; CHECK-NEXT: sw a3, 1192(sp)
+; CHECK-NEXT: lw a2, 872(a0)
+; CHECK-NEXT: lw a3, 876(a0)
+; CHECK-NEXT: sw a2, 1180(sp)
+; CHECK-NEXT: sw a3, 1184(sp)
+; CHECK-NEXT: lw a2, 880(a0)
+; CHECK-NEXT: lw a3, 884(a0)
+; CHECK-NEXT: sw a2, 1172(sp)
+; CHECK-NEXT: sw a3, 1176(sp)
+; CHECK-NEXT: lw a2, 888(a0)
+; CHECK-NEXT: lw a3, 892(a0)
+; CHECK-NEXT: sw a2, 1164(sp)
+; CHECK-NEXT: sw a3, 1168(sp)
+; CHECK-NEXT: lw a2, 896(a0)
+; CHECK-NEXT: lw a3, 900(a0)
+; CHECK-NEXT: sw a2, 1156(sp)
+; CHECK-NEXT: sw a3, 1160(sp)
+; CHECK-NEXT: lw a2, 904(a0)
+; CHECK-NEXT: lw a3, 908(a0)
+; CHECK-NEXT: sw a2, 1148(sp)
...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Maybe a MIR test would be better? We can set frame info manually in MIR. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
…x on RV32. (llvm#85871) I believe we can use XLen alignment as long as eliminateFrameIndex limits the maximum folded offset to 2043. This way, when we split the load/store into two instructions, we'll be able to add 4 without overflowing simm12.
I believe we can use XLen alignment as long as eliminateFrameIndex
limits the maximum folded offset to 2043. This way when we split
the load/store into two instructions we'll be able to add 4
without overflowing simm12.
The test is long to make sure we generate enough spills to have a
large offset. I'm open to suggestions on ways to shorten it.
Stacked on a minor refactor #85847