-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rematerialize load RISCV backend #73910
Conversation
@llvm/pr-subscribers-backend-risc-v Author: None (niwinanto) ChangesRe materializing load is disabled for RISCV back end, and this can bring performance issues like this. https://discourse.llvm.org/t/how-to-copy-propagate-physical-register-introduced-before-ra/74828 Here I tried to enable the re materialization to fulfill the use case discussed in the above thread. Patch is 211.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73910.diff 20 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 6c5712dc795bc75..169283e13971801 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -46,6 +47,10 @@ static cl::opt<bool> PreferWholeRegisterMove(
"riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
cl::desc("Prefer whole register move for vector registers."));
+static cl::opt<bool> AggressiveLoadRemat(
+ "riscv-enable-load-remat-aggressive", cl::init(true), cl::Hidden,
+ cl::desc("Rematerialize load aggressively"));
+
static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
"riscv-force-machine-combiner-strategy", cl::Hidden,
cl::desc("Force machine combiner to use a specific strategy for machine "
@@ -1567,6 +1572,48 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
return MI.isAsCheapAsAMove();
}
+bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
+ const MachineInstr &MI) const {
+ if (TargetInstrInfo::isReallyTriviallyReMaterializable(MI))
+ return true;
+
+ const MachineFunction &MF = *MI.getMF();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ const MachineOperand &Dest = MI.getOperand(0);
+ if (!MRI.hasOneUse(Dest.getReg()))
+ return false;
+
+ MachineInstr *UseMI = &*MRI.use_instr_begin(Dest.getReg());
+ MachineBasicBlock::const_iterator DefItr(MI);
+ MachineBasicBlock::const_iterator UseItr(UseMI);
+
+ const MachineBasicBlock *MBB = nullptr;
+ if ((MBB = MI.getParent()) != UseMI->getParent())
+ return false;
+
+ // When loading from stack and the stack slot is not modifed before its use,
+ // then materialize this load.
+ int FrameIdx = 0;
+ if (isLoadFromStackSlot(MI, FrameIdx) && AggressiveLoadRemat) {
+ for (; DefItr != UseItr && DefItr != MBB->end(); DefItr++) {
+ int StoreFrameIdx = 0;
+ if ((*DefItr).isCall() || (isStoreToStackSlot(*DefItr, StoreFrameIdx) &&
+ StoreFrameIdx == FrameIdx))
+ return false;
+ }
+ return true;
+ } else if (MI.mayLoad() && AggressiveLoadRemat) {
+ for (; DefItr != UseItr && DefItr != MBB->end(); DefItr++) {
+ if ((*DefItr).isCall() || (*DefItr).mayStore())
+ return false;
+ }
+ return true;
+ }
+
+return false;
+}
+
std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
if (MI.isMoveReg())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 8f860077c303170..c572281edde49da 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -138,6 +138,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
bool) const override;
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
std::optional<DestSourcePair>
isCopyInstrImpl(const MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index edc08187d8f775a..78128a4a89892df 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -626,7 +626,7 @@ def BGE : BranchCC_rri<0b101, "bge">;
def BLTU : BranchCC_rri<0b110, "bltu">;
def BGEU : BranchCC_rri<0b111, "bgeu">;
-let IsSignExtendingOpW = 1 in {
+let IsSignExtendingOpW = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>;
def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>;
def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>;
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 09ecbbc7e8feb81..201850060fe04a4 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -50,16 +50,16 @@ define void @callee() nounwind {
; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui a6, %hi(var)
-; RV32I-NEXT: lw a0, %lo(var)(a6)
+; RV32I-NEXT: lui a4, %hi(var)
+; RV32I-NEXT: lw a0, %lo(var)(a4)
; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var+4)(a6)
+; RV32I-NEXT: lw a0, %lo(var+4)(a4)
; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var+8)(a6)
+; RV32I-NEXT: lw a0, %lo(var+8)(a4)
; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var+12)(a6)
+; RV32I-NEXT: lw a0, %lo(var+12)(a4)
; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi a5, a6, %lo(var)
+; RV32I-NEXT: addi a5, a4, %lo(var)
; RV32I-NEXT: lw a0, 16(a5)
; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a0, 20(a5)
@@ -84,18 +84,18 @@ define void @callee() nounwind {
; RV32I-NEXT: lw s10, 92(a5)
; RV32I-NEXT: lw s11, 96(a5)
; RV32I-NEXT: lw ra, 100(a5)
-; RV32I-NEXT: lw a7, 104(a5)
-; RV32I-NEXT: lw a4, 108(a5)
-; RV32I-NEXT: lw a0, 124(a5)
-; RV32I-NEXT: lw a1, 120(a5)
-; RV32I-NEXT: lw a2, 116(a5)
-; RV32I-NEXT: lw a3, 112(a5)
-; RV32I-NEXT: sw a0, 124(a5)
-; RV32I-NEXT: sw a1, 120(a5)
-; RV32I-NEXT: sw a2, 116(a5)
-; RV32I-NEXT: sw a3, 112(a5)
-; RV32I-NEXT: sw a4, 108(a5)
-; RV32I-NEXT: sw a7, 104(a5)
+; RV32I-NEXT: lw a6, 104(a5)
+; RV32I-NEXT: lw a3, 108(a5)
+; RV32I-NEXT: lw a7, 124(a5)
+; RV32I-NEXT: lw a0, 120(a5)
+; RV32I-NEXT: lw a1, 116(a5)
+; RV32I-NEXT: lw a2, 112(a5)
+; RV32I-NEXT: sw a7, 124(a5)
+; RV32I-NEXT: sw a0, 120(a5)
+; RV32I-NEXT: sw a1, 116(a5)
+; RV32I-NEXT: sw a2, 112(a5)
+; RV32I-NEXT: sw a3, 108(a5)
+; RV32I-NEXT: sw a6, 104(a5)
; RV32I-NEXT: sw ra, 100(a5)
; RV32I-NEXT: sw s11, 96(a5)
; RV32I-NEXT: sw s10, 92(a5)
@@ -121,13 +121,13 @@ define void @callee() nounwind {
; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: sw a0, 16(a5)
; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var+12)(a6)
+; RV32I-NEXT: sw a0, %lo(var+12)(a4)
; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var+8)(a6)
+; RV32I-NEXT: sw a0, %lo(var+8)(a4)
; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var+4)(a6)
+; RV32I-NEXT: sw a0, %lo(var+4)(a4)
; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var)(a6)
+; RV32I-NEXT: sw a0, %lo(var)(a4)
; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
@@ -161,16 +161,16 @@ define void @callee() nounwind {
; RV32I-WITH-FP-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
; RV32I-WITH-FP-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
; RV32I-WITH-FP-NEXT: addi s0, sp, 80
-; RV32I-WITH-FP-NEXT: lui a6, %hi(var)
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(a6)
+; RV32I-WITH-FP-NEXT: lui a4, %hi(var)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(a4)
; RV32I-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(a6)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(a4)
; RV32I-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(a6)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(a4)
; RV32I-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(a6)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(a4)
; RV32I-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: addi a5, a6, %lo(var)
+; RV32I-WITH-FP-NEXT: addi a5, a4, %lo(var)
; RV32I-WITH-FP-NEXT: lw a0, 16(a5)
; RV32I-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill
; RV32I-WITH-FP-NEXT: lw a0, 20(a5)
@@ -195,20 +195,20 @@ define void @callee() nounwind {
; RV32I-WITH-FP-NEXT: lw s10, 88(a5)
; RV32I-WITH-FP-NEXT: lw s11, 92(a5)
; RV32I-WITH-FP-NEXT: lw ra, 96(a5)
-; RV32I-WITH-FP-NEXT: lw t0, 100(a5)
-; RV32I-WITH-FP-NEXT: lw a7, 104(a5)
-; RV32I-WITH-FP-NEXT: lw a4, 108(a5)
-; RV32I-WITH-FP-NEXT: lw a0, 124(a5)
-; RV32I-WITH-FP-NEXT: lw a1, 120(a5)
-; RV32I-WITH-FP-NEXT: lw a2, 116(a5)
-; RV32I-WITH-FP-NEXT: lw a3, 112(a5)
-; RV32I-WITH-FP-NEXT: sw a0, 124(a5)
-; RV32I-WITH-FP-NEXT: sw a1, 120(a5)
-; RV32I-WITH-FP-NEXT: sw a2, 116(a5)
-; RV32I-WITH-FP-NEXT: sw a3, 112(a5)
-; RV32I-WITH-FP-NEXT: sw a4, 108(a5)
-; RV32I-WITH-FP-NEXT: sw a7, 104(a5)
-; RV32I-WITH-FP-NEXT: sw t0, 100(a5)
+; RV32I-WITH-FP-NEXT: lw a7, 100(a5)
+; RV32I-WITH-FP-NEXT: lw a6, 104(a5)
+; RV32I-WITH-FP-NEXT: lw a3, 108(a5)
+; RV32I-WITH-FP-NEXT: lw t0, 124(a5)
+; RV32I-WITH-FP-NEXT: lw a0, 120(a5)
+; RV32I-WITH-FP-NEXT: lw a1, 116(a5)
+; RV32I-WITH-FP-NEXT: lw a2, 112(a5)
+; RV32I-WITH-FP-NEXT: sw t0, 124(a5)
+; RV32I-WITH-FP-NEXT: sw a0, 120(a5)
+; RV32I-WITH-FP-NEXT: sw a1, 116(a5)
+; RV32I-WITH-FP-NEXT: sw a2, 112(a5)
+; RV32I-WITH-FP-NEXT: sw a3, 108(a5)
+; RV32I-WITH-FP-NEXT: sw a6, 104(a5)
+; RV32I-WITH-FP-NEXT: sw a7, 100(a5)
; RV32I-WITH-FP-NEXT: sw ra, 96(a5)
; RV32I-WITH-FP-NEXT: sw s11, 92(a5)
; RV32I-WITH-FP-NEXT: sw s10, 88(a5)
@@ -234,13 +234,13 @@ define void @callee() nounwind {
; RV32I-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload
; RV32I-WITH-FP-NEXT: sw a0, 16(a5)
; RV32I-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(a4)
; RV32I-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(a4)
; RV32I-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(a4)
; RV32I-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(a4)
; RV32I-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-WITH-FP-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
@@ -260,16 +260,16 @@ define void @callee() nounwind {
; RV32IZCMP-LABEL: callee:
; RV32IZCMP: # %bb.0:
; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -96
-; RV32IZCMP-NEXT: lui a6, %hi(var)
-; RV32IZCMP-NEXT: lw a0, %lo(var)(a6)
+; RV32IZCMP-NEXT: lui a4, %hi(var)
+; RV32IZCMP-NEXT: lw a0, %lo(var)(a4)
; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a4)
; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a4)
; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a4)
; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: addi a5, a6, %lo(var)
+; RV32IZCMP-NEXT: addi a5, a4, %lo(var)
; RV32IZCMP-NEXT: lw a0, 16(a5)
; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
; RV32IZCMP-NEXT: lw a0, 20(a5)
@@ -289,28 +289,28 @@ define void @callee() nounwind {
; RV32IZCMP-NEXT: lw s11, 72(a5)
; RV32IZCMP-NEXT: lw ra, 76(a5)
; RV32IZCMP-NEXT: lw s1, 80(a5)
-; RV32IZCMP-NEXT: lw t3, 84(a5)
-; RV32IZCMP-NEXT: lw t2, 88(a5)
-; RV32IZCMP-NEXT: lw t1, 92(a5)
-; RV32IZCMP-NEXT: lw t0, 96(a5)
+; RV32IZCMP-NEXT: lw t2, 84(a5)
+; RV32IZCMP-NEXT: lw t1, 88(a5)
+; RV32IZCMP-NEXT: lw t0, 92(a5)
+; RV32IZCMP-NEXT: lw a7, 96(a5)
; RV32IZCMP-NEXT: lw s0, 100(a5)
-; RV32IZCMP-NEXT: lw a7, 104(a5)
-; RV32IZCMP-NEXT: lw a4, 108(a5)
-; RV32IZCMP-NEXT: lw a0, 124(a5)
-; RV32IZCMP-NEXT: lw a1, 120(a5)
-; RV32IZCMP-NEXT: lw a2, 116(a5)
-; RV32IZCMP-NEXT: lw a3, 112(a5)
-; RV32IZCMP-NEXT: sw a0, 124(a5)
-; RV32IZCMP-NEXT: sw a1, 120(a5)
-; RV32IZCMP-NEXT: sw a2, 116(a5)
-; RV32IZCMP-NEXT: sw a3, 112(a5)
-; RV32IZCMP-NEXT: sw a4, 108(a5)
-; RV32IZCMP-NEXT: sw a7, 104(a5)
+; RV32IZCMP-NEXT: lw a6, 104(a5)
+; RV32IZCMP-NEXT: lw a3, 108(a5)
+; RV32IZCMP-NEXT: lw t3, 124(a5)
+; RV32IZCMP-NEXT: lw a0, 120(a5)
+; RV32IZCMP-NEXT: lw a1, 116(a5)
+; RV32IZCMP-NEXT: lw a2, 112(a5)
+; RV32IZCMP-NEXT: sw t3, 124(a5)
+; RV32IZCMP-NEXT: sw a0, 120(a5)
+; RV32IZCMP-NEXT: sw a1, 116(a5)
+; RV32IZCMP-NEXT: sw a2, 112(a5)
+; RV32IZCMP-NEXT: sw a3, 108(a5)
+; RV32IZCMP-NEXT: sw a6, 104(a5)
; RV32IZCMP-NEXT: sw s0, 100(a5)
-; RV32IZCMP-NEXT: sw t0, 96(a5)
-; RV32IZCMP-NEXT: sw t1, 92(a5)
-; RV32IZCMP-NEXT: sw t2, 88(a5)
-; RV32IZCMP-NEXT: sw t3, 84(a5)
+; RV32IZCMP-NEXT: sw a7, 96(a5)
+; RV32IZCMP-NEXT: sw t0, 92(a5)
+; RV32IZCMP-NEXT: sw t1, 88(a5)
+; RV32IZCMP-NEXT: sw t2, 84(a5)
; RV32IZCMP-NEXT: sw s1, 80(a5)
; RV32IZCMP-NEXT: sw ra, 76(a5)
; RV32IZCMP-NEXT: sw s11, 72(a5)
@@ -331,13 +331,13 @@ define void @callee() nounwind {
; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
; RV32IZCMP-NEXT: sw a0, 16(a5)
; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a4)
; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a4)
; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a4)
; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var)(a4)
; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 96
;
; RV32IZCMP-WITH-FP-LABEL: callee:
@@ -357,16 +357,16 @@ define void @callee() nounwind {
; RV32IZCMP-WITH-FP-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
; RV32IZCMP-WITH-FP-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 80
-; RV32IZCMP-WITH-FP-NEXT: lui a6, %hi(var)
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lui a4, %hi(var)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a4)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a4)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a4)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a4)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: addi a5, a6, %lo(var)
+; RV32IZCMP-WITH-FP-NEXT: addi a5, a4, %lo(var)
; RV32IZCMP-WITH-FP-NEXT: lw a0, 16(a5)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill
; RV32IZCMP-WITH-FP-NEXT: lw a0, 20(a5)
@@ -386,30 +386,30 @@ define void @callee() nounwind {
; RV32IZCMP-WITH-FP-NEXT: lw s10, 68(a5)
; RV32IZCMP-WITH-FP-NEXT: lw s11, 72(a5)
; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw t4, 80(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw t3, 84(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw t2, 88(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw t3, 80(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw t2, 84(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw t1, 88(a5)
; RV32IZCMP-WITH-FP-NEXT: lw s1, 92(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw t1, 96(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a4, 108(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a3, 112(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a3, 112(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a4, 108(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw t1, 96(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw t0, 96(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a7, 100(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a6, 104(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a3, 108(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw t4, 124(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, 120(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a1, 116(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a2, 112(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw t4, 124(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, 120(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a1, 116(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a2, 112(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a3, 108(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a6, 104(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a7, 100(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw t0, 96(a5)
; RV32IZCMP-WITH-FP-NEXT: sw s1, 92(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw t2, 88(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw t3, 84(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw t4, 80(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw t1, 88(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw t2, 84(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw t3, 80(a5)
; RV32IZCMP-WITH-FP-NEXT: sw ra, 76(a5)
; RV32IZCMP-WITH-FP-NEXT: sw s11, 72(a5)
; RV32IZCMP-WITH-FP-NEXT: sw s10, 68(a5)
@@ -430,13 +430,13 @@ define void @callee() nounwind {
; RV32IZCMP-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload
; RV32IZCMP-WITH-FP-NEXT: sw a0, 16(a5)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a4)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a4)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a4)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a4)
; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32IZCMP-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; RV32IZCMP-WITH-FP-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
@@ -469,16 +469,16 @@ define void @callee() nounwind {
; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a6, %hi(var)
-; RV64I-NEXT: lw a0, %lo(var)(a6)
+; RV64I-NEXT: lui a4, %hi(var)
+; RV64I-NEXT: lw a0, %lo(var)(a4)
; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var+4)(a6)
+; RV64I-NEXT: lw a0, %lo(var+4)(a4)
; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var+8)(a6)
+; RV64I-NEXT: lw a0, %lo(var+8)(a4)
; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var+12)(a6)
+; RV64I-NEXT: lw a0, %lo(var+12)(a4)
; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addi a5, a6, %lo(var)
+; RV64I-NEXT: addi a5, a4, %lo(var)
; RV64I-NEXT: lw a0, 16(a5)
; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: lw a0, 20(a5)
@@ -503,18 +503,18 @@ define void @callee() nounwind {
; RV64I-NEXT: lw s10, 92(a5)
; RV64I-NEXT: lw s11, 96(a5)
; RV64I-NEXT: lw ra, 100(a5)
-; RV64I-NEXT: lw a7, 104(a5)
-; RV64I-NEXT: lw a4, 108(a5)
-; RV64I...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
if (isLoadFromStackSlot(MI, FrameIdx) && AggressiveLoadRemat) { | ||
for (; DefItr != UseItr && DefItr != MBB->end(); DefItr++) { | ||
int StoreFrameIdx = 0; | ||
if ((*DefItr).isCall() || (isStoreToStackSlot(*DefItr, StoreFrameIdx) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if there is store, but we don't know exactly where it stores to? Don't we need to conservatively assume it could write to the slot we're loading?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if there is store, but we don't know exactly where it stores to? Don't we need to conservatively assume it could write to the slot we're loading?
@topperc I did some mistakes with the PR workflow and it is failing, plan to drop this PR and open new. Will add you to new PR and lets address it there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True, storing to a pointer to a variable that is in the stack for example.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@andcarminati Addressed in the new PR, #73924
if ((MBB = MI.getParent()) != UseMI->getParent()) | ||
return false; | ||
|
||
// When loading from stack and the stack slot is not modifed before its use, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
modifed -> modified
Re materializing load is disabled for RISCV back end, and this can bring performance issues like this. https://discourse.llvm.org/t/how-to-copy-propagate-physical-register-introduced-before-ra/74828
Here I tried to enable the re materialization to fulfill the use case discussed in the above thread.