From 508836b7d5f6e8910aae7866fabbb8fe4964a197 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Mon, 15 Sep 2025 08:35:00 -0500 Subject: [PATCH 1/8] In-progress work --- .../lib/Target/AMDGPU/SICustomBranchBundles.h | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 llvm/lib/Target/AMDGPU/SICustomBranchBundles.h diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h new file mode 100644 index 0000000000000..fc96c4370879e --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -0,0 +1,144 @@ +#pragma once + +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/ErrorHandling.h" + +#include "SIInstrInfo.h" + +#include +#include + +using namespace llvm; + +using std::unordered_set; +using std::vector; + +static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MBB, + MachineBasicBlock &dest_MBB) { + auto& TII = *branching_MBB.getParent()->getSubtarget().getInstrInfo(); + for (MachineInstr &branch_MI : reverse(branching_MBB.instrs())) + if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &dest_MBB) + return branch_MI; + + llvm_unreachable("Don't call this if there's no branch to the destination."); +} + +static inline void move_ins_before_phis(MachineInstr &MI, + MachineBasicBlock &MBB) { + MachineFunction& MF = *MBB.getParent(); + auto& TII = *MF.getSubtarget().getInstrInfo(); + + if (MBB.size() == 0 || MBB.front().getOpcode() != AMDGPU::PHI /*do we need to check for debug instructions here?*/) + MBB.insert(MBB.begin(), &MI); + else + for (auto* pred_MBB : MBB.predecessors()) + { + MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB); + if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &MBB) { + MachineInstr* cloned_MI = MF.CloneMachineInstr(&MI); + pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI); + cloned_MI->bundleWithPred(); + } + } + + MI.eraseFromParent(); +} + +static inline MachineBasicBlock::instr_iterator +get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) { + MachineFunction& MF = *pred_MBB.getParent(); + auto& TII = *MF.getSubtarget().getInstrInfo(); + + for (MachineInstr &branch_MI : reverse(pred_MBB.instrs())) + if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB) + return std::next(branch_MI.getIterator()); + else + assert(branch_MI.isBranch() && "Shouldn't have fall-throughs here."); + + llvm_unreachable("There should always be a branch to succ_MBB."); +} + +static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { + auto& TII = *MF.getSubtarget().getInstrInfo(); + + struct CFG_Rewrite_Entry { + unordered_set pred_MBBs; + MachineBasicBlock *succ_MBB; + vector body; + }; + + auto epilogs_are_identical = [](const vector left, + const vector right) { + if (left.size() != right.size()) + return false; + + for (unsigned i = 0; i < left.size(); i++) + if (!left[i]->isIdenticalTo(*right[i])) + return false; + return true; + }; + + auto move_body = [](vector &body, + MachineBasicBlock &dest_MBB) { + for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) { + MachineInstr &body_ins = **rev_it; + body_ins.removeFromBundle(); + dest_MBB.insert(dest_MBB.begin(), &body_ins); + } + }; + + vector cfg_rewrite_entries; + for (MachineBasicBlock &MBB : MF) { + CFG_Rewrite_Entry to_insert = {{}, &MBB, {}}; + for (MachineBasicBlock *pred_MBB : MBB.predecessors()) { + MachineBasicBlock::instr_iterator ep_it = + get_epilog_for_successor(*pred_MBB, MBB)->getIterator(); + + vector epilog; + while (!ep_it.isEnd()) + epilog.push_back(&*ep_it++); + + if (!epilogs_are_identical(to_insert.body, epilog)) { + if (to_insert.pred_MBBs.size() && to_insert.body.size()) { + // Potentially, we need to insert a new entry. But first see if we + // can find an existing entry with the same epilog. + bool existing_entry_found = false; + for (auto rev_it = cfg_rewrite_entries.rbegin(); + rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB; + rev_it++) + if (epilogs_are_identical(rev_it->body, epilog)) { + rev_it->pred_MBBs.insert(pred_MBB); + existing_entry_found = true; + break; + } + + if(!existing_entry_found) + cfg_rewrite_entries.push_back(to_insert); + } + to_insert.pred_MBBs.clear(); + to_insert.body = epilog; + } + + to_insert.pred_MBBs.insert(pred_MBB); + } + + // Handle the last potential rewrite entry. Lower instead of journaling a + // rewrite entry if all predecessor MBBs are in this single entry. + if (to_insert.pred_MBBs.size() == MBB.pred_size()) + // Lower + move_body(to_insert.body,MBB); + else if (to_insert.body.size()) + cfg_rewrite_entries.push_back(to_insert); + + // Perform the journaled rewrites. + for (const auto &entry : cfg_rewrite_entries) { + MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock(); + for(MachineBasicBlock* pred_MBB : entry.pred_MBBs) + { + + } + } +} From ca2af95b83717b3a425eeeca5f89f79830ddd875 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Tue, 16 Sep 2025 05:01:32 -0500 Subject: [PATCH 2/8] Finish initial implementation ... I hope. --- .../lib/Target/AMDGPU/SICustomBranchBundles.h | 41 ++++++++++++++----- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 3 ++ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h index fc96c4370879e..81872f4fe8ab4 100644 --- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -26,14 +26,17 @@ static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MB llvm_unreachable("Don't call this if there's no branch to the destination."); } -static inline void move_ins_before_phis(MachineInstr &MI, - MachineBasicBlock &MBB) { +static inline void move_ins_before_phis(MachineInstr &MI) { + MachineBasicBlock& MBB = *MI.getParent(); MachineFunction& MF = *MBB.getParent(); auto& TII = *MF.getSubtarget().getInstrInfo(); - if (MBB.size() == 0 || MBB.front().getOpcode() != AMDGPU::PHI /*do we need to check for debug instructions here?*/) + if (MBB.size() == 0 || + MBB.front().getOpcode() != + AMDGPU::PHI /*do we need to check for debug instructions here?*/) { + MI.removeFromParent(); MBB.insert(MBB.begin(), &MI); - else + } else { for (auto* pred_MBB : MBB.predecessors()) { MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB); @@ -43,8 +46,8 @@ static inline void move_ins_before_phis(MachineInstr &MI, cloned_MI->bundleWithPred(); } } - - MI.eraseFromParent(); + MI.eraseFromParent(); + } } static inline MachineBasicBlock::instr_iterator @@ -132,13 +135,31 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { move_body(to_insert.body,MBB); else if (to_insert.body.size()) cfg_rewrite_entries.push_back(to_insert); + } // Perform the journaled rewrites. - for (const auto &entry : cfg_rewrite_entries) { + for (auto &entry : cfg_rewrite_entries) { MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock(); - for(MachineBasicBlock* pred_MBB : entry.pred_MBBs) - { - + + // Deal with mezzanine to successor succession. + BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB); + mezzanine_MBB->addSuccessor(entry.succ_MBB); + + // Move instructions to mezzanine block. + move_body(entry.body, *mezzanine_MBB); + + for (MachineBasicBlock *pred_MBB : entry.pred_MBBs) { + //Deal with predecessor to mezzanine succession. + MachineInstr &branch_ins = + get_branch_with_dest(*pred_MBB, *entry.succ_MBB); + assert(branch_ins.getOperand(0).isMBB() && "Branch instruction isn't."); + branch_ins.getOperand(0).setMBB(mezzanine_MBB); + pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB); + + // Delete instructions that were lowered from epilog + auto epilog_it = get_epilog_for_successor(*pred_MBB, *entry.succ_MBB); + while (!epilog_it.isEnd()) + epilog_it++->eraseFromBundle(); } } } diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 115a020f44098..ee4d6709eeafc 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -48,6 +48,7 @@ /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits //===----------------------------------------------------------------------===// +#include "SICustomBranchBundles.h" #include "SILowerControlFlow.h" #include "AMDGPU.h" #include "AMDGPULaneMaskUtils.h" @@ -323,6 +324,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { if (LV) LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec); + move_ins_before_phis(*OrSaveExec); + MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); MachineBasicBlock::iterator ElsePt(MI); From 301f997ef4fdbd57f75167d0202c9a9cb49ea8cd Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Tue, 16 Sep 2025 05:39:36 -0500 Subject: [PATCH 3/8] Now to write the pass to get rid of the bundles --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 +++++--- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 8 ++++++++ llvm/lib/Target/AMDGPU/SILowerControlFlow.h | 8 ++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 92a587b5771b6..fa454730364c4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1563,7 +1563,7 @@ void GCNPassConfig::addFastRegAlloc() { // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID); @@ -1586,10 +1586,12 @@ void GCNPassConfig::addOptimizedRegAlloc() { if (OptVGPRLiveRange) insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeLegacyID); + insertPass(&SIOptimizeVGPRLiveRangeLegacyID, &SILowerControlFlowLegacyID); + // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); + //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID); if (EnableRewritePartialRegUses) insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID); @@ -2256,7 +2258,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. - insertPass(SILowerControlFlowPass()); + //insertPass(SILowerControlFlowPass()); if (EnableRewritePartialRegUses) insertPass(GCNRewritePartialRegUsesPass()); diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index ee4d6709eeafc..ea3d2ed61a808 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -153,6 +153,14 @@ class SILowerControlFlowLegacy : public MachineFunctionPass { return "SI Lower control flow pseudo instructions"; } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().setIsSSA(); + } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().setNoPHIs(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addUsedIfAvailable(); // Should preserve the same set that TwoAddressInstructions does. diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h index 23803c679c246..0f4df79952999 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h @@ -16,6 +16,14 @@ class SILowerControlFlowPass : public PassInfoMixin { public: PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() const { + return MachineFunctionProperties().setIsSSA(); + } + + MachineFunctionProperties getClearedProperties() const { + return MachineFunctionProperties().setNoPHIs(); + } }; } // namespace llvm From df60d5a95b7f357c6f8f8b89dadc7e37696a300a Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Tue, 16 Sep 2025 06:07:26 -0500 Subject: [PATCH 4/8] For my next act, I'll actually make this SSA form by creating different VREGs in the hoisted instructions and merging them with PHI. --- llvm/lib/Target/AMDGPU/SICustomBranchBundles.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h index 81872f4fe8ab4..51d1e84a47c4c 100644 --- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -31,9 +31,14 @@ static inline void move_ins_before_phis(MachineInstr &MI) { MachineFunction& MF = *MBB.getParent(); auto& TII = *MF.getSubtarget().getInstrInfo(); - if (MBB.size() == 0 || - MBB.front().getOpcode() != - AMDGPU::PHI /*do we need to check for debug instructions here?*/) { + bool phi_seen = false; + for (MachineInstr &maybe_phi : MBB) + if (maybe_phi.getOpcode() == AMDGPU::PHI) { + phi_seen = true; + break; + } + + if (!phi_seen) { MI.removeFromParent(); MBB.insert(MBB.begin(), &MI); } else { From c701bea08d281c6b310b185d79209fb78fd816b7 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Wed, 17 Sep 2025 06:36:07 -0500 Subject: [PATCH 5/8] More work --- .../lib/Target/AMDGPU/SICustomBranchBundles.h | 136 +++++++++++++----- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 6 + 2 files changed, 109 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h index 51d1e84a47c4c..18888975bdba0 100644 --- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -4,6 +4,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/ErrorHandling.h" #include "SIInstrInfo.h" @@ -29,11 +30,13 @@ static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MB static inline void move_ins_before_phis(MachineInstr &MI) { MachineBasicBlock& MBB = *MI.getParent(); MachineFunction& MF = *MBB.getParent(); - auto& TII = *MF.getSubtarget().getInstrInfo(); + auto &TII = *MF.getSubtarget().getInstrInfo(); + auto& MRI = MF.getRegInfo(); bool phi_seen = false; - for (MachineInstr &maybe_phi : MBB) - if (maybe_phi.getOpcode() == AMDGPU::PHI) { + MachineBasicBlock::iterator first_phi; + for (first_phi = MBB.begin(); first_phi != MBB.end(); first_phi++) + if (first_phi->getOpcode() == AMDGPU::PHI) { phi_seen = true; break; } @@ -42,20 +45,46 @@ static inline void move_ins_before_phis(MachineInstr &MI) { MI.removeFromParent(); MBB.insert(MBB.begin(), &MI); } else { - for (auto* pred_MBB : MBB.predecessors()) - { + auto phi = BuildMI(MBB, first_phi, MI.getDebugLoc(), TII.get(AMDGPU::PHI), + MI.getOperand(0).getReg()); + for (auto *pred_MBB : MBB.predecessors()) { + Register cloned_reg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB); - if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &MBB) { - MachineInstr* cloned_MI = MF.CloneMachineInstr(&MI); - pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI); - cloned_MI->bundleWithPred(); - } + MachineInstr *cloned_MI = MF.CloneMachineInstr(&MI); + cloned_MI->getOperand(0).setReg(cloned_reg); + phi.addReg(cloned_reg).addMBB(pred_MBB); + pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI); + cloned_MI->bundleWithPred(); } MI.eraseFromParent(); } } -static inline MachineBasicBlock::instr_iterator +struct Epilog_Iterator { + MachineBasicBlock::instr_iterator internal_it; + Epilog_Iterator(MachineBasicBlock::instr_iterator i) : internal_it(i) {} + + bool operator==(const Epilog_Iterator &other) { + return internal_it == other.internal_it; + } + bool isEnd() { return internal_it.isEnd(); } + MachineInstr &operator*() { return *internal_it; } + MachineBasicBlock::instr_iterator operator->() { return internal_it; } + Epilog_Iterator &operator++() { + ++internal_it; + if (!internal_it.isEnd() && internal_it->isBranch()) + internal_it = internal_it->getParent()->instr_end(); + return *this; + } + Epilog_Iterator operator++(int ignored) { + Epilog_Iterator to_return = *this; + ++*this; + return to_return; + } + +}; + +static inline Epilog_Iterator get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) { MachineFunction& MF = *pred_MBB.getParent(); auto& TII = *MF.getSubtarget().getInstrInfo(); @@ -69,6 +98,27 @@ get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MB llvm_unreachable("There should always be a branch to succ_MBB."); } +static inline bool epilogs_are_identical(const vector left, + const vector right, + const MachineBasicBlock &succ_MBB) { + if (left.size() != right.size()) + return false; + + for (unsigned i = 0; i < left.size(); i++) + if (!left[i]->isIdenticalTo(*right[i])) + return false; + return true; +} + +static inline void move_body(vector &body, + MachineBasicBlock &dest_MBB) { + for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) { + MachineInstr &body_ins = **rev_it; + body_ins.removeFromBundle(); + dest_MBB.insert(dest_MBB.begin(), &body_ins); + } +} + static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { auto& TII = *MF.getSubtarget().getInstrInfo(); @@ -78,26 +128,6 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { vector body; }; - auto epilogs_are_identical = [](const vector left, - const vector right) { - if (left.size() != right.size()) - return false; - - for (unsigned i = 0; i < left.size(); i++) - if (!left[i]->isIdenticalTo(*right[i])) - return false; - return true; - }; - - auto move_body = [](vector &body, - MachineBasicBlock &dest_MBB) { - for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) { - MachineInstr &body_ins = **rev_it; - body_ins.removeFromBundle(); - dest_MBB.insert(dest_MBB.begin(), &body_ins); - } - }; - vector cfg_rewrite_entries; for (MachineBasicBlock &MBB : MF) { CFG_Rewrite_Entry to_insert = {{}, &MBB, {}}; @@ -109,7 +139,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { while (!ep_it.isEnd()) epilog.push_back(&*ep_it++); - if (!epilogs_are_identical(to_insert.body, epilog)) { + if (!epilogs_are_identical(to_insert.body, epilog, MBB)) { if (to_insert.pred_MBBs.size() && to_insert.body.size()) { // Potentially, we need to insert a new entry. But first see if we // can find an existing entry with the same epilog. @@ -117,7 +147,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { for (auto rev_it = cfg_rewrite_entries.rbegin(); rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB; rev_it++) - if (epilogs_are_identical(rev_it->body, epilog)) { + if (epilogs_are_identical(rev_it->body, epilog, MBB)) { rev_it->pred_MBBs.insert(pred_MBB); existing_entry_found = true; break; @@ -168,3 +198,43 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { } } } + +namespace std { + template <> + struct hash + { + std::size_t operator()(const Register& r) const + { + return hash()(r); + } + }; +} + +static inline void hoist_unrelated_copies(MachineFunction &MF) { + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &branch_MI : MBB) { + if (!branch_MI.isBranch()) + continue; + + unordered_set related_copy_sources; + Epilog_Iterator epilog_it = branch_MI.getIterator(); + Epilog_Iterator copy_move_it = ++epilog_it; + while (!epilog_it.isEnd()) { + if (epilog_it->getOpcode() != AMDGPU::COPY) + related_copy_sources.insert(epilog_it->getOperand(0).getReg()); + ++epilog_it; + } + + while (!copy_move_it.isEnd()) { + Epilog_Iterator next = copy_move_it; ++next; + if (copy_move_it->getOpcode() == AMDGPU::COPY && + !related_copy_sources.count(copy_move_it->getOperand(1).getReg())) { + MachineInstr &MI_to_move = *copy_move_it; + MI_to_move.removeFromBundle(); + MBB.insert(branch_MI.getIterator(),&MI_to_move); + } + + copy_move_it = next; + } + } +} diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index ea3d2ed61a808..c6b8ace9d9718 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -851,6 +851,12 @@ bool SILowerControlFlow::run(MachineFunction &MF) { LoweredIf.clear(); KillBlocks.clear(); + if (Changed) + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + if (MI.isBundled()) + MI.unbundleFromSucc(); + return Changed; } From 39c58b791071fa5114e4963e84816c1ac715bc7b Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Thu, 18 Sep 2025 03:12:03 -0500 Subject: [PATCH 6/8] Works on simple testcase --- llvm/lib/Target/AMDGPU/AMDGPU.h | 3 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 ++ llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 + .../lib/Target/AMDGPU/SICustomBranchBundles.h | 14 +++--- .../Target/AMDGPU/SIRestoreNormalEpilog.cpp | 46 +++++++++++++++++++ 5 files changed, 60 insertions(+), 7 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 0f2c33585884f..f402efb2ea86e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -248,6 +248,9 @@ extern char &AMDGPUPreloadKernArgPrologLegacyID; void initializeAMDGPUPreloadKernelArgumentsLegacyPass(PassRegistry &); extern char &AMDGPUPreloadKernelArgumentsLegacyID; +void initializeSIRestoreNormalEpilogLegacyPass(PassRegistry &); +extern char &SIRestoreNormalEpilogLegacyID; + // Passes common to R600 and SI FunctionPass *createAMDGPUPromoteAlloca(); void initializeAMDGPUPromoteAllocaPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index fa454730364c4..8210ad88b3d3c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -597,6 +597,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesLegacyPass(*PR); + initializeSIRestoreNormalEpilogLegacyPass(*PR); initializeSIPostRABundlerLegacyPass(*PR); initializeGCNCreateVOPDLegacyPass(*PR); initializeAMDGPUUnifyDivergentExitNodesPass(*PR); @@ -1595,6 +1596,8 @@ void GCNPassConfig::addOptimizedRegAlloc() { if (EnableRewritePartialRegUses) insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID); + + insertPass(&RenameIndependentSubregsID,&SIRestoreNormalEpilogLegacyID); if (isPassEnabled(EnablePreRAOptimizations)) insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index aae56eef73edd..996cb635b3c9c 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -183,6 +183,7 @@ add_llvm_target(AMDGPUCodeGen SIPreEmitPeephole.cpp SIProgramInfo.cpp SIRegisterInfo.cpp + SIRestoreNormalEpilog.cpp SIShrinkInstructions.cpp SIWholeQuadMode.cpp diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h index 18888975bdba0..0f33a10ed8f33 100644 --- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -91,9 +91,7 @@ get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MB for (MachineInstr &branch_MI : reverse(pred_MBB.instrs())) if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB) - return std::next(branch_MI.getIterator()); - else - assert(branch_MI.isBranch() && "Shouldn't have fall-throughs here."); + return ++Epilog_Iterator(branch_MI.getIterator()); llvm_unreachable("There should always be a branch to succ_MBB."); } @@ -132,8 +130,8 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) { CFG_Rewrite_Entry to_insert = {{}, &MBB, {}}; for (MachineBasicBlock *pred_MBB : MBB.predecessors()) { - MachineBasicBlock::instr_iterator ep_it = - get_epilog_for_successor(*pred_MBB, MBB)->getIterator(); + Epilog_Iterator ep_it = + get_epilog_for_successor(*pred_MBB, MBB); vector epilog; while (!ep_it.isEnd()) @@ -175,6 +173,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { // Perform the journaled rewrites. for (auto &entry : cfg_rewrite_entries) { MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock(); + MF.insert(MF.end(),mezzanine_MBB); // Deal with mezzanine to successor succession. BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB); @@ -192,7 +191,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB); // Delete instructions that were lowered from epilog - auto epilog_it = get_epilog_for_successor(*pred_MBB, *entry.succ_MBB); + auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator()); while (!epilog_it.isEnd()) epilog_it++->eraseFromBundle(); } @@ -228,7 +227,8 @@ static inline void hoist_unrelated_copies(MachineFunction &MF) { while (!copy_move_it.isEnd()) { Epilog_Iterator next = copy_move_it; ++next; if (copy_move_it->getOpcode() == AMDGPU::COPY && - !related_copy_sources.count(copy_move_it->getOperand(1).getReg())) { + !related_copy_sources.count(copy_move_it->getOperand(1).getReg()) + || copy_move_it->getOpcode() == AMDGPU::IMPLICIT_DEF) { MachineInstr &MI_to_move = *copy_move_it; MI_to_move.removeFromBundle(); MBB.insert(branch_MI.getIterator(),&MI_to_move); diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp new file mode 100644 index 0000000000000..ef565a9d9ad8f --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp @@ -0,0 +1,46 @@ +#include "SICustomBranchBundles.h" +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "si-restore-normal-epilog" + +namespace +{ + +class SIRestoreNormalEpilogLegacy : public MachineFunctionPass { +public: + static char ID; + + SIRestoreNormalEpilogLegacy() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + hoist_unrelated_copies(MF); + normalize_ir_post_phi_elimination(MF); + return true; + } + + StringRef getPassName() const override { + return "SI Restore Normal Epilog Post PHI Elimination"; + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().setNoPHIs(); + } + +}; + +} // namespace + +INITIALIZE_PASS(SIRestoreNormalEpilogLegacy, DEBUG_TYPE, + "SI restore normal epilog", false, false) + +char SIRestoreNormalEpilogLegacy::ID; +char &llvm::SIRestoreNormalEpilogLegacyID = SIRestoreNormalEpilogLegacy::ID; From 37ac452065c2b7022ef6ceb0448974d5e9734f93 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Thu, 18 Sep 2025 06:31:57 -0500 Subject: [PATCH 7/8] Must delete from all predecessors if lowering. --- llvm/lib/Target/AMDGPU/SICustomBranchBundles.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h index 0f33a10ed8f33..0e043b09fa136 100644 --- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -163,9 +163,18 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { // Handle the last potential rewrite entry. Lower instead of journaling a // rewrite entry if all predecessor MBBs are in this single entry. - if (to_insert.pred_MBBs.size() == MBB.pred_size()) - // Lower - move_body(to_insert.body,MBB); + if (to_insert.pred_MBBs.size() == MBB.pred_size()) { + move_body(to_insert.body, MBB); + for (MachineBasicBlock *pred_MBB : to_insert.pred_MBBs) { + // Delete instructions that were lowered from epilog + MachineInstr &branch_ins = + get_branch_with_dest(*pred_MBB, *to_insert.succ_MBB); + auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator()); + while (!epilog_it.isEnd()) + epilog_it++->eraseFromBundle(); + } + + } else if (to_insert.body.size()) cfg_rewrite_entries.push_back(to_insert); } From 07f9460d5c893e81abbcf2a5f17917505b2f1e22 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Tue, 23 Sep 2025 01:30:40 -0500 Subject: [PATCH 8/8] clang-format and camelcaser --- .../lib/Target/AMDGPU/SICustomBranchBundles.h | 292 +++++++++--------- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 2 +- .../Target/AMDGPU/SIRestoreNormalEpilog.cpp | 4 +- 3 files changed, 147 insertions(+), 151 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h index 0e043b09fa136..acb547236b1eb 100644 --- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h +++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h @@ -17,233 +17,229 @@ using namespace llvm; using std::unordered_set; using std::vector; -static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MBB, - MachineBasicBlock &dest_MBB) { - auto& TII = *branching_MBB.getParent()->getSubtarget().getInstrInfo(); - for (MachineInstr &branch_MI : reverse(branching_MBB.instrs())) - if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &dest_MBB) - return branch_MI; +static inline MachineInstr &getBranchWithDest(MachineBasicBlock &BranchingMBB, + MachineBasicBlock &DestMBB) { + auto &TII = + *BranchingMBB.getParent()->getSubtarget().getInstrInfo(); + for (MachineInstr &BranchMI : reverse(BranchingMBB.instrs())) + if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &DestMBB) + return BranchMI; llvm_unreachable("Don't call this if there's no branch to the destination."); } -static inline void move_ins_before_phis(MachineInstr &MI) { - MachineBasicBlock& MBB = *MI.getParent(); - MachineFunction& MF = *MBB.getParent(); +static inline void moveInsBeforePhis(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); auto &TII = *MF.getSubtarget().getInstrInfo(); - auto& MRI = MF.getRegInfo(); + auto &MRI = MF.getRegInfo(); - bool phi_seen = false; - MachineBasicBlock::iterator first_phi; - for (first_phi = MBB.begin(); first_phi != MBB.end(); first_phi++) - if (first_phi->getOpcode() == AMDGPU::PHI) { - phi_seen = true; + bool PhiSeen = false; + MachineBasicBlock::iterator FirstPhi; + for (FirstPhi = MBB.begin(); FirstPhi != MBB.end(); FirstPhi++) + if (FirstPhi->getOpcode() == AMDGPU::PHI) { + PhiSeen = true; break; } - - if (!phi_seen) { + + if (!PhiSeen) { MI.removeFromParent(); MBB.insert(MBB.begin(), &MI); } else { - auto phi = BuildMI(MBB, first_phi, MI.getDebugLoc(), TII.get(AMDGPU::PHI), - MI.getOperand(0).getReg()); - for (auto *pred_MBB : MBB.predecessors()) { - Register cloned_reg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); - MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB); - MachineInstr *cloned_MI = MF.CloneMachineInstr(&MI); - cloned_MI->getOperand(0).setReg(cloned_reg); - phi.addReg(cloned_reg).addMBB(pred_MBB); - pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI); - cloned_MI->bundleWithPred(); + auto Phi = BuildMI(MBB, FirstPhi, MI.getDebugLoc(), TII.get(AMDGPU::PHI), + MI.getOperand(0).getReg()); + for (auto *PredMBB : MBB.predecessors()) { + Register ClonedReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); + MachineInstr &BranchMI = getBranchWithDest(*PredMBB, MBB); + MachineInstr *ClonedMI = MF.CloneMachineInstr(&MI); + ClonedMI->getOperand(0).setReg(ClonedReg); + Phi.addReg(ClonedReg).addMBB(PredMBB); + PredMBB->insertAfterBundle(BranchMI.getIterator(), ClonedMI); + ClonedMI->bundleWithPred(); } MI.eraseFromParent(); } } -struct Epilog_Iterator { - MachineBasicBlock::instr_iterator internal_it; - Epilog_Iterator(MachineBasicBlock::instr_iterator i) : internal_it(i) {} +struct EpilogIterator { + MachineBasicBlock::instr_iterator InternalIt; + EpilogIterator(MachineBasicBlock::instr_iterator I) : InternalIt(I) {} - bool operator==(const Epilog_Iterator &other) { - return internal_it == other.internal_it; + bool operator==(const EpilogIterator &Other) { + return InternalIt == Other.InternalIt; } - bool isEnd() { return internal_it.isEnd(); } - MachineInstr &operator*() { return *internal_it; } - MachineBasicBlock::instr_iterator operator->() { return internal_it; } - Epilog_Iterator &operator++() { - ++internal_it; - if (!internal_it.isEnd() && internal_it->isBranch()) - internal_it = internal_it->getParent()->instr_end(); + bool isEnd() { return InternalIt.isEnd(); } + MachineInstr &operator*() { return *InternalIt; } + MachineBasicBlock::instr_iterator operator->() { return InternalIt; } + EpilogIterator &operator++() { + ++InternalIt; + if (!InternalIt.isEnd() && InternalIt->isBranch()) + InternalIt = InternalIt->getParent()->instr_end(); return *this; } - Epilog_Iterator operator++(int ignored) { - Epilog_Iterator to_return = *this; + EpilogIterator operator++(int Ignored) { + EpilogIterator ToReturn = *this; ++*this; - return to_return; + return ToReturn; } - }; -static inline Epilog_Iterator -get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) { - MachineFunction& MF = *pred_MBB.getParent(); - auto& TII = *MF.getSubtarget().getInstrInfo(); +static inline EpilogIterator getEpilogForSuccessor(MachineBasicBlock &PredMBB, + MachineBasicBlock &SuccMBB) { + MachineFunction &MF = *PredMBB.getParent(); + auto &TII = *MF.getSubtarget().getInstrInfo(); - for (MachineInstr &branch_MI : reverse(pred_MBB.instrs())) - if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB) - return ++Epilog_Iterator(branch_MI.getIterator()); + for (MachineInstr &BranchMI : reverse(PredMBB.instrs())) + if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &SuccMBB) + return ++EpilogIterator(BranchMI.getIterator()); llvm_unreachable("There should always be a branch to succ_MBB."); } -static inline bool epilogs_are_identical(const vector left, - const vector right, - const MachineBasicBlock &succ_MBB) { - if (left.size() != right.size()) +static inline bool epilogsAreIdentical(const vector Left, + const vector Right, + const MachineBasicBlock &SuccMBB) { + if (Left.size() != Right.size()) return false; - - for (unsigned i = 0; i < left.size(); i++) - if (!left[i]->isIdenticalTo(*right[i])) + + for (unsigned I = 0; I < Left.size(); I++) + if (!Left[I]->isIdenticalTo(*Right[I])) return false; return true; } -static inline void move_body(vector &body, - MachineBasicBlock &dest_MBB) { - for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) { - MachineInstr &body_ins = **rev_it; - body_ins.removeFromBundle(); - dest_MBB.insert(dest_MBB.begin(), &body_ins); +static inline void moveBody(vector &Body, + MachineBasicBlock &DestMBB) { + for (auto RevIt = Body.rbegin(); RevIt != Body.rend(); RevIt++) { + MachineInstr &BodyIns = **RevIt; + BodyIns.removeFromBundle(); + DestMBB.insert(DestMBB.begin(), &BodyIns); } } -static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) { - auto& TII = *MF.getSubtarget().getInstrInfo(); +static inline void normalizeIrPostPhiElimination(MachineFunction &MF) { + auto &TII = *MF.getSubtarget().getInstrInfo(); - struct CFG_Rewrite_Entry { - unordered_set pred_MBBs; - MachineBasicBlock *succ_MBB; - vector body; + struct CFGRewriteEntry { + unordered_set PredMBBs; + MachineBasicBlock *SuccMBB; + vector Body; }; - vector cfg_rewrite_entries; + vector CfgRewriteEntries; for (MachineBasicBlock &MBB : MF) { - CFG_Rewrite_Entry to_insert = {{}, &MBB, {}}; - for (MachineBasicBlock *pred_MBB : MBB.predecessors()) { - Epilog_Iterator ep_it = - get_epilog_for_successor(*pred_MBB, MBB); + CFGRewriteEntry ToInsert = {{}, &MBB, {}}; + for (MachineBasicBlock *PredMBB : MBB.predecessors()) { + EpilogIterator EpIt = getEpilogForSuccessor(*PredMBB, MBB); - vector epilog; - while (!ep_it.isEnd()) - epilog.push_back(&*ep_it++); + vector Epilog; + while (!EpIt.isEnd()) + Epilog.push_back(&*EpIt++); - if (!epilogs_are_identical(to_insert.body, epilog, MBB)) { - if (to_insert.pred_MBBs.size() && to_insert.body.size()) { + if (!epilogsAreIdentical(ToInsert.Body, Epilog, MBB)) { + if (ToInsert.PredMBBs.size() && ToInsert.Body.size()) { // Potentially, we need to insert a new entry. But first see if we // can find an existing entry with the same epilog. - bool existing_entry_found = false; - for (auto rev_it = cfg_rewrite_entries.rbegin(); - rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB; - rev_it++) - if (epilogs_are_identical(rev_it->body, epilog, MBB)) { - rev_it->pred_MBBs.insert(pred_MBB); - existing_entry_found = true; + bool ExistingEntryFound = false; + for (auto RevIt = CfgRewriteEntries.rbegin(); + RevIt != CfgRewriteEntries.rend() && RevIt->SuccMBB == &MBB; + RevIt++) + if (epilogsAreIdentical(RevIt->Body, Epilog, MBB)) { + RevIt->PredMBBs.insert(PredMBB); + ExistingEntryFound = true; break; } - if(!existing_entry_found) - cfg_rewrite_entries.push_back(to_insert); + if (!ExistingEntryFound) + CfgRewriteEntries.push_back(ToInsert); } - to_insert.pred_MBBs.clear(); - to_insert.body = epilog; + ToInsert.PredMBBs.clear(); + ToInsert.Body = Epilog; } - - to_insert.pred_MBBs.insert(pred_MBB); + + ToInsert.PredMBBs.insert(PredMBB); } // Handle the last potential rewrite entry. Lower instead of journaling a // rewrite entry if all predecessor MBBs are in this single entry. - if (to_insert.pred_MBBs.size() == MBB.pred_size()) { - move_body(to_insert.body, MBB); - for (MachineBasicBlock *pred_MBB : to_insert.pred_MBBs) { + if (ToInsert.PredMBBs.size() == MBB.pred_size()) { + moveBody(ToInsert.Body, MBB); + for (MachineBasicBlock *PredMBB : ToInsert.PredMBBs) { // Delete instructions that were lowered from epilog - MachineInstr &branch_ins = - get_branch_with_dest(*pred_MBB, *to_insert.succ_MBB); - auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator()); - while (!epilog_it.isEnd()) - epilog_it++->eraseFromBundle(); + MachineInstr &BranchIns = + getBranchWithDest(*PredMBB, *ToInsert.SuccMBB); + auto EpilogIt = ++EpilogIterator(BranchIns.getIterator()); + while (!EpilogIt.isEnd()) + EpilogIt++->eraseFromBundle(); } - } - else if (to_insert.body.size()) - cfg_rewrite_entries.push_back(to_insert); + } else if (ToInsert.Body.size()) + CfgRewriteEntries.push_back(ToInsert); } // Perform the journaled rewrites. - for (auto &entry : cfg_rewrite_entries) { - MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock(); - MF.insert(MF.end(),mezzanine_MBB); + for (auto &Entry : CfgRewriteEntries) { + MachineBasicBlock *MezzanineMBB = MF.CreateMachineBasicBlock(); + MF.insert(MF.end(), MezzanineMBB); // Deal with mezzanine to successor succession. - BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB); - mezzanine_MBB->addSuccessor(entry.succ_MBB); + BuildMI(MezzanineMBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)) + .addMBB(Entry.SuccMBB); + MezzanineMBB->addSuccessor(Entry.SuccMBB); // Move instructions to mezzanine block. - move_body(entry.body, *mezzanine_MBB); + moveBody(Entry.Body, *MezzanineMBB); - for (MachineBasicBlock *pred_MBB : entry.pred_MBBs) { - //Deal with predecessor to mezzanine succession. - MachineInstr &branch_ins = - get_branch_with_dest(*pred_MBB, *entry.succ_MBB); - assert(branch_ins.getOperand(0).isMBB() && "Branch instruction isn't."); - branch_ins.getOperand(0).setMBB(mezzanine_MBB); - pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB); + for (MachineBasicBlock *PredMBB : Entry.PredMBBs) { + // Deal with predecessor to mezzanine succession. + MachineInstr &BranchIns = getBranchWithDest(*PredMBB, *Entry.SuccMBB); + assert(BranchIns.getOperand(0).isMBB() && "Branch instruction isn't."); + BranchIns.getOperand(0).setMBB(MezzanineMBB); + PredMBB->replaceSuccessor(Entry.SuccMBB, MezzanineMBB); // Delete instructions that were lowered from epilog - auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator()); - while (!epilog_it.isEnd()) - epilog_it++->eraseFromBundle(); + auto EpilogIt = ++EpilogIterator(BranchIns.getIterator()); + while (!EpilogIt.isEnd()) + EpilogIt++->eraseFromBundle(); } } } namespace std { - template <> - struct hash - { - std::size_t operator()(const Register& r) const - { - return hash()(r); - } - }; -} +template <> struct hash { + std::size_t operator()(const Register &R) const { + return hash()(R); + } +}; +} // namespace std -static inline void hoist_unrelated_copies(MachineFunction &MF) { +static inline void hoistUnrelatedCopies(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) - for (MachineInstr &branch_MI : MBB) { - if (!branch_MI.isBranch()) + for (MachineInstr &BranchMI : MBB) { + if (!BranchMI.isBranch()) continue; - unordered_set related_copy_sources; - Epilog_Iterator epilog_it = branch_MI.getIterator(); - Epilog_Iterator copy_move_it = ++epilog_it; - while (!epilog_it.isEnd()) { - if (epilog_it->getOpcode() != AMDGPU::COPY) - related_copy_sources.insert(epilog_it->getOperand(0).getReg()); - ++epilog_it; + unordered_set RelatedCopySources; + EpilogIterator EpilogIt = BranchMI.getIterator(); + EpilogIterator CopyMoveIt = ++EpilogIt; + while (!EpilogIt.isEnd()) { + if (EpilogIt->getOpcode() != AMDGPU::COPY) + RelatedCopySources.insert(EpilogIt->getOperand(0).getReg()); + ++EpilogIt; } - while (!copy_move_it.isEnd()) { - Epilog_Iterator next = copy_move_it; ++next; - if (copy_move_it->getOpcode() == AMDGPU::COPY && - !related_copy_sources.count(copy_move_it->getOperand(1).getReg()) - || copy_move_it->getOpcode() == AMDGPU::IMPLICIT_DEF) { - MachineInstr &MI_to_move = *copy_move_it; - MI_to_move.removeFromBundle(); - MBB.insert(branch_MI.getIterator(),&MI_to_move); + while (!CopyMoveIt.isEnd()) { + EpilogIterator Next = CopyMoveIt; + ++Next; + if (CopyMoveIt->getOpcode() == AMDGPU::COPY && + !RelatedCopySources.count(CopyMoveIt->getOperand(1).getReg()) || + CopyMoveIt->getOpcode() == AMDGPU::IMPLICIT_DEF) { + MachineInstr &MIToMove = *CopyMoveIt; + MIToMove.removeFromBundle(); + MBB.insert(BranchMI.getIterator(), &MIToMove); } - - copy_move_it = next; + + CopyMoveIt = Next; } } } diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index c6b8ace9d9718..ef091088ae0a5 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -332,7 +332,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { if (LV) LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec); - move_ins_before_phis(*OrSaveExec); + moveInsBeforePhis(*OrSaveExec); MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp index ef565a9d9ad8f..d6d02c940731c 100644 --- a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp +++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp @@ -22,8 +22,8 @@ class SIRestoreNormalEpilogLegacy : public MachineFunctionPass { SIRestoreNormalEpilogLegacy() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override { - hoist_unrelated_copies(MF); - normalize_ir_post_phi_elimination(MF); + hoistUnrelatedCopies(MF); + normalizeIrPostPhiElimination(MF); return true; }