-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Use some merging/unmerging helpers in SILoadStoreOptimizer #90866
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Factor out copyToDestRegs and copyFromSrcRegs for merging store sources and unmerging load results. NFC.
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes: Factor out copyToDestRegs and copyFromSrcRegs for merging store sources and unmerging load results. NFC. Full diff: https://github.com/llvm/llvm-project/pull/90866.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index a1d34f8b23ea30..723297d3814d59 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -219,12 +219,24 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
static unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI,
const CombineInfo &Paired);
- const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI,
- const CombineInfo &Paired);
+ const TargetRegisterClass *
+ getTargetRegisterClass(const CombineInfo &CI,
+ const CombineInfo &Paired) const;
const TargetRegisterClass *getDataRegClass(const MachineInstr &MI) const;
CombineInfo *checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired);
+ void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore, int OpName,
+ Register DestReg) const;
+ void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore, int OpName,
+ Register DestReg, unsigned SubRegIdx0,
+ unsigned SubRegIdx1) const;
+ Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore,
+ int OpName) const;
+
unsigned read2Opcode(unsigned EltSize) const;
unsigned read2ST64Opcode(unsigned EltSize) const;
MachineBasicBlock::iterator
@@ -1191,6 +1203,64 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
return Where;
}
+// Copy the merged load result from DestReg to the original dest regs of CI and
+// Paired.
+void SILoadStoreOptimizer::copyToDestRegs(
+ CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore, int OpName, Register DestReg,
+ unsigned SubRegIdx0, unsigned SubRegIdx1) const {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ // Copy to the old destination registers.
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ const auto *Dest0 = TII->getNamedOperand(*CI.I, OpName);
+ const auto *Dest1 = TII->getNamedOperand(*Paired.I, OpName);
+
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
+}
+
+void SILoadStoreOptimizer::copyToDestRegs(
+ CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore, int OpName,
+ Register DestReg) const {
+ auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
+ copyToDestRegs(CI, Paired, InsertBefore, OpName, DestReg, SubRegIdx0,
+ SubRegIdx1);
+}
+
+// Return a register for the source of the merged store after copying the
+// original source regs of CI and Paired into it.
+Register
+SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore,
+ int OpName) const {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
+
+ // Copy to the new source register.
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
+ Register SrcReg = MRI->createVirtualRegister(SuperRC);
+
+ const auto *Src0 = TII->getNamedOperand(*CI.I, OpName);
+ const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName);
+
+ BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+ .add(*Src0)
+ .addImm(SubRegIdx0)
+ .add(*Src1)
+ .addImm(SubRegIdx1);
+
+ return SrcReg;
+}
+
unsigned SILoadStoreOptimizer::read2Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
return (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
@@ -1214,9 +1284,6 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
// cases, like vectors of pointers.
const auto *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
- const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst);
- const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdst);
-
unsigned NewOffset0 = CI.Offset;
unsigned NewOffset1 = Paired.Offset;
unsigned Opc =
@@ -1267,17 +1334,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
.addImm(0) // gds
.cloneMergedMemRefs({&*CI.I, &*Paired.I});
- (void)Read2;
-
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
-
- // Copy to the old destination registers.
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg,
+ SubRegIdx0, SubRegIdx1);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1397,19 +1455,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the old destination registers.
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
- const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
-
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1441,19 +1487,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
New.addImm(MergedOffset);
New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the old destination registers.
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
- const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst);
- const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::sdst);
-
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1494,19 +1528,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the old destination registers.
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
- const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
-
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1551,19 +1573,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the old destination registers.
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
- const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
-
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1578,20 +1588,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
const unsigned Opcode = getNewOpcode(CI, Paired);
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the new source register.
- const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
- Register SrcReg = MRI->createVirtualRegister(SuperRC);
-
- const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
-
- BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
- .add(*Src0)
- .addImm(SubRegIdx0)
- .add(*Src1)
- .addImm(SubRegIdx1);
+ Register SrcReg =
+ copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
@@ -1645,19 +1643,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
.addImm(CI.CPol)
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the old destination registers.
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
- const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst);
- const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdst);
-
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- BuildMI(*MBB, InsertBefore, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1672,20 +1658,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
const unsigned Opcode = getNewOpcode(CI, Paired);
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the new source register.
- const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
- Register SrcReg = MRI->createVirtualRegister(SuperRC);
-
- const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
-
- BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
- .add(*Src0)
- .addImm(SubRegIdx0)
- .add(*Src1)
- .addImm(SubRegIdx1);
+ Register SrcReg =
+ copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
@@ -1868,7 +1842,7 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
const TargetRegisterClass *
SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
- const CombineInfo &Paired) {
+ const CombineInfo &Paired) const {
if (CI.InstClass == S_BUFFER_LOAD_IMM ||
CI.InstClass == S_BUFFER_LOAD_SGPR_IMM || CI.InstClass == S_LOAD_IMM) {
switch (CI.Width + Paired.Width) {
@@ -1901,20 +1875,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
const unsigned Opcode = getNewOpcode(CI, Paired);
- auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
-
- // Copy to the new source register.
- const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
- Register SrcReg = MRI->createVirtualRegister(SuperRC);
-
- const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
- const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
-
- BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
- .add(*Src0)
- .addImm(SubRegIdx0)
- .add(*Src1)
- .addImm(SubRegIdx1);
+ Register SrcReg =
+ copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
arsenm
approved these changes
May 2, 2024
Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Factor out copyToDestRegs and copyFromSrcRegs for merging store sources
and unmerging load results. NFC.