Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 41 additions & 25 deletions llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,11 @@ class SILoadStoreOptimizer {

void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
AMDGPU::OpName OpName, Register DestReg) const;
const DebugLoc &DL, AMDGPU::OpName OpName,
Register DestReg) const;
Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
AMDGPU::OpName OpName) const;
const DebugLoc &DL, AMDGPU::OpName OpName) const;

unsigned read2Opcode(unsigned EltSize) const;
unsigned read2ST64Opcode(unsigned EltSize) const;
Expand Down Expand Up @@ -1320,10 +1321,9 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
// Paired.
void SILoadStoreOptimizer::copyToDestRegs(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore, AMDGPU::OpName OpName,
Register DestReg) const {
MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL,
AMDGPU::OpName OpName, Register DestReg) const {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();

auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);

Expand Down Expand Up @@ -1351,9 +1351,9 @@ void SILoadStoreOptimizer::copyToDestRegs(
Register
SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore,
const DebugLoc &DL,
AMDGPU::OpName OpName) const {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();

auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);

Expand Down Expand Up @@ -1409,7 +1409,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Register DestReg = MRI->createVirtualRegister(SuperRC);

DebugLoc DL = CI.I->getDebugLoc();
DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
Expand Down Expand Up @@ -1437,7 +1438,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
.addImm(0) // gds
.cloneMergedMemRefs({&*CI.I, &*Paired.I});

copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);

CI.I->eraseFromParent();
Paired.I->eraseFromParent();
Expand Down Expand Up @@ -1491,7 +1492,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
(NewOffset0 != NewOffset1) && "Computed offset doesn't fit");

const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = CI.I->getDebugLoc();
DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

Register BaseReg = AddrReg->getReg();
unsigned BaseSubReg = AddrReg->getSubReg();
Expand Down Expand Up @@ -1532,7 +1534,9 @@ MachineBasicBlock::iterator
SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Expand All @@ -1557,7 +1561,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,

MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));

copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);

CI.I->eraseFromParent();
Paired.I->eraseFromParent();
Expand All @@ -1568,7 +1572,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
Expand All @@ -1589,7 +1595,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
New.addImm(MergedOffset);
New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));

copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::sdst, DestReg);

CI.I->eraseFromParent();
Paired.I->eraseFromParent();
Expand All @@ -1600,7 +1606,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();

DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

Expand Down Expand Up @@ -1630,7 +1638,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));

copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);

CI.I->eraseFromParent();
Paired.I->eraseFromParent();
Expand All @@ -1641,7 +1649,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();

DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

Expand Down Expand Up @@ -1681,7 +1691,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));

copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata, DestReg);

CI.I->eraseFromParent();
Paired.I->eraseFromParent();
Expand All @@ -1692,12 +1702,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

Register SrcReg =
copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);

auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
Expand Down Expand Up @@ -1739,7 +1750,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();

DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

Expand All @@ -1757,7 +1770,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
.addImm(CI.CPol)
.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));

copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
copyToDestRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdst, DestReg);

CI.I->eraseFromParent();
Paired.I->eraseFromParent();
Expand All @@ -1768,12 +1781,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();

DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

Register SrcReg =
copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);

auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
Expand Down Expand Up @@ -2042,12 +2057,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
DebugLoc DL =
DebugLoc::getMergedLocation(CI.I->getDebugLoc(), Paired.I->getDebugLoc());

const unsigned Opcode = getNewOpcode(CI, Paired);

Register SrcReg =
copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
copyFromSrcRegs(CI, Paired, InsertBefore, DL, AMDGPU::OpName::vdata);

auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
Expand Down
Loading