Skip to content

Commit

Permalink
[AMDGPU][GlobalISel] Add IMG init in selectImageIntrinsic
Browse files Browse the repository at this point in the history
Doing this during instruction selection avoids the cost of running
SIAddIMGInit, which is yet another pass over the MIR.

Differential Revision: https://reviews.llvm.org/D99670
  • Loading branch information
jayfoad committed Apr 1, 2021
1 parent 4af6251 commit 3d07a6d
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Expand Up @@ -1689,6 +1689,38 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (BaseOpcode->HasD16)
MIB.addImm(IsD16 ? -1 : 0);

if (IsTexFail) {
// An image load instruction with TFE/LWE only conditionally writes to its
// result registers. Initialize them to zero so that we always get
// well-defined result values.
assert(VDataOut && !VDataIn);
Register Tied = MRI->cloneVirtualRegister(VDataOut);
Register Zero = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::V_MOV_B32_e32), Zero)
.addImm(0);
auto Parts = TRI.getRegSplitParts(MRI->getRegClass(Tied), 4);
if (STI.usePRTStrictNull()) {
// With enable-prt-strict-null enabled, initialize all result registers to
// zero.
auto RegSeq =
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
for (auto Sub : Parts)
RegSeq.addReg(Zero).addImm(Sub);
} else {
// With enable-prt-strict-null disabled, only initialize the extra TFE/LWE
// result register.
Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
auto RegSeq =
BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
for (auto Sub : Parts.drop_back(1))
RegSeq.addReg(Undef).addImm(Sub);
RegSeq.addReg(Zero).addImm(Parts.back());
}
MIB.addReg(Tied, RegState::Implicit);
MIB->tieOperands(0, MIB->getNumOperands() - 1);
}

MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -1133,10 +1133,6 @@ void GCNPassConfig::addPreGlobalInstructionSelect() {

// Adds the passes that make up the GlobalISel instruction-selection step:
// an InstructionSelect pass constructed with the current optimization level,
// followed by the pass created by createSIAddIMGInitPass(). Always returns
// false.
//
// NOTE(review): this text comes from a diff view whose hunk header reports
// "0 additions & 4 deletions" for this file; the TODO comment and the
// createSIAddIMGInitPass() call below appear to be the four deleted lines,
// i.e. they are presumably absent after the commit -- confirm against the
// post-commit file.
bool GCNPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect(getOptLevel()));
// TODO: Fix instruction selection to do the right thing for image
// instructions with tfe or lwe in the first place, instead of running a
// separate pass to fix them up?
addPass(createSIAddIMGInitPass());
return false;
}

Expand Down

0 comments on commit 3d07a6d

Please sign in to comment.