Skip to content

Commit

Permalink
[AMDGPU] Use new opcode for indexed vgpr reads
Browse files: browse the repository at this point in the history
Introduce V_MOV_B32_indirect_read for indexed vgpr reads
(and rename the old V_MOV_B32_indirect to
V_MOV_B32_indirect_write) so they can be unambiguously
distinguished from regular V_MOV_B32_e32. Previously they
were distinguished by looking for extra implicit operands
but this is fragile because regular moves sometimes have
extra implicit operands too:
- either by accident, when instructions end up with
  duplicate implicit operands (see e.g. D100939)
- or by design, when SIInstrInfo::copyPhysReg breaks a
  multi-dword copy into individual subreg mov instructions
  and adds implicit operands for the super-register.

The effect of this is that SIInstrInfo::isFoldableCopy can
be simplified and now identifies more foldable copies. The
test diffs show that more immediate 0 values have been
folded as inline operands.

SIInstrInfo::isReallyTriviallyReMaterializable could
probably be simplified too but that is not part of this
patch.

Differential Revision: https://reviews.llvm.org/D114230
  • Loading branch information
jayfoad committed Nov 19, 2021
1 parent 049799c commit 30b27ec
Show file tree
Hide file tree
Showing 13 changed files with 2,330 additions and 2,398 deletions.
13 changes: 3 additions & 10 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -1905,7 +1905,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
SetOn->getOperand(3).setIsUndef();

const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect);
const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, OpDesc)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
Expand Down Expand Up @@ -1945,7 +1945,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
SetOn->getOperand(3).setIsUndef();

BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32))
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_indirect_read))
.addDef(Dst)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
.addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0))
Expand Down Expand Up @@ -2716,14 +2716,7 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO: {
// If there are additional implicit register operands, this may be used for
// register indexing so the source register operand isn't simply copied.
unsigned NumOps = MI.getDesc().getNumOperands() +
MI.getDesc().getNumImplicitUses();

return MI.getNumOperands() == NumOps;
}
case AMDGPU::V_MOV_B64_PSEUDO:
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
Expand Down
6 changes: 2 additions & 4 deletions llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
Expand Up @@ -257,10 +257,8 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
})) {
// The only exception allowed here is another indirect vector move
// with the same mode.
if (!IdxOn ||
!((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
return false;
}
}
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/AMDGPU/VOP1Instructions.td
Expand Up @@ -863,14 +863,25 @@ defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>;
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect : VPseudoInstSI<(outs),
def V_MOV_B32_indirect_write : VPseudoInstSI<(outs),
(ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
getVOPSrc0ForVT<i32>.ret:$src0)> {
let VOP1 = 1;
let SubtargetPredicate = isGFX8GFX9;
}

// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the
// super register should be added.
//
// Unlike V_MOV_B32_indirect_write, $vdst is listed in (outs) here, so it is
// an ordinary def, and $src0 is the only explicit input operand.
// The PseudoInstExpansion maps this pseudo onto V_MOV_B32_e32_vi, passing
// $vdst and $src0 through unchanged.
def V_MOV_B32_indirect_read : VPseudoInstSI<
(outs getVALUDstForVT<i32>.ret:$vdst),
(ins getVOPSrc0ForVT<i32>.ret:$src0)>,
PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
getVOPSrc0ForVT<i32>.ret:$src0)> {
let VOP1 = 1;
// Guarded by the same subtarget predicate as V_MOV_B32_indirect_write.
let SubtargetPredicate = isGFX8GFX9;
}

let OtherPredicates = [isGFX8Plus] in {

def : GCNPat <
Expand Down

0 comments on commit 30b27ec

Please sign in to comment.