Skip to content

Commit

Permalink
[AMDGPU] Make more use of madmk/fmamk instructions
Browse files Browse the repository at this point in the history
In convertToThreeAddress handle VOP2 mac/fmac instructions with a
literal src0 operand, since these are prime candidates for
converting to madmk/fmamk.

Previously this would only happen if src0 (or src1) was a register
defined by a move-immediate instruction, but in many cases these
operands have already been folded because SIFoldOperands runs
before TwoAddressInstructionPass.

Differential Revision: https://reviews.llvm.org/D120736
  • Loading branch information
jayfoad committed Mar 2, 2022
1 parent d813116 commit 8bed52c
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 148 deletions.
22 changes: 17 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -3255,6 +3255,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
+bool Src0Literal = false;

switch (Opc) {
default:
Expand All @@ -3281,7 +3282,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return nullptr;

if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
-return nullptr;
+Src0Literal = true;

break;
}
Expand Down Expand Up @@ -3319,7 +3320,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
};

int64_t Imm;
-if (getFoldableImm(Src2, Imm, &DefMI)) {
+if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
Expand All @@ -3339,7 +3340,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
unsigned NewOpc = IsFMA
? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
-if (getFoldableImm(Src1, Imm, &DefMI)) {
+if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
Expand All @@ -3353,7 +3354,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
}
-if (getFoldableImm(Src0, Imm, &DefMI)) {
+if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) {
+if (Src0Literal) {
+Imm = Src0->getImm();
+DefMI = nullptr;
+}
if (pseudoToMCOpcode(NewOpc) != -1 &&
isOperandLegal(
MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
Expand All @@ -3366,12 +3371,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
-killDef();
+if (DefMI)
+killDef();
return MIB;
}
}
}

+// VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma
+// because VOP3 does not allow a literal operand.
+// TODO: Remove this restriction for GFX10.
+if (Src0Literal)
+return nullptr;

unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
: IsF64 ? AMDGPU::V_FMA_F64_e64
: IsLegacy
Expand Down

0 comments on commit 8bed52c

Please sign in to comment.