Skip to content

Commit

Permalink
[AMDGPU] Fix shrinking of F16 FMA on newer subtargets
Browse files Browse the repository at this point in the history
D125803 introduced shrinking of F16 FMA to FMAAK/FMAMK in
SIShrinkInstructions (useful on GFX10+ where VOP3 instructions may have
a literal operand) but failed to handle the V_FMA_F16_gfx9_e64 form of
the opcode which is used on GFX9+.

Differential Revision: https://reviews.llvm.org/D133489
  • Loading branch information
jayfoad committed Sep 8, 2022
1 parent de0e311 commit afa0ed3
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 23 deletions.
9 changes: 6 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2887,12 +2887,15 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
// Names of the operands that removeModOperands() iterates over (in reverse)
// and removes from an instruction.
// NOTE(review): this hunk is a diff rendered without +/- markers. The line
// ending in `omod};` appears to be the pre-change terminator of the array, and
// the line after it appears to be its replacement, which additionally lists
// op_sel. Only one of the two belongs in the real file -- confirm against the
// commit.
static constexpr unsigned ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
AMDGPU::OpName::omod};
AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

/// Removes from \p MI the operands named in ModifierOpNames, visiting the
/// names in reverse order.
// NOTE(review): this hunk is a diff rendered without +/- markers, so it shows
// two versions of the loop back to back. Only one of them belongs in the real
// file -- confirm against the commit.
void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
// Appears to be the pre-change loop: the looked-up index is handed straight
// to removeOperand() with no check that the lookup succeeded.
for (unsigned Name : reverse(ModifierOpNames))
MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, Name));
// Appears to be the replacement loop: an operand is removed only when its
// index is non-negative. Presumably a negative index means the opcode has no
// operand of that name (e.g. op_sel) -- TODO confirm against
// getNamedOperandIdx.
for (unsigned Name : reverse(ModifierOpNames)) {
int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
if (Idx >= 0)
MI.removeOperand(Idx);
}
}

bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
NewOpcode = AMDGPU::V_MADAK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
NewOpcode = AMDGPU::V_FMAAK_F16;
break;
}
Expand Down Expand Up @@ -409,6 +410,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
NewOpcode = AMDGPU::V_MADMK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
NewOpcode = AMDGPU::V_FMAMK_F16;
break;
}
Expand Down Expand Up @@ -852,7 +854,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
shrinkMadFma(MI);
continue;
}
Expand Down
23 changes: 8 additions & 15 deletions llvm/test/CodeGen/AMDGPU/fma.f16.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10

declare half @llvm.fma.f16(half, half, half)
declare half @llvm.maxnum.f16(half, half)
Expand Down Expand Up @@ -58,19 +58,12 @@ define half @test_fmaak(half %x, half %y, half %z) {
; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: test_fmaak:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SDAG-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: test_fmaak:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_fma_f16 v0, v0, v1, 0x4200
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: test_fmaak:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200)
ret half %r
}
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = V_FMA_F16_gfx9_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...

Expand All @@ -207,7 +207,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...

Expand All @@ -222,7 +222,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...

Expand All @@ -237,6 +237,6 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...

0 comments on commit afa0ed3

Please sign in to comment.