Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Select llvm.amdgcn.fmad.ftz
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Dec 30, 2019
1 parent 987eb8e commit 1247865
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 4 deletions.
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
Expand Up @@ -246,7 +246,7 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;

// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
Expand Down Expand Up @@ -461,3 +461,7 @@ def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1),
def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1),
(AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>;

// Three-operand pattern fragment with two alternatives: the
// int_amdgcn_fmad_ftz intrinsic and the AMDGPUfmad_ftz_impl node. A pattern
// written against AMDGPUfmad_ftz is therefore matched for either form.
def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
(AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)]>;
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -782,13 +782,14 @@ defm : FMADPat <f32, V_MAC_F32_e64>;

class FMADModsPat<Instruction inst, SDPatternOperator mad_opr, ValueType Ty>
: GCNPat<
(Ty (mad_opr (VOP3Mods Ty:$src0, i32:$src0_mod),
(VOP3Mods Ty:$src1, i32:$src1_mod),
(VOP3Mods Ty:$src2, i32:$src2_mod))),
(Ty (mad_opr (Ty (VOP3Mods Ty:$src0, i32:$src0_mod)),
(Ty (VOP3Mods Ty:$src1, i32:$src1_mod)),
(Ty (VOP3Mods Ty:$src2, i32:$src2_mod)))),
(inst $src0_mod, $src0, $src1_mod, $src1,
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;

// FIXME: This should select to V_MAC_F32
def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz, f32>;
def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
let SubtargetPredicate = Has16BitInsts;
Expand Down
233 changes: 233 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir
@@ -0,0 +1,233 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# All three operands of llvm.amdgcn.fmad.ftz are VGPRs. The checks expect a
# single V_MAD_F32 whose three source-modifier operands are all 0.
---
name: fmad_ftz_s32_vvvv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-LABEL: name: fmad_ftz_s32_vvvv
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
S_ENDPGM 0, implicit %3
...

# src0 is an SGPR; src1 and src2 are VGPRs. The checks expect the sreg_32 copy
# to be used directly as the first V_MAD_F32 source.
---
name: fmad_ftz_s32_vsvv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; GCN-LABEL: name: fmad_ftz_s32_vsvv
; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = COPY $vgpr1
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
S_ENDPGM 0, implicit %3
...

# src1 is an SGPR; src0 and src2 are VGPRs. The checks expect the sreg_32 copy
# to be used directly as the second V_MAD_F32 source.
---
name: fmad_ftz_s32_vvsv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; GCN-LABEL: name: fmad_ftz_s32_vvsv
; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = COPY $vgpr1
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
S_ENDPGM 0, implicit %3
...

# src2 is an SGPR; src0 and src1 are two distinct VGPRs ($vgpr0 and $vgpr1),
# matching the declared liveins. The checks expect the sreg_32 copy to be used
# directly as the third V_MAD_F32 source.
---
name: fmad_ftz_s32_vvvs
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; GCN-LABEL: name: fmad_ftz_s32_vvvs
; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:sgpr(s32) = COPY $sgpr0
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
S_ENDPGM 0, implicit %3
...


# The same SGPR is used for every scalar operand, so this does not violate the constant bus restriction.
---
# src0 and src1 are the same SGPR (%0 is passed twice); src2 is a VGPR. The
# checks expect the single sreg_32 copy to appear in both of the first two
# V_MAD_F32 source slots.
name: fmad_ftz_s32_vssv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmad_ftz_s32_vssv
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %1
S_ENDPGM 0, implicit %2
...

# src0 and src2 are the same SGPR (%0 is passed twice); src1 is a VGPR. The
# checks expect the single sreg_32 copy in the first and third V_MAD_F32
# source slots.
---
name: fmad_ftz_s32_vsvs
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmad_ftz_s32_vsvs
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %0
S_ENDPGM 0, implicit %2
...

# src0 is a VGPR; src1 and src2 are the same SGPR (%0 is passed twice). The
# checks expect the single sreg_32 copy in the second and third V_MAD_F32
# source slots.
---
name: fmad_ftz_s32_vvss
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmad_ftz_s32_vvss
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %1, %0, %0
S_ENDPGM 0, implicit %2
...

# All three operands are the same SGPR (%0 is passed three times). The checks
# expect the single sreg_32 copy in every V_MAD_F32 source slot.
# NOTE(review): $vgpr0 is listed in liveins but is never read in this
# function; confirm whether it can be dropped.
---
name: fmad_ftz_s32_vsss
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmad_ftz_s32_vsss
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0
S_ENDPGM 0, implicit %1
...



# FIXME: RegBankSelect should probably have already fixed up this case (two different SGPR operands), but instruction selection should still fail to select it.
# ---
# name: fmad_ftz_s32_vssv_constant_bus_violation
# legalized: true
# regBankSelected: true
# tracksRegLiveness: true

# body: |
# bb.0:
# liveins: $sgpr0, $sgpr1, $vgpr0

# %0:sgpr(s32) = COPY $sgpr0
# %1:sgpr(s32) = COPY $sgpr1
# %2:vgpr(s32) = COPY $vgpr0
# %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
# S_ENDPGM 0, implicit %3
# ...

# src2 is produced by a G_FNEG of a VGPR. The checks expect no separate
# negate instruction: V_MAD_F32 reads the un-negated copy directly and the
# src2 modifier operand is 1 instead of 0.
---
name: fmad_ftz_s32_vvv_fneg_v
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-LABEL: name: fmad_ftz_s32_vvv_fneg_v
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_FNEG %2
%4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %3
S_ENDPGM 0, implicit %4
...

0 comments on commit 1247865

Please sign in to comment.