Skip to content
Permalink
Browse files

AMDGPU/GlobalISel: Select fmed3

llvm-svn: 371435
  • Loading branch information...
arsenm committed Sep 9, 2019
1 parent 79f0d3a commit d6c1f5bb154a0b524b92d15b99a882d654f906ce
@@ -34,6 +34,14 @@ def gi_vop3omods :
GIComplexOperandMatcher<s32, "selectVOP3OMods">,
GIComplexPatternEquiv<VOP3OMods>;

// GlobalISel equivalent of the VOP3OpSelMods0 complex pattern. Matching is
// delegated to the C++ selector function named "selectVOP3OpSelMods0".
def gi_vop3opselmods0 :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
GIComplexPatternEquiv<VOP3OpSelMods0>;

// GlobalISel equivalent of the VOP3OpSelMods complex pattern. Matching is
// delegated to the C++ selector function named "selectVOP3OpSelMods".
def gi_vop3opselmods :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
GIComplexPatternEquiv<VOP3OpSelMods>;

def gi_smrd_imm :
GIComplexOperandMatcher<s64, "selectSmrdImm">,
GIComplexPatternEquiv<SMRDImm>;
@@ -319,7 +319,7 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
[]
>;

def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
// SelectionDAG node for AMDGPUISD::FMED3. It carries the _impl suffix because
// the AMDGPUfmed3 name is used by a PatFrags that lists this node as one of
// its alternatives.
def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;

def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
@@ -437,3 +437,7 @@ def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src),
def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_ldexp node:$src0, node:$src1),
(AMDGPUldexp_impl node:$src0, node:$src1)]>;

// Three-operand fmed3 fragment with two alternatives: the int_amdgcn_fmed3
// intrinsic and the AMDGPUfmed3_impl node, so a pattern written against
// AMDGPUfmed3 covers both forms.
def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2),
(AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>;
@@ -1503,6 +1503,25 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
}};
}

/// Build the operand renderers for a VOP3OpSelMods0 source.
///
/// No modifier folding is attempted yet. The returned list renders, in order:
/// the root register unchanged, a zero source-modifier immediate, and a zero
/// clamp immediate.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
  // FIXME: Handle clamp and op_sel

  // Root is captured by copy so the renderer stays valid after this call
  // returns; the register is read when the renderer runs.
  const auto RenderRootReg = [=](MachineInstrBuilder &MIB) {
    MIB.addReg(Root.getReg());
  };
  const auto RenderZeroImm = [=](MachineInstrBuilder &MIB) { MIB.addImm(0); };

  return {{
      RenderRootReg,
      RenderZeroImm, // src_mods
      RenderZeroImm  // clamp
  }};
}

/// Build the operand renderers for a VOP3OpSelMods source.
///
/// No modifier folding is attempted yet. The returned list renders, in order:
/// the root register unchanged and a zero source-modifier immediate.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // FIXME: Handle op_sel

  // Root is captured by copy so the renderer stays valid after this call
  // returns; the register is read when the renderer runs.
  const auto RenderRootReg = [=](MachineInstrBuilder &MIB) {
    MIB.addReg(Root.getReg());
  };
  const auto RenderZeroImm = [=](MachineInstrBuilder &MIB) { MIB.addImm(0); };

  return {{
      RenderRootReg,
      RenderZeroImm // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
MachineRegisterInfo &MRI =
@@ -114,6 +114,11 @@ class AMDGPUInstructionSelector : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;

// Renderers for the VOP3OpSelMods0 / VOP3OpSelMods complex operands. Both
// currently emit the root register with a zero source-modifier immediate; the
// Mods0 variant additionally emits a zero clamp immediate. op_sel is not
// handled yet.
InstructionSelector::ComplexRendererFns
selectVOP3OpSelMods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3OpSelMods(MachineOperand &Root) const;

InstructionSelector::ComplexRendererFns
selectSmrdImm(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
@@ -0,0 +1,205 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# All three s32 sources are in VGPRs; expects selection to V_MED3_F32.
---
name: fmed3_s32_vvvv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-LABEL: name: fmed3_s32_vvvv
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2
S_ENDPGM 0, implicit %3
...

# First source is an SGPR, the other two are VGPRs; expects V_MED3_F32 with
# the SGPR used directly as the first source.
---
name: fmed3_s32_vsvv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; GCN-LABEL: name: fmed3_s32_vsvv
; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = COPY $vgpr1
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2
S_ENDPGM 0, implicit %3
...

# Second source is an SGPR, the other two are VGPRs; expects V_MED3_F32 with
# the SGPR used directly as the second source.
---
name: fmed3_s32_vvsv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; GCN-LABEL: name: fmed3_s32_vvsv
; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = COPY $vgpr1
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2
S_ENDPGM 0, implicit %3
...

# Third source is an SGPR, the other two are distinct VGPRs; expects
# V_MED3_F32 with the SGPR used directly as the third source.
---
name: fmed3_s32_vvvs
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; GCN-LABEL: name: fmed3_s32_vvvs
; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:sgpr(s32) = COPY $sgpr0
%3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2
S_ENDPGM 0, implicit %3
...


# The tests below reuse a single SGPR for every scalar source. Because it is
# the same SGPR each time, the constant bus restriction is not violated.
#
# Here the first and second sources are that SGPR and the third is a VGPR.
---
name: fmed3_s32_vssv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmed3_s32_vssv
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %1
S_ENDPGM 0, implicit %2
...

# The same SGPR is used for the first and third sources; the second is a VGPR.
---
name: fmed3_s32_vsvs
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmed3_s32_vsvs
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %0
S_ENDPGM 0, implicit %2
...

# The first source is a VGPR; the same SGPR is used for the second and third.
---
name: fmed3_s32_vvss
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmed3_s32_vvss
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %1, %0, %0
S_ENDPGM 0, implicit %2
...

# The same SGPR is used for all three sources; the result is still a VGPR.
---
name: fmed3_s32_vsss
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0
; GCN-LABEL: name: fmed3_s32_vsss
; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %0
S_ENDPGM 0, implicit %1
...


# FIXME: This should probably have been fixed by RegBankSelect, but we should fail to select it.
# ---
# name: fmed3_s32_vssv_constant_bus_violation
# legalized: true
# regBankSelected: true
# tracksRegLiveness: true

# body: |
# bb.0:
# liveins: $sgpr0, $sgpr1, $vgpr0

# %0:sgpr(s32) = COPY $sgpr0
# %1:sgpr(s32) = COPY $sgpr1
# %2:vgpr(s32) = COPY $vgpr0
# %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2
# S_ENDPGM 0, implicit %3
# ...
@@ -0,0 +1,61 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s

# VI-ERR-NOT: remark
# VI-ERR: remark: <unknown>:0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:vgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vvvv)
# VI-ERR-NEXT: remark: <unknown>:0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:sgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vsvv)
# VI-ERR-NOT: remark
# s16 fmed3 with all sources in VGPRs. The gfx900 RUN line expects
# V_MED3_F16; the fiji RUN line expects a "cannot select" remark for this
# function.
---
name: fmed3_s16_vvvv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-LABEL: name: fmed3_s16_vvvv
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F16_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s16) = G_TRUNC %0
%4:vgpr(s16) = G_TRUNC %1
%5:vgpr(s16) = G_TRUNC %2
%6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5
S_ENDPGM 0, implicit %6
...

# s16 fmed3 with the first source in an SGPR and the other two in VGPRs. The
# gfx900 RUN line expects V_MED3_F16; the fiji RUN line expects a
# "cannot select" remark for this function.
---
name: fmed3_s16_vsvv
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; GCN-LABEL: name: fmed3_s16_vsvv
; GCN: liveins: $sgpr0, $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MED3_F16_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = COPY $vgpr1
%3:sgpr(s16) = G_TRUNC %0
%4:vgpr(s16) = G_TRUNC %1
%5:vgpr(s16) = G_TRUNC %2
%6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5
S_ENDPGM 0, implicit %6
...

0 comments on commit d6c1f5b

Please sign in to comment.
You can’t perform that action at this time.