Skip to content

Commit

Permalink
Reapply "[AMDGPU] Introduce real and keep fake True16 instructions."
Browse files Browse the repository at this point in the history
Reverts 6cb3866.

Analysis of failures on buildbots with expensive checks enabled showed
that the problem was triggered by changes in another commit,
469b3bf, and was caused by the bug
addressed in #67245.
  • Loading branch information
kosarev committed Sep 23, 2023
1 parent f435f55 commit fab28e0
Show file tree
Hide file tree
Showing 14 changed files with 144 additions and 66 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1693,6 +1693,15 @@ def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;

// Control use of True16 instructions. Real True16 instructions are the
// True16 instructions exactly as the ISA defines them. Fake True16
// instructions have the same encoding as the real ones, but syntactically
// allow only 32-bit registers as operands and use the low halves of those
// registers.
def UseRealTrue16Insts : Predicate<"Subtarget->useRealTrue16Insts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
def UseFakeTrue16Insts : Predicate<"Subtarget->hasTrue16BitInsts() && "
"!Subtarget->useRealTrue16Insts()">;

def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>;

Expand Down
19 changes: 13 additions & 6 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
Res =
tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
MI, DecW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
convertVOP3PDPPInst(MI);
Expand Down Expand Up @@ -463,15 +466,17 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
MI = MCInst(); // clear

Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
Res = tryDecodeInst(DecoderTableDPP8GFX1164,
DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear

Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;

Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
MI, QW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI);
Expand Down Expand Up @@ -532,7 +537,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
if (Res) break;

Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
Address, CS);
if (Res) break;

if (Bytes.size() < 4) break;
Expand Down Expand Up @@ -562,7 +568,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
if (Res) break;

Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
Address, CS);
if (Res)
break;

Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,17 @@ class AMDGPUDisassembler : public MCDisassembler {
return MCDisassembler::Fail;
}

/// Attempt to decode \p Inst against two decoder tables in order.
///
/// \p Table1 is tried first through the single-table tryDecodeInst overload;
/// \p Table2 is consulted only when that attempt yields a status that
/// converts to false. The first status that converts to true is returned
/// unchanged; if neither table produces one, MCDisassembler::Fail is
/// returned.
template <typename InsnType>
DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                           MCInst &MI, InsnType Inst, uint64_t Address,
                           raw_ostream &Comments) const {
  // Primary table takes precedence over the fallback one.
  const DecodeStatus Primary =
      tryDecodeInst(Table1, MI, Inst, Address, Comments);
  if (Primary)
    return Primary;

  // Only reached when the primary table did not match.
  const DecodeStatus Fallback =
      tryDecodeInst(Table2, MI, Inst, Address, Comments);
  if (Fallback)
    return Fallback;

  return MCDisassembler::Fail;
}

std::optional<DecodeStatus>
onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &CStream) const override;
Expand Down
14 changes: 10 additions & 4 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
case AMDGPU::V_MAX_F32_e64:
case AMDGPU::V_MAX_F16_e64:
case AMDGPU::V_MAX_F16_t16_e64:
case AMDGPU::V_MAX_F16_fake16_e64:
case AMDGPU::V_MAX_F64_e64:
case AMDGPU::V_PK_MAX_F16: {
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
Expand Down Expand Up @@ -1503,7 +1504,8 @@ static int getOModValue(unsigned Opc, int64_t Val) {
}
}
case AMDGPU::V_MUL_F16_e64:
case AMDGPU::V_MUL_F16_t16_e64: {
case AMDGPU::V_MUL_F16_t16_e64:
case AMDGPU::V_MUL_F16_fake16_e64: {
switch (static_cast<uint16_t>(Val)) {
case 0x3800: // 0.5
return SIOutMods::DIV2;
Expand All @@ -1530,12 +1532,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_MUL_F64_e64:
case AMDGPU::V_MUL_F32_e64:
case AMDGPU::V_MUL_F16_t16_e64:
case AMDGPU::V_MUL_F16_fake16_e64:
case AMDGPU::V_MUL_F16_e64: {
// If output denormals are enabled, omod is ignored.
if ((Op == AMDGPU::V_MUL_F32_e64 &&
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
Op == AMDGPU::V_MUL_F16_t16_e64) &&
Op == AMDGPU::V_MUL_F16_t16_e64 ||
Op == AMDGPU::V_MUL_F16_fake16_e64) &&
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
return std::pair(nullptr, SIOutMods::NONE);

Expand Down Expand Up @@ -1565,12 +1569,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_ADD_F64_e64:
case AMDGPU::V_ADD_F32_e64:
case AMDGPU::V_ADD_F16_e64:
case AMDGPU::V_ADD_F16_t16_e64: {
case AMDGPU::V_ADD_F16_t16_e64:
case AMDGPU::V_ADD_F16_fake16_e64: {
// If output denormals are enabled, omod is ignored.
if ((Op == AMDGPU::V_ADD_F32_e64 &&
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
Op == AMDGPU::V_ADD_F16_t16_e64) &&
Op == AMDGPU::V_ADD_F16_t16_e64 ||
Op == AMDGPU::V_ADD_F16_fake16_e64) &&
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
return std::pair(nullptr, SIOutMods::NONE);

Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2262,6 +2262,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field list<ValueType> ArgVT = _ArgVT;
field bit EnableClamp = _EnableClamp;
field bit IsTrue16 = 0;
field bit IsRealTrue16 = 0;

field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
Expand Down Expand Up @@ -2453,6 +2454,21 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
// class, so copy changes to this class in those profiles
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
// Profile derived from P's argument types (P.ArgVT) with both True16 flags
// set and the operand fields below overridden via the _t16 helper classes.
let IsTrue16 = 1;
// Set in addition to IsTrue16; presumably distinguishes real True16 profiles
// from fake ones -- confirm against the users of this field.
let IsRealTrue16 = 1;
// Most DstVT are 16-bit, but not all.
let DstRC = getVALUDstForVT_t16<DstVT>.ret;
// Note: DstRC64 uses the non-_t16 helper, unlike DstRC above.
let DstRC64 = getVALUDstForVT<DstVT>.ret;
// Only Src1 gets a _t16 32-bit-encoding register operand here.
let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
// DPP source operands, selected per source value type.
let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
// DPP source-modifier operands, selected per source value type.
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
}

class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
let IsTrue16 = 1;
// Most DstVT are 16-bit, but not all
let DstRC = getVALUDstForVT_t16<DstVT>.ret;
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1641,8 +1641,10 @@ def : ClampPat<V_MAX_F32_e64, f32>;
def : ClampPat<V_MAX_F64_e64, f64>;
let SubtargetPredicate = NotHasTrue16BitInsts in
def : ClampPat<V_MAX_F16_e64, f16>;
let SubtargetPredicate = HasTrue16BitInsts in
let SubtargetPredicate = UseRealTrue16Insts in
def : ClampPat<V_MAX_F16_t16_e64, f16>;
let SubtargetPredicate = UseFakeTrue16Insts in
def : ClampPat<V_MAX_F16_fake16_e64, f16>;

let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
Expand Down Expand Up @@ -2696,12 +2698,12 @@ def : GCNPat<
let OtherPredicates = [HasTrue16BitInsts] in {
def : GCNPat<
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
(V_MUL_F16_t16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
(V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
>;

def : GCNPat<
(fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
(V_MUL_F16_t16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
(V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
>;
} // End OtherPredicates

Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ multiclass VOP1Inst_t16<string opName,
defm NAME : VOP1Inst<opName, P, node>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
}
}

Expand All @@ -170,7 +170,7 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
}

class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
VOPProfile_True16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {

let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
Expand Down Expand Up @@ -199,7 +199,7 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
let HasOMod = 1;
}

Expand Down Expand Up @@ -292,13 +292,13 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
let OtherPredicates = [HasTrue16BitInsts] in
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0

let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [HasTrue16BitInsts] in
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
Expand Down
29 changes: 21 additions & 8 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,12 @@ multiclass VOP2Inst_t16<string opName,
let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
}
let SubtargetPredicate = HasTrue16BitInsts in {
let SubtargetPredicate = UseRealTrue16Insts in {
defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
}
let SubtargetPredicate = UseFakeTrue16Insts in {
defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16", GFX9Renamed>;
}
}

// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
Expand All @@ -212,7 +215,7 @@ multiclass VOP2Inst_e64_t16<string opName,
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
}
let SubtargetPredicate = HasTrue16BitInsts in {
defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16", GFX9Renamed>;
}
}

Expand Down Expand Up @@ -874,7 +877,7 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
let HasSrc1FloatMods = 0;
let Src1ModSDWA = Int16SDWAInputMods;
}
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
let Src1DPP = VGPR_32_Lo128;
let Src1ModDPP = IntT16VRegInputMods;
Expand Down Expand Up @@ -925,9 +928,9 @@ def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;

let SubtargetPredicate = isGFX11Plus in {
let isCommutable = 1 in {
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus

Expand Down Expand Up @@ -1307,6 +1310,8 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
string asmName, bit single = 0> {
defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "GFX11", "GFX11_FAKE16"),
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in
def _e32_gfx11 :
VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>,
VOP2e<op{5-0}, ps.Pfl> {
Expand All @@ -1331,7 +1336,8 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
SIEncodingFamily.GFX11> {
let AsmString = asmName # ps.Pfl.AsmDPP16;
let DecoderNamespace = "DPPGFX11";
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPPGFX11", "DPPGFX11_FAKE16");
let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only);
}
}
multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
Expand All @@ -1340,7 +1346,8 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
if ps.Pfl.HasExtDPP then
def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
let AsmString = asmName # ps.Pfl.AsmDPP8;
let DecoderNamespace = "DPP8GFX11";
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPP8GFX11", "DPP8GFX11_FAKE16");
let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only);
}
}

Expand Down Expand Up @@ -1491,13 +1498,19 @@ defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f,
defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;

defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
defm V_ADD_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
defm V_SUB_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;

Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1358,6 +1358,8 @@ let AssemblerPredicate = isGFX11Only,
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3OpSel_gfx11<op, ps.Pfl>;
if !not(ps.Pfl.HasOpSel) then
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "GFX11", "GFX11_FAKE16"),
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in
def _e64_gfx11 :
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3e_gfx11<op, ps.Pfl>;
Expand Down Expand Up @@ -1388,7 +1390,9 @@ let AssemblerPredicate = isGFX11Only,
multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, DecoderNamespace = "DPPGFX11" in {
let AsmString = asmName # ps.Pfl.AsmVOP3DPP16,
DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPPGFX11", "DPPGFX11_FAKE16"),
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in {
defm NAME : VOP3_Real_dpp_Base_gfx11<op, opName>;
}
}
Expand All @@ -1411,7 +1415,9 @@ let AssemblerPredicate = isGFX11Only,
multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8GFX11" in {
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPP8GFX11", "DPP8GFX11_FAKE16"),
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in {
defm NAME : VOP3_Real_dpp8_Base_gfx11<op, opName>;
}
}
Expand Down

0 comments on commit fab28e0

Please sign in to comment.