Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1898,7 +1898,7 @@ class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
!eq(VT.Size, 16) : !if(IsTrue16,
!if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
!if(IsFake16, VGPROp_32_Lo128, VGPROp_16_Lo128),
RegisterOperand<VGPR_32>),
1 : RegisterOperand<VGPR_32>);
}
Expand Down Expand Up @@ -1950,6 +1950,20 @@ class getVOP3VRegSrcForVT<ValueType VT> {
1 : VRegSrc_32);
}

// VGPR only VOP3 src with 8 bit encoding e.g. VOP3DPP src0.
//
// Selects a VGPROp_* register operand for value type VT based on VT.Size:
//   128 / 96 / 64 -> VGPROp_128 / VGPROp_96 / VGPROp_64
//   48            -> VGPROp_64 (same operand as the 64-bit case)
//   16            -> VGPROp_16 only when IsTrue16 is set and IsFake16 is
//                    clear; VGPROp_32 in every other 16-bit combination
//   anything else -> VGPROp_32
//
// Both IsTrue16 and IsFake16 default to 0, so instantiating this class with
// only VT yields VGPROp_32 for 16-bit types.
class getVGPRSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
RegisterOperand ret =
!cond(!eq(VT.Size, 128) : VGPROp_128,
!eq(VT.Size, 96) : VGPROp_96,
!eq(VT.Size, 64) : VGPROp_64,
!eq(VT.Size, 48) : VGPROp_64,
!eq(VT.Size, 16) : !if(IsTrue16,
!if(IsFake16, VGPROp_32,
VGPROp_16),
VGPROp_32),
1 : VGPROp_32);
}

// Src2 of VOP3 DPP instructions cannot be a literal
class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
RegisterOperand ret =
Expand Down Expand Up @@ -2681,7 +2695,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
field RegisterOperand Src0VOP3DPP = getVGPRSrcForVT<Src0VT>.ret;
field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
Expand Down Expand Up @@ -2897,7 +2911,7 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPRSrc_16, VGPRSrc_32);
let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPROp_16, VGPROp_32);
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
Expand Down
48 changes: 19 additions & 29 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1307,51 +1307,41 @@ def VRegSrc_fake16: SrcReg9<VGPR_32> {
let EncoderMethod = "getMachineOpValueT16";
}
//===----------------------------------------------------------------------===//
// VGPRSrc_*
// VGPROp_* An 8-bit RegisterOperand wrapper for a VGPR
//===----------------------------------------------------------------------===//

// An 8-bit RegisterOperand wrapper for a VGPR
def VGPRSrc_32 : RegisterOperand<VGPR_32> {
let DecoderMethod = "DecodeVGPR_32RegisterClass";
class VGPROp<RegisterClass regClass> : RegisterOperand<regClass> {
let DecoderMethod = "Decode" # regClass # "RegisterClass";
}
def VGPRSrc_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
let DecoderMethod = "DecodeVGPR_32RegisterClass";
class VGPROp_Align2<RegisterClass regClass> : RegisterOperand<!cast<RegisterClass>(regClass#_Align2)> {
let DecoderMethod = "Decode" # regClass # "RegisterClass";
}
def VGPRSrc_64 : RegisterOperand<VReg_64> {
let DecoderMethod = "DecodeVReg_64RegisterClass";
multiclass VGPROp_Aligned<RegisterClass regClass> {
def _Align1 : VGPROp<regClass>;
def _Align2 : VGPROp_Align2<regClass>;
}

def VGPRSrc_96 : RegisterOperand<VReg_96> {
let DecoderMethod = "DecodeVReg_96RegisterClass";
// TODO: These cases should use default target alignment
def VGPROp_16 : VGPROp<VGPR_16> {
let EncoderMethod = "getMachineOpValueT16";
}
def VGPROp_32 : VGPROp<VGPR_32>;

def VGPRSrc_128 : RegisterOperand<VReg_128> {
let DecoderMethod = "DecodeVReg_128RegisterClass";
foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "512", "1024"] in {
def VGPROp_#size : VGPROp<!cast<RegisterClass>("VReg_"#size)>;
}

def VGPRSrc_192 : RegisterOperand<VReg_192> {
let DecoderMethod = "DecodeVReg_192RegisterClass";
foreach size = ["64", "96", "128", "160", "256", "1024"] in {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arsenm I don't know the direction you are going with the _Align1 operands. They seem to be identical to the VGPROp operands without the _Align1 suffix. I think this design makes sense because we don't need to generate _Align1 and _Align2 versions for every size yet.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are identical now, but they soon will not be. We don't strictly need it, but it shrinks the coming diff considerably.

defm VGPROp_#size : VGPROp_Aligned<!cast<RegisterClass>("VReg_"#size)>;
}

def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
def VGPROp_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
let EncoderMethod = "getMachineOpValueT16Lo128";
}

// True 16 operands.
def VGPRSrc_16 : RegisterOperand<VGPR_16> {
let DecoderMethod = "DecodeVGPR_16RegisterClass";
let EncoderMethod = "getMachineOpValueT16";
}

// TODO: These cases should use default target alignment
def VGPROp_16 : RegisterOperand<VGPR_16>;
def VGPROp_32 : RegisterOperand<VGPR_32>;

foreach size = ["64", "96", "128", "160", "256", "1024" ] in {
def VGPROp_#size : RegisterOperand<!cast<RegisterClass>("VReg_"#size)>;
def VGPROp_#size#_Align1 : RegisterOperand<!cast<RegisterClass>("VReg_"#size)>;
def VGPROp_#size#_Align2 : RegisterOperand<!cast<RegisterClass>("VReg_"#size#_Align2)>;
def VGPROp_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
let DecoderMethod = "DecodeVGPR_32RegisterClass";
}

//===----------------------------------------------------------------------===//
Expand Down
24 changes: 12 additions & 12 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -416,12 +416,12 @@ def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
let Ins32 = (ins VSrcT_f16_Lo128:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm);
let Ins32 = (ins VSrcT_f16_Lo128:$src0, VGPROp_16_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm);
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPROp_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;
def VOP_MADAK_F64 : VOP_MADAK <f64>;
Expand Down Expand Up @@ -452,12 +452,12 @@ def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
let Ins32 = (ins VSrcT_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1);
let Ins32 = (ins VSrcT_f16_Lo128:$src0, ImmOpType:$imm, VGPROp_16_Lo128:$src1);
}
def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1);
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPROp_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;
def VOP_MADMK_F64 : VOP_MADMK <f64>;
Expand Down Expand Up @@ -496,14 +496,14 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
HasClamp, HasModifiers, HasModifiers, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
// We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPROp_32:$src2X);
let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPROp_32:$src2Y);
let InsVOPD3X = (ins Src0ModVOPD3:$src0X_modifiers, Src0VOPD3:$src0X,
Src1ModVOPD3:$vsrc1X_modifiers, Src1RC32:$vsrc1X,
VGPRSrc_32:$src2X);
VGPROp_32:$src2X);
let InsVOPD3Y = (ins Src0ModVOPD3:$src0Y_modifiers, Src0VOPD3:$src0Y,
Src1ModVOPD3:$vsrc1Y_modifiers, Src1RC32:$vsrc1Y,
VGPRSrc_32:$src2Y);
VGPROp_32:$src2Y);

let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
Expand Down Expand Up @@ -565,7 +565,7 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue*/, 1/*IsVOP3Encoding*/>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
Expand Down Expand Up @@ -597,7 +597,7 @@ def VOP_MAC_F16_fake16 : VOP_MAC <f16> {
getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
dpp8:$dpp8, Dpp8FI:$fi);
let DstRC64 = getVALUDstForVT<DstVT>.ret;
let Src0VOP3DPP = VGPRSrc_32;
let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
Expand Down Expand Up @@ -796,7 +796,7 @@ def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
Src2RC64, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod,
Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3VC<f16, 0/*IsFake16*/>.ret;
Expand All @@ -808,7 +808,7 @@ def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;

let Src0VOP3DPP = VGPRSrc_32;
let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 1/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3VC<f16, 1/*IsFake16*/>.ret;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
FP16InputMods:$src1_modifiers, Src1RC:$src1,
FP16InputMods:$src2_modifiers, Src2RC:$src2);
dag dpp_srcs =
(ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
(ins FPVRegInputMods:$src0_modifiers, VGPROp_32:$src0,
FPVRegInputMods:$src1_modifiers, VRegSrc_32:$src1,
FP16InputMods:$src2_modifiers, Src2RC:$src2);

Expand Down
18 changes: 9 additions & 9 deletions llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;

Expand All @@ -126,7 +126,7 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_32;
let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;

Expand Down Expand Up @@ -173,7 +173,7 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;

Expand All @@ -197,7 +197,7 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_32;
let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;

Expand Down Expand Up @@ -892,7 +892,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
// DPP8 forbids modifiers and can inherit from VOPC_Profile

let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1);
dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPROp_32:$src0, VCSrc_b32:$src1);
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
(ins)));
let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
Expand All @@ -915,7 +915,7 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;

Expand All @@ -941,7 +941,7 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_32;
let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;

Expand Down Expand Up @@ -985,7 +985,7 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0VOP3DPP = VGPROp_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;

Expand All @@ -1009,7 +1009,7 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_32;
let Src0VOP3DPP = VGPROp_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOPDInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ foreach Gen = [GFX11GenD, GFX12GenD, GFX1250GenD] in {
defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2) # Gen.Suffix;
defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
defvar outs = (outs VGPROp_32:$vdstX, VOPDDstYOperand:$vdstY);
if !or(isOpXMADK, isOpYMADK) then {
// If Both X and Y are MADK, the mandatory literal of X additionally must
// use an alternate operand format which defers to the 'real' Y literal.
Expand Down