From 72cf62c48f52de3ffe62d42f5bebe27e41cf559f Mon Sep 17 00:00:00 2001 From: Joseph Nash Date: Mon, 8 Sep 2025 10:55:33 -0400 Subject: [PATCH 1/2] [AMDGPU] Combine VGPRSrc and VGPROp definitions into VGPROp These can be represented by the same definition. It is just a RegisterOperand wrapper for a VGPR register class with a DecoderMethod override. NFC. --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 6 +-- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 48 ++++++++------------- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 24 +++++------ llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 2 +- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 18 ++++---- llvm/lib/Target/AMDGPU/VOPDInstructions.td | 2 +- 6 files changed, 45 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 562023cde44a4..7ca83eb4df035 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1898,7 +1898,7 @@ class getVregSrcForVT { !eq(VT.Size, 64) : RegisterOperand, !eq(VT.Size, 48) : RegisterOperand, !eq(VT.Size, 16) : !if(IsTrue16, - !if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128), + !if(IsFake16, VGPROp_32_Lo128, VGPROp_16_Lo128), RegisterOperand), 1 : RegisterOperand); } @@ -2681,7 +2681,7 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field RegisterOperand Src0DPP = getVregSrcForVT.ret; field RegisterOperand Src1DPP = getVregSrcForVT.ret; field RegisterOperand Src2DPP = getVregSrcForVT.ret; - field RegisterOperand Src0VOP3DPP = VGPRSrc_32; + field RegisterOperand Src0VOP3DPP = VGPROp_32; field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT.ret; field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT.ret; field RegisterOperand Src0SDWA = getSDWASrcForVT.ret; @@ -2897,7 +2897,7 @@ class VOPProfile_True16 : VOPProfile { let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPRSrc_16, VGPRSrc_32); + let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPROp_16, VGPROp_32); let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 162ef647be7e9..5f5eec49bab06 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1307,51 +1307,41 @@ def VRegSrc_fake16: SrcReg9 { let EncoderMethod = "getMachineOpValueT16"; } //===----------------------------------------------------------------------===// -// VGPRSrc_* +// VGPROp_* An 8-bit RegisterOperand wrapper for a VGPR //===----------------------------------------------------------------------===// -// An 8-bit RegisterOperand wrapper for a VGPR -def VGPRSrc_32 : RegisterOperand { - let DecoderMethod = "DecodeVGPR_32RegisterClass"; +class VGPROp : RegisterOperand { + let DecoderMethod = "Decode" # regClass # "RegisterClass"; } -def VGPRSrc_32_Lo128 : RegisterOperand { - let DecoderMethod = "DecodeVGPR_32RegisterClass"; +class VGPROp_Align2 : RegisterOperand(regClass#_Align2)> { + let DecoderMethod = "Decode" # regClass # "RegisterClass"; } -def VGPRSrc_64 : RegisterOperand { - let DecoderMethod = "DecodeVReg_64RegisterClass"; +multiclass VGPROp_Aligned { + def _Align1 : VGPROp; + def _Align2 : VGPROp_Align2; } -def VGPRSrc_96 : RegisterOperand { - let DecoderMethod = "DecodeVReg_96RegisterClass"; +// TODO: These cases should use default target alignment +def VGPROp_16 : VGPROp { + let EncoderMethod = "getMachineOpValueT16"; } +def VGPROp_32 : VGPROp; -def VGPRSrc_128 : RegisterOperand { - let DecoderMethod = "DecodeVReg_128RegisterClass"; +foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "512", "1024"] in { + def VGPROp_#size : VGPROp("VReg_"#size)>; } -def VGPRSrc_192 : RegisterOperand { - let DecoderMethod = "DecodeVReg_192RegisterClass"; +foreach size = ["64", "96", "128", "160", "256", "1024"] in { + defm VGPROp_#size : VGPROp_Aligned("VReg_"#size)>; } -def VGPRSrc_16_Lo128 : RegisterOperand { +def VGPROp_16_Lo128 : RegisterOperand { let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass"; let EncoderMethod = "getMachineOpValueT16Lo128"; } -// True 16 operands. -def VGPRSrc_16 : RegisterOperand { - let DecoderMethod = "DecodeVGPR_16RegisterClass"; - let EncoderMethod = "getMachineOpValueT16"; -} - -// TODO: These cases should use default target alignment -def VGPROp_16 : RegisterOperand; -def VGPROp_32 : RegisterOperand; - -foreach size = ["64", "96", "128", "160", "256", "1024" ] in { - def VGPROp_#size : RegisterOperand("VReg_"#size)>; - def VGPROp_#size#_Align1 : RegisterOperand("VReg_"#size)>; - def VGPROp_#size#_Align2 : RegisterOperand("VReg_"#size#_Align2)>; +def VGPROp_32_Lo128 : RegisterOperand { + let DecoderMethod = "DecodeVGPR_32RegisterClass"; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 4f6b7c5923397..cff66aaedb11e 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -416,12 +416,12 @@ def VOP_MADAK_F16_t16 : VOP_MADAK { let IsTrue16 = 1; let IsRealTrue16 = 1; let DstRC = getVALUDstForVT.ret; - let Ins32 = (ins VSrcT_f16_Lo128:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm); + let Ins32 = (ins VSrcT_f16_Lo128:$src0, VGPROp_16_Lo128:$src1, ImmOpType:$imm); } def VOP_MADAK_F16_fake16 : VOP_MADAK { let IsTrue16 = 1; let DstRC = getVALUDstForVT_fake16.ret; - let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm); + let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPROp_32_Lo128:$src1, ImmOpType:$imm); } def VOP_MADAK_F32 : VOP_MADAK ; def VOP_MADAK_F64 : VOP_MADAK ; @@ -452,12 +452,12 @@ def VOP_MADMK_F16_t16 : VOP_MADMK { let IsTrue16 = 1; let IsRealTrue16 = 1; let DstRC = getVALUDstForVT.ret; - let Ins32 = (ins VSrcT_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1); + let Ins32 = (ins VSrcT_f16_Lo128:$src0, ImmOpType:$imm, VGPROp_16_Lo128:$src1); } def VOP_MADMK_F16_fake16 : VOP_MADMK { let IsTrue16 = 1; let DstRC = getVALUDstForVT_fake16.ret; - let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1); + let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPROp_32_Lo128:$src1); } def VOP_MADMK_F32 : VOP_MADMK ; def VOP_MADMK_F64 : VOP_MADMK ; @@ -496,14 +496,14 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v HasClamp, HasModifiers, HasModifiers, HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret; // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu - let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); - let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y); + let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPROp_32:$src2X); + let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPROp_32:$src2Y); let InsVOPD3X = (ins Src0ModVOPD3:$src0X_modifiers, Src0VOPD3:$src0X, Src1ModVOPD3:$vsrc1X_modifiers, Src1RC32:$vsrc1X, - VGPRSrc_32:$src2X); + VGPROp_32:$src2X); let InsVOPD3Y = (ins Src0ModVOPD3:$src0Y_modifiers, Src0VOPD3:$src0Y, Src1ModVOPD3:$vsrc1Y_modifiers, Src1RC32:$vsrc1Y, - VGPRSrc_32:$src2Y); + VGPROp_32:$src2Y); let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, @@ -565,7 +565,7 @@ def VOP_MAC_F16_t16 : VOP_MAC { let DstRC64 = getVALUDstForVT.ret; let Src0RC64 = getVOP3SrcForVT.ret; let Src1RC64 = getVOP3SrcForVT.ret; - let Src0VOP3DPP = VGPRSrc_16; + let Src0VOP3DPP = VGPROp_16; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; @@ -597,7 +597,7 @@ def VOP_MAC_F16_fake16 : VOP_MAC { getVregSrcForVT.ret:$src2, // stub argument dpp8:$dpp8, Dpp8FI:$fi); let DstRC64 = getVALUDstForVT.ret; - let Src0VOP3DPP = VGPRSrc_32; + let Src0VOP3DPP = VGPROp_32; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; @@ -796,7 +796,7 @@ def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> { Src2RC64, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod, Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret; - let Src0VOP3DPP = VGPRSrc_16; + let Src0VOP3DPP = VGPROp_16; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; let Src1ModVOP3DPP = getSrcModVOP3VC.ret; @@ -808,7 +808,7 @@ def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { let Src0Mod = getSrc0Mod.ret; let Src1Mod = getSrcMod.ret; - let Src0VOP3DPP = VGPRSrc_32; + let Src0VOP3DPP = VGPROp_32; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; let Src1ModVOP3DPP = getSrcModVOP3VC.ret; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index cd17382d8d308..c8a59efc106fe 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -44,7 +44,7 @@ class VOP3P_Mix_Profile sched, ValueType vt0, ValueType let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_16; + let Src0VOP3DPP = VGPROp_16; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; @@ -126,7 +126,7 @@ multiclass VOPC_Profile_t16 sched, ValueType vt0, ValueType let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_32; + let Src0VOP3DPP = VGPROp_32; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; @@ -173,7 +173,7 @@ multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, Va let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_16; + let Src0VOP3DPP = VGPROp_16; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; @@ -197,7 +197,7 @@ multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, Va let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_32; + let Src0VOP3DPP = VGPROp_32; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; @@ -892,7 +892,7 @@ class VOPC_Class_Profile sched, ValueType src0VT, ValueType // DPP8 forbids modifiers and can inherit from VOPC_Profile let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); - dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1); + dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPROp_32:$src0, VCSrc_b32:$src1); let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel), (ins))); let AsmVOP3Base = "$sdst, $src0_modifiers, $src1"; @@ -915,7 +915,7 @@ multiclass VOPC_Class_Profile_t16 sched> { let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_16; + let Src0VOP3DPP = VGPROp_16; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; @@ -941,7 +941,7 @@ multiclass VOPC_Class_Profile_t16 sched> { let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_32; + let Src0VOP3DPP = VGPROp_32; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; @@ -985,7 +985,7 @@ multiclass VOPC_Class_NoSdst_Profile_t16 sched> { let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_16; + let Src0VOP3DPP = VGPROp_16; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; @@ -1009,7 +1009,7 @@ multiclass VOPC_Class_NoSdst_Profile_t16 sched> { let Src0ModDPP = getSrcModDPP_t16.ret; let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; - let Src0VOP3DPP = VGPRSrc_32; + let Src0VOP3DPP = VGPROp_32; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td index 4d97b9c81ea86..f416c06540480 100644 --- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td @@ -224,7 +224,7 @@ foreach Gen = [GFX11GenD, GFX12GenD, GFX1250GenD] in { defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32")); defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32")); defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2) # Gen.Suffix; - defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY); + defvar outs = (outs VGPROp_32:$vdstX, VOPDDstYOperand:$vdstY); if !or(isOpXMADK, isOpYMADK) then { // If Both X and Y are MADK, the mandatory literal of X additionally must // use an alternate operand format which defers to the 'real' Y literal. From ee5c28ee39cdf6d6d3d39ce4183617d95982ac7f Mon Sep 17 00:00:00 2001 From: Joseph Nash Date: Mon, 8 Sep 2025 14:01:51 -0400 Subject: [PATCH 2/2] Add getVGPRSrcForVT --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 7ca83eb4df035..2ea59585ee1ca 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1950,6 +1950,20 @@ class getVOP3VRegSrcForVT { 1 : VRegSrc_32); } +// VGPR only VOP3 src with 8 bit encoding e.g. VOP3DPP src0. +class getVGPRSrcForVT { + RegisterOperand ret = + !cond(!eq(VT.Size, 128) : VGPROp_128, + !eq(VT.Size, 96) : VGPROp_96, + !eq(VT.Size, 64) : VGPROp_64, + !eq(VT.Size, 48) : VGPROp_64, + !eq(VT.Size, 16) : !if(IsTrue16, + !if(IsFake16, VGPROp_32, + VGPROp_16), + VGPROp_32), + 1 : VGPROp_32); +} + // Src2 of VOP3 DPP instructions cannot be a literal class getVOP3DPPSrcForVT { RegisterOperand ret = @@ -2681,7 +2695,7 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field RegisterOperand Src0DPP = getVregSrcForVT.ret; field RegisterOperand Src1DPP = getVregSrcForVT.ret; field RegisterOperand Src2DPP = getVregSrcForVT.ret; - field RegisterOperand Src0VOP3DPP = VGPROp_32; + field RegisterOperand Src0VOP3DPP = getVGPRSrcForVT.ret; field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT.ret; field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT.ret; field RegisterOperand Src0SDWA = getSDWASrcForVT.ret;