Skip to content

Commit

Permalink
[AMDGPU] Add 256-bit vdst and 96-bit src to profile switches. NFC. (#…
Browse files Browse the repository at this point in the history
…81801)

I need these operands for a future patch. Also simplify the conditions
there: if nothing else, using !cond instead of nested !if's means the code
does not need to be realigned every time a new type is added.
  • Loading branch information
rampitec committed Feb 15, 2024
1 parent ba27993 commit c6a7c4d
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 39 deletions.
62 changes: 23 additions & 39 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1468,11 +1468,12 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
VOPDstOperand_t16Lo128),
VOPDstOperand<VGPR_32>);
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
!if(!eq(VT.Size, 16), op16,
VOPDstS64orS32)))); // else VT == i1
RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
!eq(VT.Size, 16) : op16,
1 : VOPDstS64orS32); // else VT == i1
}

class getVALUDstForVT_fake16<ValueType VT> {
Expand Down Expand Up @@ -1556,40 +1557,23 @@ class getSDWASrcForVT <ValueType VT> {
// given VT.
class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
RegisterOperand ret =
!if(!eq(VT.Size, 128),
VRegSrc_128,
!if(!eq(VT.Size, 64),
!if(VT.isFP,
!if(!eq(VT.Value, v2f32.Value),
VSrc_v2f32,
VSrc_f64),
!if(!eq(VT.Value, v2i32.Value),
VSrc_v2b32,
VSrc_b64)),
!if(!eq(VT.Value, i1.Value),
SSrc_i1,
!if(VT.isFP,
!if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
!if(IsTrue16, VSrcT_f16, VSrc_f16),
!if(!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)),
VSrc_v2f16,
!if(!or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value)),
AVSrc_64,
VSrc_f32
)
)
),
!if(!eq(VT.Value, i16.Value),
!if(IsTrue16, VSrcT_b16, VSrc_b16),
!if(!eq(VT.Value, v2i16.Value),
VSrc_v2b16,
VSrc_b32
)
)
)
)
)
);
!cond(!eq(VT, f64) : VSrc_f64,
!eq(VT, f32) : VSrc_f32,
!eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16),
!eq(VT, bf16) : !if(IsTrue16, VSrcT_f16, VSrc_f16),
!eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16),
!eq(VT, i1) : SSrc_i1,
!eq(VT, v2f32) : VSrc_v2f32,
!eq(VT, v2i32) : VSrc_v2b32,
!eq(VT, v2f16) : VSrc_v2f16,
!eq(VT, v2bf16) : VSrc_v2f16,
!eq(VT, v2i16) : VSrc_v2b16,
!eq(VT, v4f16) : AVSrc_64,
!eq(VT, v4bf16) : AVSrc_64,
!eq(VT.Size, 128) : VRegSrc_128,
!eq(VT.Size, 96) : VRegSrc_96,
!eq(VT.Size, 64) : VSrc_b64,
1 : VSrc_b32);
}

// Src2 of VOP3 DPP instructions cannot be a literal
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1214,6 +1214,7 @@ class SrcReg9<RegisterClass regClass, string width> : RegisterOperand<regClass>

def VRegSrc_32 : SrcReg9<VGPR_32, "OPW32">;
def VRegSrc_64 : SrcReg9<VReg_64, "OPW64">;
def VRegSrc_96 : SrcReg9<VReg_96, "OPW96">;
def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
Expand All @@ -1230,6 +1231,10 @@ def VGPRSrc_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
let DecoderMethod = "DecodeVGPR_32RegisterClass";
}

// Register operand over the VReg_96 register class. Its DecoderMethod is set
// explicitly to DecodeVReg_96RegisterClass, mirroring the neighbouring
// VGPRSrc_* operands that likewise name their decoder method directly.
def VGPRSrc_96 : RegisterOperand<VReg_96> {
let DecoderMethod = "DecodeVReg_96RegisterClass";
}

def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
let EncoderMethod = "getMachineOpValueT16Lo128";
Expand Down

0 comments on commit c6a7c4d

Please sign in to comment.