# Patch summary (commentary only; text before the first "diff --git" header is not part of any hunk).
#
# Files touched:
#   llvm/lib/Target/AMDGPU/SIInstrInfo.td
#   llvm/lib/Target/AMDGPU/SIRegisterInfo.td
#
# SIInstrInfo.td, hunk 1 (class getVALUDstForVT):
#   The nested !if chain keyed on VT.Size is replaced by a single !cond.
#   The !cond lists sizes 256, 128, 64, 32 (each -> VOPDstOperand) and 16 (-> op16),
#   with the catch-all `1 : VOPDstS64orS32` keeping the old "else VT == i1" fall-through.
#   The VT.Size == 256 arm is new; the removed chain had no 256 test.
#
# SIInstrInfo.td, hunk 2 (class getVOP3SrcForVT):
#   The nested !if tree is replaced by a flat !cond that first compares VT against
#   named types (f64, f32, f16, bf16, i16, i1, v2f32, v2i32, v2f16, v2bf16, v2i16,
#   v4f16, v4bf16) and only then falls back to VT.Size tests for 128, 96 and 64,
#   ending in the default `1 : VSrc_b32`.
#   The VT.Size == 96 arm (-> VRegSrc_96) is new.
#   NOTE(review): in the removed code the v4f16/v4bf16 -> AVSrc_64 test sits inside the
#   branch taken only when VT.Size is neither 128 nor 64; in the added code it is tested
#   before any size check. If those types are 64 bits wide (presumably; not shown here),
#   they previously selected VSrc_f64 and now select AVSrc_64 -- confirm this is intended.
#   NOTE(review): this !cond has no VT.Size == 256 arm, unlike the one added to
#   getVALUDstForVT; an unlisted 256-bit VT reaches the VSrc_b32 default -- confirm.
#
# SIRegisterInfo.td:
#   Adds `def VRegSrc_96 : SrcReg9` between VRegSrc_64 and VRegSrc_128, and a new
#   `def VGPRSrc_96 : RegisterOperand` whose DecoderMethod is "DecodeVReg_96RegisterClass".
#
# NOTE(review): class headers and operand references below appear without template
# argument lists (e.g. getVALUDstForVT uses VT, IsTrue16 and IsVOP3Encoding but shows no
# parameter list) and the original line breaks/indentation are collapsed. Presumably the
# angle-bracket text was lost in extraction -- verify against the original patch before
# attempting to apply this text.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 22599773d562c..4b2b79335c8a2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1468,11 +1468,12 @@ class getVALUDstForVT { defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16, VOPDstOperand_t16Lo128), VOPDstOperand); - RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand, - !if(!eq(VT.Size, 128), VOPDstOperand, - !if(!eq(VT.Size, 64), VOPDstOperand, - !if(!eq(VT.Size, 16), op16, - VOPDstS64orS32)))); // else VT == i1 + RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand, + !eq(VT.Size, 128) : VOPDstOperand, + !eq(VT.Size, 64) : VOPDstOperand, + !eq(VT.Size, 32) : VOPDstOperand, + !eq(VT.Size, 16) : op16, + 1 : VOPDstS64orS32); // else VT == i1 } class getVALUDstForVT_fake16 { @@ -1556,40 +1557,23 @@ class getSDWASrcForVT { // given VT. class getVOP3SrcForVT { RegisterOperand ret = - !if(!eq(VT.Size, 128), - VRegSrc_128, - !if(!eq(VT.Size, 64), - !if(VT.isFP, - !if(!eq(VT.Value, v2f32.Value), - VSrc_v2f32, - VSrc_f64), - !if(!eq(VT.Value, v2i32.Value), - VSrc_v2b32, - VSrc_b64)), - !if(!eq(VT.Value, i1.Value), - SSrc_i1, - !if(VT.isFP, - !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), - !if(IsTrue16, VSrcT_f16, VSrc_f16), - !if(!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)), - VSrc_v2f16, - !if(!or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value)), - AVSrc_64, - VSrc_f32 - ) - ) - ), - !if(!eq(VT.Value, i16.Value), - !if(IsTrue16, VSrcT_b16, VSrc_b16), - !if(!eq(VT.Value, v2i16.Value), - VSrc_v2b16, - VSrc_b32 - ) - ) - ) - ) - ) - ); + !cond(!eq(VT, f64) : VSrc_f64, + !eq(VT, f32) : VSrc_f32, + !eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16), + !eq(VT, bf16) : !if(IsTrue16, VSrcT_f16, VSrc_f16), + !eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16), + !eq(VT, i1) : SSrc_i1, + !eq(VT, v2f32) : VSrc_v2f32, + !eq(VT, v2i32) : VSrc_v2b32, + !eq(VT, v2f16) : 
VSrc_v2f16, + !eq(VT, v2bf16) : VSrc_v2f16, + !eq(VT, v2i16) : VSrc_v2b16, + !eq(VT, v4f16) : AVSrc_64, + !eq(VT, v4bf16) : AVSrc_64, + !eq(VT.Size, 128) : VRegSrc_128, + !eq(VT.Size, 96) : VRegSrc_96, + !eq(VT.Size, 64) : VSrc_b64, + 1 : VSrc_b32); } // Src2 of VOP3 DPP instructions cannot be a literal diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index d4a1e8d185a1d..176b3c199eafd 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1214,6 +1214,7 @@ class SrcReg9 : RegisterOperand def VRegSrc_32 : SrcReg9; def VRegSrc_64 : SrcReg9; +def VRegSrc_96 : SrcReg9; def VRegSrc_128: SrcReg9; def VRegSrc_256: SrcReg9; def VRegOrLdsSrc_32 : SrcReg9; @@ -1230,6 +1231,10 @@ def VGPRSrc_32_Lo128 : RegisterOperand { let DecoderMethod = "DecodeVGPR_32RegisterClass"; } +def VGPRSrc_96 : RegisterOperand { + let DecoderMethod = "DecodeVReg_96RegisterClass"; +} + def VGPRSrc_16_Lo128 : RegisterOperand { let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass"; let EncoderMethod = "getMachineOpValueT16Lo128";