diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index bf787b230067d..291c03ab223a8 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -272,13 +272,13 @@ class MIMGNSAHelper +class PartialNSAHelper : NSAHelper<> { - list addr_types = + list addr_types = !if(!ge(num_addrs, max_addr), - !listconcat(!listsplat(VGPR_32, !sub(max_addr, 1)), [LastAddrRC]), - !listsplat(VGPR_32, num_addrs)); + !listconcat(!listsplat(VGPROp_32, !sub(max_addr, 1)), [LastAddrRC]), + !listsplat(VGPROp_32, num_addrs)); int VAddrCount = !if(!gt(num_addrs, max_addr), max_addr, num_addrs); list AddrAsmNames = !foreach(i, !range(VAddrCount), "vaddr" # i); @@ -359,7 +359,7 @@ class MIMG_gfx11 // Note that 1-dword addresses always use non-NSA variants. class MIMG_nsa_gfx11 addr_types=[], - RegisterClass LastAddrRC = VGPR_32> + RegisterOperand LastAddrRC = VGPROp_32> : MIMG, MIMGe_gfx11 { let SubtargetPredicate = isGFX11Only; let AssemblerPredicate = isGFX11Only; @@ -400,7 +400,7 @@ class VIMAGE_gfx12 + RegisterOperand Addr3RC> : VSAMPLE, VSAMPLEe { let SubtargetPredicate = isGFX12Plus; let AssemblerPredicate = isGFX12Plus; @@ -421,7 +421,7 @@ class VSAMPLE_gfx12 : MIMG_gfx6789 { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc, @@ -434,10 +434,10 @@ class MIMG_NoSampler_Helper : MIMG_gfx90a .ret:$vdata), dns> { - let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc, + let InOperandList = !con((ins getAlign2RegOp.ret:$vaddr, SReg_256_XNULL:$srsrc, DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); @@ -446,7 +446,7 @@ class MIMG_NoSampler_Helper_gfx90a : MIMG_gfx10 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask, @@ -471,7 +471,7 @@ class MIMG_NoSampler_nsa_gfx10 : MIMG_gfx11 { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, 
DMask:$dmask, @@ -508,7 +508,7 @@ class VIMAGE_NoSampler_gfx12 : VSAMPLE_gfx12 { let InOperandList = !con(AddrIns, @@ -525,7 +525,7 @@ class VSAMPLE_Sampler_gfx12 : VSAMPLE_gfx12 { let InOperandList = !con(AddrIns, @@ -550,16 +550,16 @@ multiclass MIMG_NoSampler_Src_Helper ; if !not(ExtendedImageInst) then - def _V1_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V1_gfx10 : MIMG_NoSampler_gfx10; } if op.HAS_GFX11 then { - def _V1_gfx11 : MIMG_NoSampler_gfx11; } } @@ -576,14 +576,14 @@ multiclass MIMG_NoSampler_Src_Helper ; + def _V2 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then - def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V2_gfx10 : MIMG_NoSampler_gfx10; + def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a ; + def _V2_gfx10 : MIMG_NoSampler_gfx10; def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } if op.HAS_GFX11 then { - def _V2_gfx11 : MIMG_NoSampler_gfx11; + def _V2_gfx11 : MIMG_NoSampler_gfx11; def _V2_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; } } @@ -600,14 +600,14 @@ multiclass MIMG_NoSampler_Src_Helper ; + def _V3 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then - def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V3_gfx10 : MIMG_NoSampler_gfx10; + def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a ; + def _V3_gfx10 : MIMG_NoSampler_gfx10; def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } if op.HAS_GFX11 then { - def _V3_gfx11 : MIMG_NoSampler_gfx11; + def _V3_gfx11 : MIMG_NoSampler_gfx11; def _V3_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; } } @@ -624,15 +624,15 @@ multiclass MIMG_NoSampler_Src_Helper ; + def _V4 : MIMG_NoSampler_Helper ; if !not(ExtendedImageInst) then - def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a ; - def _V4_gfx10 : MIMG_NoSampler_gfx10; + def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a ; + def _V4_gfx10 : MIMG_NoSampler_gfx10; def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; } if op.HAS_GFX11 then { - def _V4_gfx11 : MIMG_NoSampler_gfx11; + def _V4_gfx11 : MIMG_NoSampler_gfx11; def _V4_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11; } @@ 
-640,7 +640,7 @@ multiclass MIMG_NoSampler_Src_Helper ; } else { @@ -1128,7 +1128,7 @@ multiclass MIMG_Atomic_Renamed ; class MIMG_Sampler_Helper + RegisterOperand src_rc, string dns=""> : MIMG_gfx6789 { let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, @@ -1139,7 +1139,7 @@ class MIMG_Sampler_Helper + RegisterOperand src_rc, string dns=""> : MIMG_gfx90a { let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp, DMask:$dmask, UNorm:$unorm, CPol:$cpol, @@ -1164,7 +1164,7 @@ class MIMG_Sampler_Asm_gfx10p { } class MIMG_Sampler_gfx10 : MIMG_gfx10 { let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1180,7 +1180,7 @@ class MIMG_Sampler_nsa_gfx10 : MIMG_gfx10 { let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1200,7 +1200,7 @@ class MIMG_Sampler_nortn_nsa_gfx10 : MIMG_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1209,7 +1209,7 @@ class MIMG_Sampler_gfx11 + RegisterOperand LastVAddrSize, string dns=""> : MIMG_nsa_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p.ret; @@ -1217,7 +1217,7 @@ class MIMG_Sampler_nsa_gfx11 : MIMG_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret; @@ -1227,7 +1227,7 @@ class MIMG_Sampler_nortn_gfx11 + RegisterOperand LastVAddrSize, string dns=""> : MIMG_nsa_gfx11 { let InOperandList = MIMG_Sampler_OpList_gfx10p.ret; let AsmString = MIMG_Sampler_Asm_gfx10p.ret; @@ -1237,20 +1237,20 @@ class MIMG_Sampler_nortn_nsa_gfx11 { int NumWords = dw; - RegisterClass RegClass = !if(!le(AddrDW, 0), ?, - !if(!eq(AddrDW, 1), VGPR_32, - !if(!eq(AddrDW, 2), VReg_64, - !if(!eq(AddrDW, 3), VReg_96, - !if(!eq(AddrDW, 4), VReg_128, - !if(!eq(AddrDW, 5), VReg_160, - !if(!eq(AddrDW, 6), VReg_192, - !if(!eq(AddrDW, 7), VReg_224, - !if(!eq(AddrDW, 8), 
VReg_256, - !if(!eq(AddrDW, 9), VReg_288, - !if(!eq(AddrDW, 10), VReg_320, - !if(!eq(AddrDW, 11), VReg_352, - !if(!eq(AddrDW, 12), VReg_384, - !if(!le(AddrDW, 16), VReg_512, ?)))))))))))))); + RegisterOperand RegClass = !if(!le(AddrDW, 0), ?, + !if(!eq(AddrDW, 1), VGPROp_32, + !if(!eq(AddrDW, 2), VGPROp_64, + !if(!eq(AddrDW, 3), VGPROp_96, + !if(!eq(AddrDW, 4), VGPROp_128, + !if(!eq(AddrDW, 5), VGPROp_160, + !if(!eq(AddrDW, 6), VGPROp_192, + !if(!eq(AddrDW, 7), VGPROp_224, + !if(!eq(AddrDW, 8), VGPROp_256, + !if(!eq(AddrDW, 9), VGPROp_288, + !if(!eq(AddrDW, 10), VGPROp_320, + !if(!eq(AddrDW, 11), VGPROp_352, + !if(!eq(AddrDW, 12), VGPROp_384, + !if(!le(AddrDW, 16), VGPROp_512, ?)))))))))))))); // Whether the instruction variant with this vaddr size should be enabled for // the auto-generated disassembler. @@ -1514,8 +1514,10 @@ multiclass MIMG_Gather_WQM class MIMG_IntersectRay_Helper { int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11))); - RegisterClass RegClass = MIMGAddrSize.RegClass; - int VAddrDwords = !srl(RegClass.Size, 5); + RegisterOperand RegClass = MIMGAddrSize.RegClass; + + defvar Size = !cast(RegClass.RegClass).Size; + int VAddrDwords = !srl(Size, 5); int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); @@ -1526,7 +1528,7 @@ class MIMG_IntersectRay_Helper { true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); } -class MIMG_IntersectRay_gfx10 +class MIMG_IntersectRay_gfx10 : MIMG_gfx10 { let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16); let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16"; @@ -1540,7 +1542,7 @@ class MIMG_IntersectRay_nsa_gfx10 let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16"; } -class MIMG_IntersectRay_gfx11 +class MIMG_IntersectRay_gfx11 : MIMG_gfx11 { let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16); let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16"; diff --git 
a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index aa5dae09ca185..c8231b470abae 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2596,24 +2596,42 @@ class getAlign2RegOp { RegisterOperand ret = !cond(!eq(RC, VGPROp_16) : VGPROp_16, !eq(RC, VGPROp_32) : VGPROp_32, + !eq(RC, VGPROp_64) : VGPROp_64_Align2, !eq(RC, VGPROp_64_Align1) : VGPROp_64_Align2, + !eq(RC, VGPROp_64_Align2) : VGPROp_64_Align2, + !eq(RC, VGPROp_96) : VGPROp_96_Align2, !eq(RC, VGPROp_96_Align1) : VGPROp_96_Align2, + !eq(RC, VGPROp_96_Align2) : VGPROp_96_Align2, + !eq(RC, VGPROp_128) : VGPROp_128_Align2, !eq(RC, VGPROp_128_Align1) : VGPROp_128_Align2, + !eq(RC, VGPROp_128_Align2) : VGPROp_128_Align2, + !eq(RC, VGPROp_160) : VGPROp_160_Align2, !eq(RC, VGPROp_160_Align1) : VGPROp_160_Align2, + !eq(RC, VGPROp_160_Align2) : VGPROp_160_Align2, + !eq(RC, VGPROp_1024) : VGPROp_1024_Align2, !eq(RC, VGPROp_1024_Align1) : VGPROp_1024_Align2, + !eq(RC, VGPROp_1024_Align2) : VGPROp_1024_Align2, + !eq(RC, AVLdSt_32) : AVLdSt_32, + !eq(RC, AVLdSt_64_Align1) : AVLdSt_64_Align2, !eq(RC, AVLdSt_64) : AVLdSt_64_Align2, + !eq(RC, AVLdSt_96) : AVLdSt_96_Align2, + !eq(RC, AVLdSt_96_Align2) : AVLdSt_96_Align2, !eq(RC, AVLdSt_96_Align1) : AVLdSt_96_Align2, + !eq(RC, AVLdSt_128) : AVLdSt_128_Align2, !eq(RC, AVLdSt_128_Align1) : AVLdSt_128_Align2, + !eq(RC, AVLdSt_128_Align2) : AVLdSt_128_Align2, + !eq(RC, AVLdSt_160) : AVLdSt_160_Align2, - !eq(RC, AVLdSt_160_Align1) : AVLdSt_160_Align2); + !eq(RC, AVLdSt_160_Align1) : AVLdSt_160_Align2, + !eq(RC, AVLdSt_160_Align2) : AVLdSt_160_Align2); } class getEquivalentAGPROperand { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 5f5eec49bab06..31dd6b9e8d84d 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1327,7 +1327,7 @@ def VGPROp_16 : VGPROp { } def VGPROp_32 : VGPROp; -foreach size = 
["64", "96", "128", "160", "192", "224", "256", "288", "512", "1024"] in { +foreach size = ["64", "96", "128", "160", "192", "224", "256", "288", "320", "352", "384", "512", "1024"] in { def VGPROp_#size : VGPROp("VReg_"#size)>; }