diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 3de20bb44e0c1..7582953358152 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -851,16 +851,20 @@ class AMDGPUImageDimIntrinsic
                               sdnodeprops> : Intrinsic<
     P_.RetTypes,          // vdata(VGPR) -- for load/atomic-with-return
     !listconcat(
-      !foreach(arg, P_.DataArgs, arg.Type),  // vdata(VGPR) -- for store/atomic
-      !if(P_.IsAtomic, [], [llvm_i32_ty]),   // dmask(imm)
-      P_.AddrTypes,                          // vaddr(VGPR)
-      [llvm_v8i32_ty],                       // rsrc(SGPR)
-      !if(P_.IsSample, [llvm_v4i32_ty,       // samp(SGPR)
-                        llvm_i1_ty], []),    // unorm(imm)
-      [llvm_i32_ty,                          // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
-       llvm_i32_ty]),                        // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc;
-                                             // gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
-
+      !foreach(arg, P_.DataArgs, arg.Type),  // vdata(VGPR) -- for store/atomic
+      !if(P_.IsAtomic, [], [llvm_i32_ty]),   // dmask(imm)
+      P_.AddrTypes,                          // vaddr(VGPR)
+      [llvm_v8i32_ty],                       // rsrc(SGPR)
+      !if(P_.IsSample, [llvm_v4i32_ty,       // samp(SGPR)
+                        llvm_i1_ty], []),    // unorm(imm)
+      [llvm_i32_ty,                          // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
+       llvm_i32_ty]),                        // auxiliary/cachepolicy(imm):
+                                             // bit 0 = glc, bit 1 = slc,
+                                             // bit 2 = dlc (gfx10/gfx11),
+                                             // bit 4 = scc (gfx90a)
+                                             // gfx940: bit 0 = sc0, bit 1 = nt, bit 4 = sc1
+                                             // gfx12+: bits [0-2] = th, bits [3-4] = scope,
+                                             //         bit 5 = nv
     !listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn],
         !if(P_.IsAtomic, [], [ImmArg.DmaskArgIndex>>]),
         !if(P_.IsSample, [ImmArg.UnormArgIndex>>], []),
@@ -1085,11 +1089,15 @@ def int_amdgcn_buffer_load : AMDGPUBufferLoad;
 // the offset argument is uniform.
 def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic <
   [llvm_any_ty],
-  [llvm_v4i32_ty,  // rsrc(SGPR)
-   llvm_i32_ty,    // byte offset
-   llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc;
-                   // gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
-                   // Note: volatile bit is **not** permitted here.
+  [llvm_v4i32_ty,  // rsrc(SGPR)
+   llvm_i32_ty,    // byte offset
+   llvm_i32_ty],   // auxiliary/cachepolicy(imm):
+                   // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
+                   // bit 3 = swz, bit 4 = scc (gfx90a)
+                   // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+                   // gfx12+: bits [0-2] = th, bits [3-4] = scope,
+                   //         bit 5 = nv, bit 6 = swz
+                   // Note: volatile bit is **not** permitted here.
   [IntrNoMem, ImmArg>]>,
   AMDGPURsrcIntrinsic<0>;
@@ -1123,19 +1131,16 @@ def int_amdgcn_buffer_store : AMDGPUBufferStore;
 // operation is volatile.
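For illustration only (this sketch is not part of the patch): a minimal LLVM IR use of the cachepolicy immediate documented above, on llvm.amdgcn.s.buffer.load and assuming a pre-gfx12 target; the byte offset of 16 is an arbitrary placeholder.

; Pre-gfx12: cachepolicy = 1 sets bit 0 (glc).
; On gfx12+ the same operand instead carries th in bits [0-2] and scope in bits [3-4].
define i32 @s_buffer_load_glc(<4 x i32> %rsrc) {
  %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 16, i32 1)
  ret i32 %v
}
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)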
class AMDGPURawBufferLoad : DefaultAttrsIntrinsic < [data_ty], - [llvm_v4i32_ty, // rsrc(SGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [llvm_v4i32_ty, // rsrc(SGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; @@ -1143,20 +1148,16 @@ def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad; class AMDGPURawPtrBufferLoad : DefaultAttrsIntrinsic < [data_ty], - [AMDGPUBufferRsrcTy, // rsrc(SGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) - + [AMDGPUBufferRsrcTy, // rsrc(SGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrReadMem, ReadOnly>, NoCapture>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1165,20 +1166,17 @@ def int_amdgcn_raw_ptr_buffer_load : AMDGPURawPtrBufferLoad; class AMDGPUStructBufferLoad : DefaultAttrsIntrinsic < [data_ty], - [llvm_v4i32_ty, // rsrc(SGPR) - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [llvm_v4i32_ty, // rsrc(SGPR) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc 
(gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; @@ -1186,20 +1184,17 @@ def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad; class AMDGPUStructPtrBufferLoad : DefaultAttrsIntrinsic < [data_ty], - [AMDGPUBufferRsrcTy, // rsrc(SGPR) - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [AMDGPUBufferRsrcTy, // rsrc(SGPR) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrReadMem, ReadOnly>, NoCapture>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1208,20 +1203,17 @@ def int_amdgcn_struct_ptr_buffer_load : AMDGPUStructPtrBufferLoad; class AMDGPURawBufferStore : DefaultAttrsIntrinsic < [], - [data_ty, // vdata(VGPR) - llvm_v4i32_ty, // rsrc(SGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [data_ty, // vdata(VGPR) + llvm_v4i32_ty, // rsrc(SGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; @@ -1229,20 +1221,17 @@ def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore; class AMDGPURawPtrBufferStore : DefaultAttrsIntrinsic < [], - [data_ty, // vdata(VGPR) - AMDGPUBufferRsrcTy, // rsrc(SGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, 
cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [data_ty, // vdata(VGPR) + AMDGPUBufferRsrcTy, // rsrc(SGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrWriteMem, WriteOnly>, NoCapture>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1251,21 +1240,18 @@ def int_amdgcn_raw_ptr_buffer_store : AMDGPURawPtrBufferStore; class AMDGPUStructBufferStore : DefaultAttrsIntrinsic < [], - [data_ty, // vdata(VGPR) - llvm_v4i32_ty, // rsrc(SGPR) - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [data_ty, // vdata(VGPR) + llvm_v4i32_ty, // rsrc(SGPR) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; @@ -1273,21 +1259,18 @@ def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; class AMDGPUStructPtrBufferStore : DefaultAttrsIntrinsic < [], - [data_ty, // vdata(VGPR) - AMDGPUBufferRsrcTy, // rsrc(SGPR) - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [data_ty, // vdata(VGPR) + AMDGPUBufferRsrcTy, // rsrc(SGPR) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = 
nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrWriteMem, WriteOnly>, NoCapture>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1540,33 +1523,29 @@ def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz)) - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic < - [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 + [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [AMDGPUBufferRsrcTy, // rsrc(SGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds` checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz) - // volatile op (bit 31, stripped at lowering)) + llvm_i32_ty, // offset(VGPR/imm, included in bounds` checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrReadMem, ReadOnly>, NoCapture>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1578,16 +1557,13 @@ def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + 
// bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1599,16 +1575,13 @@ def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrWriteMem, WriteOnly>, NoCapture>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1620,59 +1593,50 @@ def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic < - [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 + [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [AMDGPUBufferRsrcTy, // rsrc(SGPR) - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 
= nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrReadMem, ReadOnly>, NoCapture>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic < [], - [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 + [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 AMDGPUBufferRsrcTy, // rsrc(SGPR) - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrArgMemOnly, IntrWriteMem, WriteOnly>, NoCapture>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1685,16 +1649,13 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11), - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1746,44 +1707,38 @@ def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP; class AMDGPURawBufferLoadLDS : Intrinsic < [], - [llvm_v4i32_ty, // rsrc(SGPR) - LLVMQualPointerType<3>, // LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 - llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11)) - // swizzled buffer (bit 3 = swz), - // gfx12+: - // 
cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [llvm_v4i32_ty, // rsrc(SGPR) + LLVMQualPointerType<3>, // LDS base offset + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS; class AMDGPURawPtrBufferLoadLDS : Intrinsic < [], - [AMDGPUBufferRsrcTy, // rsrc(SGPR) - LLVMQualPointerType<3>, // LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 - llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11)) - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [AMDGPUBufferRsrcTy, // rsrc(SGPR) + LLVMQualPointerType<3>, // LDS base offset + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWillReturn, IntrArgMemOnly, ReadOnly>, NoCapture>, WriteOnly>, NoCapture>, @@ -1793,46 +1748,40 @@ def int_amdgcn_raw_ptr_buffer_load_lds : AMDGPURawPtrBufferLoadLDS; class AMDGPUStructBufferLoadLDS : Intrinsic < [], - [llvm_v4i32_ty, // rsrc(SGPR) - LLVMQualPointerType<3>, // LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11)) - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [llvm_v4i32_ty, // rsrc(SGPR) + LLVMQualPointerType<3>, // LDS base offset + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // 
voffset(VGPR, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS; class AMDGPUStructPtrBufferLoadLDS : Intrinsic < [], - [AMDGPUBufferRsrcTy, // rsrc(SGPR) - LLVMQualPointerType<3> , // LDS base offset - llvm_i32_ty, // Data byte size: 1/2/4 - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) - llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, - // bit 1 = slc, - // bit 2 = dlc on gfx10/gfx11)) - // swizzled buffer (bit 3 = swz), - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - // all: - // volatile op (bit 31, stripped at lowering)) + [AMDGPUBufferRsrcTy, // rsrc(SGPR) + LLVMQualPointerType<3>, // LDS base offset + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling) + llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) + llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling) + llvm_i32_ty], // auxiliary/cachepolicy(imm): + // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), + // bit 3 = swz, bit 4 = scc (gfx90a) + // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx12+: bits [0-2] = th, bits [3-4] = scope, + // bit 5 = nv, bit 6 = swz + // all: volatile op (bit 31, stripped at lowering) [IntrWillReturn, IntrArgMemOnly, ReadOnly>, NoCapture>, WriteOnly>, NoCapture>,
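For illustration only (again not part of the patch): a minimal LLVM IR sketch of the gfx12+ packing described in these comments, using llvm.amdgcn.struct.ptr.buffer.store; the th and scope field values below are arbitrary placeholders.

; gfx12+: th in bits [0-2], scope in bits [3-4], swz in bit 6.
; With placeholder values th = 1 and scope = 2, plus swz:
;   aux = 1 | (2 << 3) | (1 << 6) = 81
define void @struct_ptr_store_swizzled(ptr addrspace(8) %rsrc, float %data, i32 %vindex) {
  call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 81)
  ret void
}
declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)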