Skip to content

Commit 8122ccd

Browse files
shiltian and jayfoad authored
[AMDGPU] Set TGID_EN_X/Y/Z when cluster ID intrinsics are used (#159120)
Hardware initializes a single value in ttmp9 which is either the workgroup ID X or cluster ID X. Most of this patch is a refactoring to use a single `PreloadedValue` enumerator for this value, instead of two enumerators `WORKGROUP_ID_X` and `CLUSTER_ID_X` referring to the same value. This makes it simpler to have a single attribute `amdgpu-no-workgroup-id-x` indicating that this value is not used, which in turn sets the TGID_EN_X bit appropriately to tell the hardware whether to initialize it. All of the above applies to Y and Z similarly.

Fixes: LWPSCGFX13-568
Co-authored-by: Jay Foad <jay.foad@amd.com>
1 parent 158eeb3 commit 8122ccd

31 files changed

+652
-85
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -809,15 +809,15 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
809809
AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
810810
};
811811

812-
static constexpr StringLiteral ImplicitAttrNames[] = {
813-
"amdgpu-no-dispatch-ptr",
814-
"amdgpu-no-queue-ptr",
815-
"amdgpu-no-implicitarg-ptr",
816-
"amdgpu-no-dispatch-id",
817-
"amdgpu-no-workgroup-id-x",
818-
"amdgpu-no-workgroup-id-y",
819-
"amdgpu-no-workgroup-id-z",
820-
"amdgpu-no-lds-kernel-id",
812+
static constexpr StringLiteral ImplicitAttrNames[][2] = {
813+
{"amdgpu-no-dispatch-ptr", ""},
814+
{"amdgpu-no-queue-ptr", ""},
815+
{"amdgpu-no-implicitarg-ptr", ""},
816+
{"amdgpu-no-dispatch-id", ""},
817+
{"amdgpu-no-workgroup-id-x", "amdgpu-no-cluster-id-x"},
818+
{"amdgpu-no-workgroup-id-y", "amdgpu-no-cluster-id-y"},
819+
{"amdgpu-no-workgroup-id-z", "amdgpu-no-cluster-id-z"},
820+
{"amdgpu-no-lds-kernel-id", ""},
821821
};
822822

823823
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -833,7 +833,9 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
833833
LLT ArgTy;
834834

835835
// If the callee does not use the attribute value, skip copying the value.
836-
if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
836+
if (all_of(ImplicitAttrNames[I++], [&](StringRef AttrName) {
837+
return AttrName.empty() || Info.CB->hasFnAttr(AttrName);
838+
}))
837839
continue;
838840

839841
std::tie(OutgoingArg, ArgRC, ArgTy) =

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3793,21 +3793,23 @@ void SITargetLowering::passSpecialInputs(
37933793
// in the same location as the input.
37943794
// clang-format off
37953795
static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
3796-
StringLiteral> ImplicitAttrs[] = {
3797-
{AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
3798-
{AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr" },
3799-
{AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
3800-
{AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
3801-
{AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
3802-
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,"amdgpu-no-workgroup-id-y"},
3803-
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"},
3804-
{AMDGPUFunctionArgInfo::LDS_KERNEL_ID,"amdgpu-no-lds-kernel-id"},
3805-
};
3796+
std::array<StringLiteral, 2>> ImplicitAttrs[] = {
3797+
{AMDGPUFunctionArgInfo::DISPATCH_PTR, {"amdgpu-no-dispatch-ptr", ""}},
3798+
{AMDGPUFunctionArgInfo::QUEUE_PTR, {"amdgpu-no-queue-ptr", ""}},
3799+
{AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, {"amdgpu-no-implicitarg-ptr", ""}},
3800+
{AMDGPUFunctionArgInfo::DISPATCH_ID, {"amdgpu-no-dispatch-id", ""}},
3801+
{AMDGPUFunctionArgInfo::WORKGROUP_ID_X, {"amdgpu-no-workgroup-id-x", "amdgpu-no-cluster-id-x"}},
3802+
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, {"amdgpu-no-workgroup-id-y", "amdgpu-no-cluster-id-y"}},
3803+
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, {"amdgpu-no-workgroup-id-z", "amdgpu-no-cluster-id-z"}},
3804+
{AMDGPUFunctionArgInfo::LDS_KERNEL_ID, {"amdgpu-no-lds-kernel-id", ""}},
3805+
};
38063806
// clang-format on
38073807

3808-
for (auto [InputID, Attr] : ImplicitAttrs) {
3808+
for (auto [InputID, Attrs] : ImplicitAttrs) {
38093809
// If the callee does not use the attribute value, skip copying the value.
3810-
if (CLI.CB->hasFnAttr(Attr))
3810+
if (all_of(Attrs, [&](StringRef Attr) {
3811+
return Attr.empty() || CLI.CB->hasFnAttr(Attr);
3812+
}))
38113813
continue;
38123814

38133815
const auto [OutgoingArg, ArgRC, ArgTy] =

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,16 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
132132
if (!AMDGPU::isGraphics(CC) ||
133133
((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_Gfx) &&
134134
ST.hasArchitectedSGPRs())) {
135-
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
135+
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x") ||
136+
!F.hasFnAttribute("amdgpu-no-cluster-id-x"))
136137
WorkGroupIDX = true;
137138

138-
if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
139+
if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y") ||
140+
!F.hasFnAttribute("amdgpu-no-cluster-id-y"))
139141
WorkGroupIDY = true;
140142

141-
if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
143+
if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z") ||
144+
!F.hasFnAttribute("amdgpu-no-cluster-id-z"))
142145
WorkGroupIDZ = true;
143146
}
144147

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,5 +430,5 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
430430

431431
declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #1
432432

433-
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
433+
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
434434
attributes #1 = { argmemonly nofree nounwind willreturn writeonly }

llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,4 +232,4 @@ define i64 @load_deref_or_null_maxmimum_callsite_declaration_only() {
232232
ret i64 %add
233233
}
234234

235-
attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
235+
attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
8888
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
8989
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
9090
; CHECK-NEXT: S_ENDPGM 0
91-
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
91+
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
9292
ret void
9393
}
9494

@@ -124,7 +124,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
124124
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
125125
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
126126
; CHECK-NEXT: S_ENDPGM 0
127-
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
127+
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
128128
ret void
129129
}
130130

@@ -198,7 +198,7 @@ define void @func_call_no_workgroup_ids() {
198198
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
199199
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
200200
; CHECK-NEXT: SI_RETURN
201-
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
201+
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
202202
ret void
203203
}
204204

@@ -223,7 +223,7 @@ define void @func_call_no_other_sgprs() {
223223
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
224224
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
225225
; CHECK-NEXT: SI_RETURN
226-
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
226+
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
227227
ret void
228228
}
229229

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1486,5 +1486,5 @@ entry:
14861486
}
14871487

14881488
attributes #0 = { nounwind }
1489-
attributes #1 = { nounwind noinline "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
1489+
attributes #1 = { nounwind noinline "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
14901490

llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,4 @@ define amdgpu_kernel void @amdhsa_kernarg_preload_1_implicit_2(i32 inreg) #0 { r
7070

7171
define amdgpu_kernel void @amdhsa_kernarg_preload_0_implicit_2(i32) #0 { ret void }
7272

73-
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
73+
attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }

llvm/test/CodeGen/AMDGPU/call-argument-types.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7283,7 +7283,7 @@ declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x
72837283
declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
72847284
<5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
72857285

7286-
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
7286+
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
72877287
attributes #1 = { nounwind readnone }
72887288
attributes #2 = { nounwind noinline }
72897289

llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,4 @@ define float @asm_changes_mode(float %x, float %y) #0 {
5454

5555
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
5656

57-
attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
57+
attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

0 commit comments

Comments
 (0)