-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Allow WorkgroupID intrinsics in amdgpu_gfx functions #89773
Conversation
With GFX12 architected SGPRs the workgroup ids are trivially available in any function called from a compute entrypoint.
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) ChangesWith GFX12 architected SGPRs the workgroup ids are trivially available Full diff: https://github.com/llvm/llvm-project/pull/89773.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 780dfaae11ef3e..de029a4d6bedd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4248,7 +4248,8 @@ bool AMDGPULegalizerInfo::loadInputValue(
AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
- if (ST.hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+ if (ST.hasArchitectedSGPRs() &&
+ (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
switch (ArgType) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Arg = &WorkGroupIDX;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 17b6e0cb9c3b45..50159d435aef7e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2124,7 +2124,8 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
const ArgDescriptor WorkGroupIDZ =
ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
- if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+ if (Subtarget->hasArchitectedSGPRs() &&
+ (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) {
switch (PVID) {
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
Reg = &WorkGroupIDX;
@@ -2798,7 +2799,9 @@ SDValue SITargetLowering::LowerFormalArguments(
(void)UserSGPRInfo;
if (!Subtarget->enableFlatScratch())
assert(!UserSGPRInfo.hasFlatScratchInit());
- if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
+ if ((CallConv != CallingConv::AMDGPU_CS &&
+ CallConv != CallingConv::AMDGPU_Gfx) ||
+ !Subtarget->hasArchitectedSGPRs())
assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ());
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 12433dc83c4892..bf4a501cc3159d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -110,7 +110,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
}
if (!AMDGPU::isGraphics(CC) ||
- (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
+ ((CC == CallingConv::AMDGPU_CS || CC == CallingConv::AMDGPU_CS) &&
+ ST.hasArchitectedSGPRs())) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
index cfff0a969da9e7..14fe4e5f48c67c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s
-; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
@@ -156,10 +156,37 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9ARCH-LABEL: workgroup_ids_gfx:
-; GFX9ARCH: ; %bb.0:
-; GFX9ARCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9ARCH-NEXT: s_setpc_b64 s[30:31]
+; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
+; GFX9ARCH-SDAG: ; %bb.0:
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v6, ttmp9
+; GFX9ARCH-SDAG-NEXT: s_and_b32 s34, ttmp7, 0xffff
+; GFX9ARCH-SDAG-NEXT: global_store_dword v[0:1], v6, off
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34
+; GFX9ARCH-SDAG-NEXT: s_lshr_b32 s34, ttmp7, 16
+; GFX9ARCH-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, s34
+; GFX9ARCH-SDAG-NEXT: global_store_dword v[4:5], v0, off
+; GFX9ARCH-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9ARCH-GISEL-LABEL: workgroup_ids_gfx:
+; GFX9ARCH-GISEL: ; %bb.0:
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v6, ttmp9
+; GFX9ARCH-GISEL-NEXT: s_and_b32 s34, ttmp7, 0xffff
+; GFX9ARCH-GISEL-NEXT: s_lshr_b32 s35, ttmp7, 16
+; GFX9ARCH-GISEL-NEXT: global_store_dword v[0:1], v6, off
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s34
+; GFX9ARCH-GISEL-NEXT: global_store_dword v[2:3], v0, off
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, s35
+; GFX9ARCH-GISEL-NEXT: global_store_dword v[4:5], v0, off
+; GFX9ARCH-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9ARCH-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: workgroup_ids_gfx:
; GFX12: ; %bb.0:
@@ -168,6 +195,18 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: v_dual_mov_b32 v6, ttmp9 :: v_dual_mov_b32 v7, s0
+; GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
+; GFX12-NEXT: v_mov_b32_e32 v8, s1
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_store_b32 v[0:1], v6, off scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_store_b32 v[2:3], v7, off scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_store_b32 v[4:5], v8, off scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
%id.x = call i32 @llvm.amdgcn.workgroup.id.x()
%id.y = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -177,11 +216,3 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
store volatile i32 %id.z, ptr addrspace(1) %outz
ret void
}
-
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.y()
-declare i32 @llvm.amdgcn.workgroup.id.z()
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX9-GISEL: {{.*}}
-; GFX9-SDAG: {{.*}}
|
Maybe we should also allow them in all calling conventions except for non-compute entrypoints like amdgpu_ps? Maybe we should allow them in all calling conventions? |
If the registers are always available, why not? |
Hardware only sets them up for compute entrypoints. So you can use them in anything called from a compute entrypoint. So amdgpu_cs or amdgpu_kernel is definitely OK, amdgpu_ps etc definitely not OK, and all other calling conventions (including amdgpu_gfx) are only OK if they were called from a compute entrypoint. The question is, how much of this is worth trying to detect in the compiler? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We probably want a different version of isCompute that covers AMDGPU_Gfx, if not just include it there
With GFX12 architected SGPRs the workgroup ids are trivially available
in any function called from a compute entrypoint.