# Patch overview (text before the first "diff --git" header is not part of any hunk).
#
# Files touched:
#   1. llvm/lib/Target/AMDGPU/SIISelLowering.cpp
#      In SITargetLowering::LowerFormalArguments, the single assert guarding graphics
#      calling conventions is split into three:
#        - the unconditional part no longer mentions flat-scratch-init or workgroup IDs;
#        - !hasFlatScratchInit() is asserted only when enableFlatScratch() is false;
#        - !hasWorkGroupID{X,Y,Z}() is asserted unless the calling convention is
#          AMDGPU_CS and the subtarget reports hasArchitectedSGPRs().
#   2. llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
#      In the SIMachineFunctionInfo constructor, the block that sets the WorkGroupID*
#      flags is now also entered for AMDGPU_CS when hasArchitectedSGPRs() is true.
#      The block that sets WorkItemIDX (and whatever follows it outside this hunk)
#      is moved under its own `if (!AMDGPU::isGraphics(CC))`.
#   3. llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll (new file)
#      Runs llc for amdgcn-amd-amdpal / gfx900 with +architected-sgprs, once with the
#      default selector and once with -global-isel. The CHECK lines expect the three
#      workgroup.id intrinsics in an amdgpu_cs function to be read from ttmp9 and
#      from the low (s_and 0xffff) and high (s_lshr 16) halves of ttmp7.
#
# NOTE(review): this copy of the patch arrived with its line structure collapsed.
# Line breaks and blank context lines below were restored so that every hunk matches
# the counts in its "@@" header; leading indentation and intra-line spacing are a
# best-effort reconstruction -- confirm whitespace against the target tree before
# applying (or apply with whitespace-insensitive matching).
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index aa85ee359060fa..74aaebaad4f1b0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2423,11 +2423,14 @@ SDValue SITargetLowering::LowerFormalArguments(
 
   if (IsGraphics) {
     assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
-           (!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
-           !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
-           !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
-           !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
-           !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
+           !Info->hasWorkGroupInfo() && !Info->hasLDSKernelId() &&
+           !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+           !Info->hasWorkItemIDZ());
+    if (!Subtarget->enableFlatScratch())
+      assert(!Info->hasFlatScratchInit());
+    if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
+      assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+             !Info->hasWorkGroupIDZ());
   }
 
   if (CallConv == CallingConv::AMDGPU_PS) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index b2a433dd3db9eb..427a0d1fdf989e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -119,7 +119,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
   else if (ST.isMesaGfxShader(F))
     ImplicitBufferPtr = true;
 
-  if (!AMDGPU::isGraphics(CC)) {
+  if (!AMDGPU::isGraphics(CC) ||
+      (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
     if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
       WorkGroupIDX = true;
 
@@ -128,7 +129,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
 
     if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
       WorkGroupIDZ = true;
+  }
 
+  if (!AMDGPU::isGraphics(CC)) {
     if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
       WorkItemIDX = true;
 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
new file mode 100644
index 00000000000000..83ea07ab4d9246
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
+define amdgpu_cs void @_amdgpu_cs_main() {
+; GFX9-SDAG-LABEL: _amdgpu_cs_main:
+; GFX9-SDAG: ; %bb.0: ; %.entry
+; GFX9-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: _amdgpu_cs_main:
+; GFX9-GISEL: ; %bb.0: ; %.entry
+; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9
+; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+.entry:
+  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
+  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
+  %idz = call i32 @llvm.amdgcn.workgroup.id.z()
+  %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0
+  %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1
+  %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2
+  call void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32> %ielemz, <4 x i32> undef, i32 0, i32 0, i32 0)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
+declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg)