diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index f592660f4965e..5f0abbec54fb5 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -4114,9 +4114,10 @@ The fields used by CP for code objects before V3 also match those specified in Used by CP to set up ``COMPUTE_PGM_RSRC2.SCRATCH_EN``. 5:1 5 bits USER_SGPR_COUNT The total number of SGPR - user data registers - requested. This number must - match the number of user + user data + registers requested. This + number must be greater than + or equal to the number of user data registers enabled. Used by CP to set up @@ -12107,6 +12108,8 @@ terminated by an ``.end_amdhsa_kernel`` directive. :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. ``.amdhsa_kernarg_size`` 0 GFX6-GFX10 Controls KERNARG_SIZE in :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_user_sgpr_count`` 0 GFX6-GFX10 Controls USER_SGPR_COUNT in COMPUTE_PGM_RSRC2 + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. ``.amdhsa_user_sgpr_private_segment_buffer`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. ``.amdhsa_user_sgpr_dispatch_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_PTR in diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index c71205b17a1a5..f6c54fe3e7bf9 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -4568,7 +4568,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { uint64_t AccumOffset = 0; SMRange SGPRRange; uint64_t NextFreeSGPR = 0; - unsigned UserSGPRCount = 0; + + // Count the number of user SGPRs implied from the enabled feature bits. + unsigned ImpliedUserSGPRCount = 0; + + // Track if the asm explicitly contains the directive for the user SGPR + // count. 
+ Optional ExplicitUserSGPRCount; bool ReserveVCC = true; bool ReserveFlatScr = true; Optional EnableWavefrontSize32; @@ -4617,6 +4623,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (!isUInt(Val)) return OutOfRangeError(ValRange); KD.kernarg_size = Val; + } else if (ID == ".amdhsa_user_sgpr_count") { + ExplicitUserSGPRCount = Val; } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, @@ -4626,31 +4634,31 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, Val, ValRange); if (Val) - UserSGPRCount += 4; + ImpliedUserSGPRCount += 4; } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, ValRange); if (Val) - UserSGPRCount += 2; + ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, ValRange); if (Val) - UserSGPRCount += 2; + ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, Val, ValRange); if (Val) - UserSGPRCount += 2; + ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, ValRange); if (Val) - UserSGPRCount += 2; + ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, @@ -4660,13 +4668,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, ValRange); if (Val) - UserSGPRCount += 2; + ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { PARSE_BITS_ENTRY(KD.kernel_code_properties, 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, Val, ValRange); if (Val) - UserSGPRCount += 1; + ImpliedUserSGPRCount += 1; } else if (ID == ".amdhsa_wavefront_size32") { if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); @@ -4850,6 +4858,14 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, SGPRBlocks); + // An explicit count may exceed the implied count but never undercut it. + if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) + return TokError(".amdhsa_user_sgpr_count smaller than implied by " + "enabled user SGPRs"); + + unsigned UserSGPRCount = + ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; + if (!isUInt(UserSGPRCount)) return TokError("too many user SGPRs enabled"); AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 9a9a2c973f448..9578bdb0bad07 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -319,6 +319,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( << KD.private_segment_fixed_size << '\n'; OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n'; + PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD, + compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT); + if (!hasArchitectedFlatScratch(STI)) PRINT_FIELD( OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, diff --git a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll index 8389bbd1bfb56..2bc34cedd1e03 100644 --- a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll @@ -12,6 +12,7 @@ ; OSABI-AMDHSA-ASM: .section .rodata,#alloc ; OSABI-AMDHSA-ASM: .p2align 6 ; OSABI-AMDHSA-ASM: .amdhsa_kernel fadd +; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_count 6 ; OSABI-AMDHSA-ASM: 
.amdhsa_user_sgpr_private_segment_buffer 1 ; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_kernarg_segment_ptr 1 ; OSABI-AMDHSA-ASM: .amdhsa_next_free_vgpr 3 @@ -30,6 +31,7 @@ ; OSABI-AMDHSA-ASM: .section .rodata,#alloc ; OSABI-AMDHSA-ASM: .p2align 6 ; OSABI-AMDHSA-ASM: .amdhsa_kernel fsub +; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_count 6 ; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_private_segment_buffer 1 ; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_kernarg_segment_ptr 1 ; OSABI-AMDHSA-ASM: .amdhsa_next_free_vgpr 3 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll index 2afce5352b60e..aebbe33fd236c 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll @@ -57,6 +57,7 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() { ; CHECK-NEXT: s_endpgm ; CHECK: .amdhsa_kernarg_size 0 +; CHECK-NEXT: .amdhsa_user_sgpr_count 6 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0 diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll index ff409b938319b..d9fdfb0d101af 100644 --- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll +++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll @@ -9,6 +9,7 @@ declare void @llvm.debugtrap() #1 ; HSA-NEXT: .amdhsa_group_segment_fixed_size 0 ; HSA-NEXT: .amdhsa_private_segment_fixed_size 0 ; HSA-NEXT: .amdhsa_kernarg_size 8 +; HSA-NEXT: .amdhsa_user_sgpr_count 8 ; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; HSA: .end_amdhsa_kernel diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll index 8e3e7776fb95e..7cc2b8214a36d 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -18,6 +18,7 @@ define amdgpu_kernel void @max_alignment_128() #0 { ; VI-NEXT: 
.amdhsa_group_segment_fixed_size 0 ; VI-NEXT: .amdhsa_private_segment_fixed_size 256 ; VI-NEXT: .amdhsa_kernarg_size 0 +; VI-NEXT: .amdhsa_user_sgpr_count 6 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -65,6 +66,7 @@ define amdgpu_kernel void @max_alignment_128() #0 { ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256 ; GFX9-NEXT: .amdhsa_kernarg_size 0 +; GFX9-NEXT: .amdhsa_user_sgpr_count 6 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -119,6 +121,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 { ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 ; VI-NEXT: .amdhsa_private_segment_fixed_size 8 ; VI-NEXT: .amdhsa_kernarg_size 0 +; VI-NEXT: .amdhsa_user_sgpr_count 6 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -166,6 +169,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 { ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8 ; GFX9-NEXT: .amdhsa_kernarg_size 0 +; GFX9-NEXT: .amdhsa_user_sgpr_count 6 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -220,6 +224,7 @@ define amdgpu_kernel void @alignstack_attr() #2 { ; VI-NEXT: .amdhsa_group_segment_fixed_size 0 ; VI-NEXT: .amdhsa_private_segment_fixed_size 128 ; VI-NEXT: .amdhsa_kernarg_size 0 +; VI-NEXT: .amdhsa_user_sgpr_count 6 ; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0 @@ -267,6 +272,7 @@ define amdgpu_kernel void @alignstack_attr() #2 { ; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0 ; GFX9-NEXT: .amdhsa_private_segment_fixed_size 
128 ; GFX9-NEXT: .amdhsa_kernarg_size 0 +; GFX9-NEXT: .amdhsa_user_sgpr_count 6 ; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 ; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s index b7a7ad824851e..690a86acbccc3 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -123,6 +123,7 @@ special_sgpr: // ASM-NEXT: .amdhsa_group_segment_fixed_size 1 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1 // ASM-NEXT: .amdhsa_kernarg_size 8 +// ASM-NEXT: .amdhsa_user_sgpr_count 15 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s index 76b82d4ae3321..9f854986d7bc4 100644 --- a/llvm/test/MC/AMDGPU/hsa-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-v3.s @@ -132,6 +132,7 @@ disabled_user_sgpr: // ASM-NEXT: .amdhsa_group_segment_fixed_size 1 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1 // ASM-NEXT: .amdhsa_kernarg_size 8 +// ASM-NEXT: .amdhsa_user_sgpr_count 15 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 @@ -276,7 +277,7 @@ v_mov_b32_e32 v16, s3 .end_amdgpu_metadata // ASM: .amdgpu_metadata -// ASM: amdhsa.kernels: +// ASM: amdhsa.kernels: // ASM: - .group_segment_fixed_size: 16 // ASM: .kernarg_segment_align: 64 // ASM: .kernarg_segment_size: 8 @@ -297,7 +298,7 @@ v_mov_b32_e32 v16, s3 // ASM: .symbol: 'amd_kernel_code_t_minimal@kd' // ASM: .vgpr_count: 40 // ASM: .wavefront_size: 128 -// ASM: amdhsa.version: +// ASM: amdhsa.version: // ASM-NEXT: - 3 // ASM-NEXT: - 0 // ASM: .end_amdgpu_metadata diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s index 61dbf75c3fa93..6a824b8bcc7b9 100644 --- a/llvm/test/MC/AMDGPU/hsa-v4.s +++ 
b/llvm/test/MC/AMDGPU/hsa-v4.s @@ -94,6 +94,7 @@ disabled_user_sgpr: .amdhsa_group_segment_fixed_size 1 .amdhsa_private_segment_fixed_size 1 .amdhsa_kernarg_size 8 + .amdhsa_user_sgpr_count 15 .amdhsa_user_sgpr_private_segment_buffer 1 .amdhsa_user_sgpr_dispatch_ptr 1 .amdhsa_user_sgpr_queue_ptr 1 @@ -132,6 +133,7 @@ disabled_user_sgpr: // ASM-NEXT: .amdhsa_group_segment_fixed_size 1 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1 // ASM-NEXT: .amdhsa_kernarg_size 8 +// ASM-NEXT: .amdhsa_user_sgpr_count 15 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 @@ -276,7 +278,7 @@ v_mov_b32_e32 v16, s3 .end_amdgpu_metadata // ASM: .amdgpu_metadata -// ASM: amdhsa.kernels: +// ASM: amdhsa.kernels: // ASM: - .group_segment_fixed_size: 16 // ASM: .kernarg_segment_align: 64 // ASM: .kernarg_segment_size: 8 @@ -297,7 +299,7 @@ v_mov_b32_e32 v16, s3 // ASM: .symbol: 'amd_kernel_code_t_minimal@kd' // ASM: .vgpr_count: 40 // ASM: .wavefront_size: 128 -// ASM: amdhsa.version: +// ASM: amdhsa.version: // ASM-NEXT: - 3 // ASM-NEXT: - 0 // ASM: .end_amdgpu_metadata diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s new file mode 100644 index 0000000000000..edaeafae22efd --- /dev/null +++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s @@ -0,0 +1,17 @@ +// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s + +.amdhsa_kernel implied_count_too_low_0 + .amdhsa_user_sgpr_count 0 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +// ERR: [[@LINE+1]]:19: error: .amdhsa_user_sgpr_count smaller than implied by enabled user SGPRs +.end_amdhsa_kernel + +.amdhsa_kernel implied_count_too_low_1 + .amdhsa_user_sgpr_count 1 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +// ERR: 
[[@LINE+1]]:19: error: .amdhsa_user_sgpr_count smaller than implied by enabled user SGPRs +.end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s new file mode 100644 index 0000000000000..ab363f91d334b --- /dev/null +++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s + +.text +// ASM: .text + +.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" + + +// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count +// ASM: .amdhsa_user_sgpr_count 15 +.amdhsa_kernel user_sgprs_implied_count_all + .amdhsa_user_sgpr_private_segment_buffer 1 + .amdhsa_user_sgpr_dispatch_ptr 1 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_user_sgpr_dispatch_id 1 + .amdhsa_user_sgpr_flat_scratch_init 1 + .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel + +// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_0 +// ASM: .amdhsa_user_sgpr_count 7 +.amdhsa_kernel user_sgprs_implied_count_0 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_user_sgpr_flat_scratch_init 1 + .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel + +// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_1 +// ASM: .amdhsa_user_sgpr_count 9 +.amdhsa_kernel user_sgprs_implied_count_1 + .amdhsa_user_sgpr_private_segment_buffer 1 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel + + +// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_private_segment_buffer +// ASM: .amdhsa_user_sgpr_count 4 + .amdhsa_kernel 
user_sgprs_implied_count_private_segment_buffer + .amdhsa_user_sgpr_private_segment_buffer 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel + + +// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_16 +.amdhsa_kernel explicit_user_sgpr_count_16 + .amdhsa_user_sgpr_count 16 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel + + +// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_0 +// ASM: .amdhsa_user_sgpr_count 0 + .amdhsa_kernel explicit_user_sgpr_count_0 + .amdhsa_user_sgpr_count 0 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel + +// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_1 +// ASM: .amdhsa_user_sgpr_count 1 +.amdhsa_kernel explicit_user_sgpr_count_1 + .amdhsa_user_sgpr_count 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel + +.amdhsa_kernel explicit_user_sgpr_count_larger_than_implied + .amdhsa_user_sgpr_count 12 + .amdhsa_user_sgpr_private_segment_buffer 1 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 32 +.end_amdhsa_kernel