Skip to content

Commit

Permalink
[AMDGPU] Add gfx10 assembler directive to specify shared VGPR count
Browse files Browse the repository at this point in the history
Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D105507
  • Loading branch information
lamb-j committed Mar 7, 2022
1 parent 0405920 commit 5160447
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 2 deletions.
8 changes: 6 additions & 2 deletions llvm/docs/AMDGPUUsage.rst
Expand Up @@ -4451,8 +4451,10 @@ The fields used by CP for code objects before V3 also match those specified in
======= ======= =============================== ===========================================================================
Bits Size Field Name Description
======= ======= =============================== ===========================================================================
3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPRs for wavefront size 64. Granularity 8. Value 0-120.
compute_pgm_rsrc1.vgprs + shared_vgpr_cnt cannot exceed 64.
3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPR blocks when executing in subvector mode. For
wavefront size 64 the value is 0-15, representing 0-120 VGPRs (granularity
of 8), such that (compute_pgm_rsrc1.vgprs + 1)*4 + shared_vgpr_count*8 does
not exceed 256. For wavefront size 32 shared_vgpr_count must be 0.
31:4 28 Reserved, must be 0.
bits
32 **Total size 4 bytes.**
Expand Down Expand Up @@ -12372,6 +12374,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_forward_progress`` 0 GFX10 Controls FWD_PROGRESS in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_shared_vgpr_count`` 0 GFX10 Controls SHARED_VGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table`.
``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Expand Up @@ -4645,6 +4645,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
uint64_t AccumOffset = 0;
uint64_t SharedVGPRCount = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;

Expand Down Expand Up @@ -4872,6 +4873,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
SharedVGPRCount = Val;
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
Expand Down Expand Up @@ -4961,6 +4969,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
(AccumOffset / 4 - 1));
}

if (IVersion.Major == 10) {
// SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
if (SharedVGPRCount && EnableWavefrontSize32) {
return TokError("shared_vgpr_count directive not valid on "
"wavefront size 32");
}
if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
return TokError("shared_vgpr_count*2 + "
"compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
"exceed 63\n");
}
}

getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
ReserveFlatScr);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Expand Up @@ -447,6 +447,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT);
}
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
Expand Down
43 changes: 43 additions & 0 deletions llvm/test/MC/AMDGPU/hsa-diag-v3.s
Expand Up @@ -225,6 +225,49 @@
.amdhsa_forward_progress 5
.end_amdhsa_kernel

// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid1
// NONGFX10: error: directive requires gfx10+
// GFX10: error: .amdhsa_next_free_vgpr directive is required
// NONAMDHSA: error: unknown directive
.warning "test_amdhsa_shared_vgpr_count_invalid1"
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid1
.amdhsa_shared_vgpr_count 8
.end_amdhsa_kernel

// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid2
// NONGFX10: error: directive requires gfx10+
// GFX10: error: shared_vgpr_count directive not valid on wavefront size 32
// NONAMDHSA: error: unknown directive
.warning "test_amdhsa_shared_vgpr_count_invalid2"
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid2
.amdhsa_next_free_vgpr 16
.amdhsa_next_free_sgpr 0
.amdhsa_shared_vgpr_count 8
.amdhsa_wavefront_size32 1
.end_amdhsa_kernel

// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid3
// NONGFX10: error: directive requires gfx10+
// GFX10: error: value out of range
// NONAMDHSA: error: unknown directive
.warning "test_amdhsa_shared_vgpr_count_invalid3"
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid3
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 0
.amdhsa_shared_vgpr_count 16
.end_amdhsa_kernel

// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid4
// NONGFX10: error: directive requires gfx10+
// GFX10: error: shared_vgpr_count*2 + compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot exceed 63
// NONAMDHSA: error: unknown directive
.warning "test_amdhsa_shared_vgpr_count_invalid4"
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid4
.amdhsa_next_free_vgpr 273
.amdhsa_next_free_sgpr 0
.amdhsa_shared_vgpr_count 15
.end_amdhsa_kernel

// GCN-LABEL: warning: test_next_free_vgpr_invalid
// AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
// NONAMDHSA-NOT: error:
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
Expand Up @@ -68,11 +68,13 @@ special_sgpr:
.amdhsa_kernel minimal
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_shared_vgpr_count 0
.end_amdhsa_kernel

// ASM: .amdhsa_kernel minimal
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM: .amdhsa_shared_vgpr_count 0
// ASM: .end_amdhsa_kernel

// Test that we can specify all available directives with non-default values.
Expand Down Expand Up @@ -153,6 +155,7 @@ special_sgpr:
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
// ASM-NEXT: .amdhsa_shared_vgpr_count 0
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
Expand Down

0 comments on commit 5160447

Please sign in to comment.