Skip to content

Commit

Permalink
[NFC][AMDGPU] Refactor AMDGPUDisassembler
Browse files Browse the repository at this point in the history
Clean up ahead of a patch to fix bugs in the AMDGPUDisassembler.

Use split-file to simplify and extend existing kernel-descriptor
disassembly tests.

Add a comment to AMDHSAKernelDescriptor.h, as at least one small set
towards keeping all kernel-descriptor sensitive code in sync.

Reviewed By: MaskRay, kzhuravl, arsenm

Differential Revision: https://reviews.llvm.org/D130105
  • Loading branch information
slinder1 committed Jun 29, 2023
1 parent 45c97af commit ede070a
Show file tree
Hide file tree
Showing 5 changed files with 285 additions and 62 deletions.
7 changes: 7 additions & 0 deletions llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
Expand Up @@ -9,6 +9,13 @@
/// \file
/// AMDHSA kernel descriptor definitions. For more information, visit
/// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
///
/// \warning
/// Any changes to this file should also be audited for corresponding changes
/// needed in both the assembler and disassembler, namely:
/// * AMDGPUAsmPrinter.{cpp,h}
/// * AMDGPUTargetStreamer.{cpp,h}
/// * AMDGPUDisassembler.{cpp,h}
//
//===----------------------------------------------------------------------===//

Expand Down
40 changes: 17 additions & 23 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Expand Up @@ -1634,10 +1634,10 @@ bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
do { \
KdStream << Indent << DIRECTIVE " " \
<< ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
} while (0)

// NOLINTNEXTLINE(readability-identifier-naming)
Expand All @@ -1652,8 +1652,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
// simply calculate the inverse of what the assembler does.

uint32_t GranulatedWorkitemVGPRCount =
(FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
Expand All @@ -1680,8 +1679,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
// The disassembler cannot recover the original values of those 3 directives.

uint32_t GranulatedWavefrontSGPRCount =
(FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
return MCDisassembler::Fail;
Expand Down Expand Up @@ -1791,7 +1789,17 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
return MCDisassembler::Success;
}

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
using namespace amdhsa;
if (!isGFX10Plus() && FourByteBuffer) {
return MCDisassembler::Fail;
}
return MCDisassembler::Success;
}
#undef PRINT_DIRECTIVE
#undef GET_FIELD

MCDisassembler::DecodeStatus
AMDGPUDisassembler::decodeKernelDescriptorDirective(
Expand Down Expand Up @@ -1859,30 +1867,16 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
return MCDisassembler::Success;

case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
// COMPUTE_PGM_RSRC3
// - Only set for GFX10, GFX6-9 have this to be 0.
// - Currently no directives directly control this.
FourByteBuffer = DE.getU32(Cursor);
if (!isGFX10Plus() && FourByteBuffer) {
return MCDisassembler::Fail;
}
return MCDisassembler::Success;
return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
MCDisassembler::Fail) {
return MCDisassembler::Fail;
}
return MCDisassembler::Success;
return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
MCDisassembler::Fail) {
return MCDisassembler::Fail;
}
return MCDisassembler::Success;
return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);

case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
using namespace amdhsa;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Expand Up @@ -167,6 +167,13 @@ class AMDGPUDisassembler : public MCDisassembler {
DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;

/// Decode as directives that handle COMPUTE_PGM_RSRC3.
/// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC3.
/// \param KdStream - Stream to write the disassembled directives to.
// NOLINTNEXTLINE(readability-identifier-naming)
DecodeStatus decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;

DecodeStatus convertEXPInst(MCInst &MI) const;
DecodeStatus convertVINTERPInst(MCInst &MI) const;
DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
Expand Down
147 changes: 127 additions & 20 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s
@@ -1,26 +1,51 @@
;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor.

; RUN: split-file %s %t.dir

; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +7 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
; RUN: diff %t1 %t1-re-assemble

; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +7 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
; RUN: diff %t2 %t2-re-assemble

; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +7 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
; RUN: diff %t3 %t3-re-assemble

; RUN: rm -rf %t && split-file %s %t && cd %t

;--- 1.s
;; Only set next_free_sgpr.
.amdhsa_kernel my_kernel_1
; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx908 < 1.s > 1.o
; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s
; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx908 < 1-disasm.s > 1-disasm.o
; RUN: cmp 1.o 1-disasm.o
; CHECK: .amdhsa_kernel kernel
; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 4
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
; CHECK-NEXT: .amdhsa_next_free_sgpr 48
; CHECK-NEXT: .amdhsa_float_round_mode_32 0
; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
; CHECK-NEXT: .amdhsa_dx10_clamp 1
; CHECK-NEXT: .amdhsa_ieee_mode 1
; CHECK-NEXT: .amdhsa_fp16_overflow 0
; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 42
.amdhsa_reserve_flat_scratch 0
Expand All @@ -30,7 +55,48 @@

;--- 2.s
;; Only set other directives.
.amdhsa_kernel my_kernel_2
; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx908 < 2.s > 2.o
; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s
; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx908 < 2-disasm.s > 2-disasm.o
; RUN: cmp 2.o 2-disasm.o
; CHECK: .amdhsa_kernel kernel
; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 4
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
; CHECK-NEXT: .amdhsa_next_free_sgpr 8
; CHECK-NEXT: .amdhsa_float_round_mode_32 0
; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
; CHECK-NEXT: .amdhsa_dx10_clamp 1
; CHECK-NEXT: .amdhsa_ieee_mode 1
; CHECK-NEXT: .amdhsa_fp16_overflow 0
; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_reserve_flat_scratch 1
Expand All @@ -40,7 +106,48 @@

;--- 3.s
;; Set all affecting directives.
.amdhsa_kernel my_kernel_3
; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx908 < 3.s > 3.o
; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s
; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx908 < 3-disasm.s > 3-disasm.o
; RUN: cmp 3.o 3-disasm.o
; CHECK: .amdhsa_kernel kernel
; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 4
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
; CHECK-NEXT: .amdhsa_next_free_sgpr 48
; CHECK-NEXT: .amdhsa_float_round_mode_32 0
; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
; CHECK-NEXT: .amdhsa_dx10_clamp 1
; CHECK-NEXT: .amdhsa_ieee_mode 1
; CHECK-NEXT: .amdhsa_fp16_overflow 0
; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 35
.amdhsa_reserve_flat_scratch 1
Expand Down

0 comments on commit ede070a

Please sign in to comment.