Skip to content

Commit

Permalink
[AMDGPU] Add gfx940 target
Browse files Browse the repository at this point in the history
This is target definition only.

Differential Revision: https://reviews.llvm.org/D120688
  • Loading branch information
rampitec committed Mar 2, 2022
1 parent 59262d2 commit 2e2e64d
Show file tree
Hide file tree
Showing 31 changed files with 329 additions and 22 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ enum class CudaArch {
GFX909,
GFX90a,
GFX90c,
GFX940,
GFX1010,
GFX1011,
GFX1012,
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(909), // gfx909
GFX(90a), // gfx90a
GFX(90c), // gfx90c
GFX(940), // gfx940
GFX(1010), // gfx1010
GFX(1011), // gfx1011
GFX(1012), // gfx1012
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
break;
case GK_GFX940:
Features["gfx940-insts"] = true;
LLVM_FALLTHROUGH;
case GK_GFX90A:
Features["gfx90a-insts"] = true;
LLVM_FALLTHROUGH;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3916,6 +3916,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGenOpenCL/amdgpu-features.cl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx909 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
Expand Down Expand Up @@ -58,6 +59,7 @@
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
Expand Down
1 change: 1 addition & 0 deletions clang/test/Driver/amdgpu-macros.cl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx909
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Driver/amdgpu-mcpu.cl
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefix=GFX909 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=GFX90A %s
// RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
Expand Down Expand Up @@ -126,6 +127,7 @@
// GFX909: "-target-cpu" "gfx909"
// GFX90A: "-target-cpu" "gfx90a"
// GFX90C: "-target-cpu" "gfx90c"
// GFX940: "-target-cpu" "gfx940"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Driver/cuda-bad-arch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -x hip -target x86_64-linux-gnu --cuda-gpu-arch=gfx90a -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=gfx940 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s

Expand Down
4 changes: 2 additions & 2 deletions clang/test/Misc/target-invalid-cpu-note.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@

// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}

// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
// R600-NEXT: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, caicos, aruba, cayman, turks{{$}}

// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}

// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
Expand Down
36 changes: 24 additions & 12 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
- Ryzen 3 Pro 4350G
- Ryzen 3 Pro 4350GE

``gfx940`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
- tgsplit flat
- xnack scratch .. TODO::
- Packed
work-item Add product
IDs names.

**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
Expand Down Expand Up @@ -1224,7 +1231,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
*reserved* 0x040 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
*reserved* 0x041 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
*reserved* 0x043 Reserved.
Expand Down Expand Up @@ -3866,7 +3873,7 @@ The fields used by CP for code objects before V3 also match those specified in
bytes
383:352 4 bytes COMPUTE_PGM_RSRC3 GFX6-GFX9
Reserved, must be 0.
GFX90A
GFX90A, GFX940
Compute Shader (CS)
program settings used by
CP to set up
Expand Down Expand Up @@ -3960,7 +3967,7 @@ The fields used by CP for code objects before V3 also match those specified in
GFX6-GFX9
- vgprs_used 0..256
- max(0, ceil(vgprs_used / 4) - 1)
GFX90A
GFX90A, GFX940
- vgprs_used 0..512
- vgprs_used = align(arch_vgprs, 4)
+ acc_vgprs
Expand Down Expand Up @@ -4408,7 +4415,7 @@ The fields used by CP for code objects before V3 also match those specified in

..

.. table:: compute_pgm_rsrc3 for GFX90A
.. table:: compute_pgm_rsrc3 for GFX90A, GFX940
:name: amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table

======= ======= =============================== ===========================================================================
Expand Down Expand Up @@ -12268,7 +12275,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_user_sgpr_count`` 0 GFX6-GFX10 Controls USER_SGPR_COUNT in COMPUTE_PGM_RSRC2
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`
``.amdhsa_user_sgpr_private_segment_buffer`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
(except :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
GFX940)
``.amdhsa_user_sgpr_dispatch_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_PTR in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_user_sgpr_queue_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_QUEUE_PTR in
Expand All @@ -12278,14 +12286,18 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_user_sgpr_dispatch_id`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_ID in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_user_sgpr_flat_scratch_init`` 0 GFX6-GFX10 Controls ENABLE_SGPR_FLAT_SCRATCH_INIT in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
(except :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
GFX940)
``.amdhsa_user_sgpr_private_segment_size`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_SIZE in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_wavefront_size32`` Target GFX10 Controls ENABLE_WAVEFRONT_SIZE32 in
Feature :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
Specific
(wavefrontsize64)
``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in
(except :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
GFX940)
``.amdhsa_enable_private_segment`` 0 GFX940 Controls ENABLE_PRIVATE_SEGMENT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
``.amdhsa_system_sgpr_workgroup_id_x`` 1 GFX6-GFX10 Controls ENABLE_SGPR_WORKGROUP_ID_X in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
Expand All @@ -12305,15 +12317,15 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_next_free_sgpr`` Required GFX6-GFX10 Maximum SGPR number explicitly referenced, plus one.
Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_accum_offset`` Required GFX90A Offset of a first AccVGPR in the unified register file.
Used to calculate ACCUM_OFFSET in
``.amdhsa_accum_offset`` Required GFX90A, Offset of a first AccVGPR in the unified register file.
GFX940 Used to calculate ACCUM_OFFSET in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
``.amdhsa_reserve_vcc`` 1 GFX6-GFX10 Whether the kernel may use the special VCC SGPR.
Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_reserve_flat_scratch`` 1 GFX7-GFX10 Whether the kernel may use flat instructions to access
scratch memory. Used to calculate
GRANULATED_WAVEFRONT_SGPR_COUNT in
(except scratch memory. Used to calculate
GFX940) GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_reserve_xnack_mask`` Target GFX8-GFX10 Whether the kernel may trigger XNACK replay.
Feature Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
Expand Down Expand Up @@ -12341,8 +12353,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_fp16_overflow`` 0 GFX9-GFX10 Controls FP16_OVFL in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_tg_split`` Target GFX90A Controls TG_SPLIT in
Feature :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
``.amdhsa_tg_split`` Target GFX90A, Controls TG_SPLIT in
Feature GFX940 :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
Specific
(tgsplit)
``.amdhsa_workgroup_processor_mode`` Target GFX10 Controls ENABLE_WGP_MODE in
Expand Down
Loading

0 comments on commit 2e2e64d

Please sign in to comment.