Skip to content

Commit

Permalink
AMDGPU: Add basic gfx941 target
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D149982
  • Loading branch information
kzhuravl committed May 10, 2023
1 parent 1e46394 commit 1fc7021
Show file tree
Hide file tree
Showing 23 changed files with 99 additions and 3 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/Cuda.h
Expand Up @@ -92,6 +92,7 @@ enum class CudaArch {
GFX90a,
GFX90c,
GFX940,
GFX941,
GFX1010,
GFX1011,
GFX1012,
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Cuda.cpp
Expand Up @@ -114,6 +114,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(90a), // gfx90a
GFX(90c), // gfx90c
GFX(940), // gfx940
GFX(941), // gfx941
GFX(1010), // gfx1010
GFX(1011), // gfx1011
GFX(1012), // gfx1012
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Expand Up @@ -195,6 +195,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX941:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Expand Up @@ -3580,6 +3580,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX941:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGenOpenCL/amdgpu-features.cl
Expand Up @@ -30,6 +30,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
Expand Down Expand Up @@ -75,6 +76,7 @@
// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
Expand Down
1 change: 1 addition & 0 deletions clang/test/Driver/amdgpu-macros.cl
Expand Up @@ -109,6 +109,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Driver/amdgpu-mcpu.cl
Expand Up @@ -93,6 +93,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=GFX90A %s
// RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
Expand Down Expand Up @@ -133,6 +134,7 @@
// GFX90A: "-target-cpu" "gfx90a"
// GFX90C: "-target-cpu" "gfx90c"
// GFX940: "-target-cpu" "gfx940"
// GFX941: "-target-cpu" "gfx941"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Misc/target-invalid-cpu-note.c
Expand Up @@ -29,15 +29,15 @@

// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}

// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
// R600-NEXT: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, caicos, aruba, cayman, turks{{$}}

// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}

// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
Expand Down
8 changes: 8 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Expand Up @@ -385,6 +385,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item Add product
IDs names.

``gfx941`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
- tgsplit flat
- xnack scratch .. TODO::
- Packed
work-item Add product
IDs names.

**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
Expand Down Expand Up @@ -1324,6 +1331,7 @@ The AMDGPU backend uses the following ELF header:
*reserved* 0x048 Reserved.
*reserved* 0x049 Reserved.
*reserved* 0x04a Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
==================================== ========== =============================

Sections
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/BinaryFormat/ELF.h
Expand Up @@ -781,10 +781,11 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X48 = 0x048,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4A = 0x04a,
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,

// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4A,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX941,

// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/TargetParser/TargetParser.h
Expand Up @@ -84,6 +84,7 @@ enum GPUKind : uint32_t {
GK_GFX90A = 66,
GK_GFX90C = 67,
GK_GFX940 = 68,
GK_GFX941 = 69,

GK_GFX1010 = 71,
GK_GFX1011 = 72,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Object/ELFObjectFile.cpp
Expand Up @@ -463,6 +463,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90c";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
return "gfx940";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:
return "gfx941";

// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/ELFYAML.cpp
Expand Up @@ -590,6 +590,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
Expand Down
34 changes: 34 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Expand Up @@ -1227,6 +1227,40 @@ def FeatureISAVersion9_4_0 : FeatureSet<
FullRate64Ops,
FeatureBackOffBarrier]>;

// Subtarget feature list for ISA version 9.4.1. The "gfx941" ProcessorModel
// in GCNProcessors.td consumes it via FeatureISAVersion9_4_1.Features.
// NOTE(review): this looks like a verbatim copy of FeatureISAVersion9_4_0
// (only the tail of that definition is visible next to this one) -- confirm,
// and keep both lists in sync if gfx941 is meant to stay feature-identical
// to gfx940.
def FeatureISAVersion9_4_1 : FeatureSet<
[FeatureGFX9,
FeatureGFX90AInsts,
FeatureGFX940Insts,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureFmacF64Inst,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot3Insts,
FeatureDot4Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
FeatureDot10Insts,
FeatureAtomicDsPkAdd16Insts,
FeatureAtomicFlatPkAdd16Insts,
Feature64BitDPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeatureFP8Insts,
FeaturePkFmacF16Inst,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureAtomicBufferGlobalPkAddF16Insts,
FeatureAtomicGlobalPkAddBF16Inst,
FeatureFlatAtomicFaddF32Inst,
FeatureSupportsSRAMECC,
FeaturePackedTID,
FeatureArchitectedFlatScratch,
FullRate64Ops,
FeatureBackOffBarrier]>;

// TODO: Organize more features into groups.
def FeatureGroup {
// Bugs present on gfx10.1.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNProcessors.td
Expand Up @@ -196,6 +196,10 @@ def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_0.Features
>;

// gfx941: uses the same SIDPGFX940FullSpeedModel as the gfx940 entry above,
// but is paired with its own ISA 9.4.1 feature list.
def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_1.Features
>;

//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Expand Up @@ -107,6 +107,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
Expand Down Expand Up @@ -176,6 +177,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/TargetParser/TargetParser.cpp
Expand Up @@ -105,6 +105,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
Expand Down Expand Up @@ -224,6 +225,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
case GK_GFX940: return {9, 4, 0};
case GK_GFX941: return {9, 4, 1};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
Expand Down Expand Up @@ -322,6 +324,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
break;
case GK_GFX941:
case GK_GFX940:
Features["gfx940-insts"] = true;
Features["fp8-insts"] = true;
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
Expand Up @@ -74,6 +74,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX940-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX940-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 < %s | FileCheck --check-prefixes=GFX941 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX941-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX941-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
Expand Down Expand Up @@ -152,6 +155,9 @@
; GFX940: .amdgcn_target "amdgcn-amd-amdhsa--gfx940"
; GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack-"
; GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack+"
; GFX941: .amdgcn_target "amdgcn-amd-amdhsa--gfx941"
; GFX941-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack-"
; GFX941-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
Expand Up @@ -55,6 +55,7 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90A %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
Expand Down Expand Up @@ -122,6 +123,7 @@
; GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
; GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
Expand Down

0 comments on commit 1fc7021

Please sign in to comment.