diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 476528375fb88..b13181f6e7089 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -93,6 +93,10 @@ def err_drv_hipspv_no_hip_path : Error< "'--hip-path' must be specified when offloading to " "SPIR-V%select{| unless %1 is given}0.">; +// TODO: Remove when COV6 is fully supported by ROCm. +def warn_drv_amdgpu_cov6: Warning< + "code object v6 is still in development and not ready for production use yet;" + " use at your own risk">; def err_drv_undetermined_gpu_arch : Error< "cannot determine %0 architecture: %1; consider passing it via " "'%2'">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 73071a6648541..fb5f50ef452c2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4801,9 +4801,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>, - Values<"none,4,5">, + Values<"none,4,5,6">, NormalizedValuesScope<"llvm::CodeObjectVersionKind">, - NormalizedValues<["COV_None", "COV_4", "COV_5"]>, + NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>, MarshallingInfoEnum, "COV_5">; defm cumode : SimpleMFlag<"cumode", diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 196be813a4896..f17e4a83305bf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17756,9 +17756,9 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. /// Emit code based on Code Object ABI version. /// COV_4 : Emit code to use dispatch ptr -/// COV_5 : Emit code to use implicitarg ptr +/// COV_5+ : Emit code to use implicitarg ptr /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version" -/// and use its value for COV_4 or COV_5 approach. It is used for +/// and use its value for COV_4 or COV_5+ approach. It is used for /// compiling device libraries in an ABI-agnostic way. /// /// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by @@ -17801,7 +17801,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2))); } else { Value *GEP = nullptr; - if (Cov == CodeObjectVersionKind::COV_5) { + if (Cov >= CodeObjectVersionKind::COV_5) { // Indexing the implicit kernarg segment. GEP = CGF.Builder.CreateConstGEP1_32( CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index acc247447b985..5d570c90e5340 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2650,7 +2650,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) { void tools::checkAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { const unsigned MinCodeObjVer = 4; - const unsigned MaxCodeObjVer = 5; + const unsigned MaxCodeObjVer = 6; if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) { if (CodeObjArg->getOption().getID() == @@ -2661,6 +2661,12 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, if (Remnant || CodeObjVer < MinCodeObjVer || CodeObjVer > MaxCodeObjVer) D.Diag(diag::err_drv_invalid_int_value) << CodeObjArg->getAsString(Args) << CodeObjArg->getValue(); + + // COV6 is only supported by LLVM at the time of writing this, and it's + // expected to take some time before all ROCm components fully + // support it. In the meantime, make sure users are aware of this. + if (CodeObjVer == 6) + D.Diag(diag::warn_drv_amdgpu_cov6); } } } diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu index 663687ae227f2..d33acdf7eb8be 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu @@ -4,6 +4,9 @@ // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ // RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ +// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s + // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ // RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s @@ -15,6 +18,10 @@ // RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\ // RUN: FileCheck -check-prefix=LINKED5 %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ +// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\ +// RUN: FileCheck -check-prefix=LINKED6 %s + #include "Inputs/cuda.h" // LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400 @@ -77,6 +84,36 @@ // LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef // LINKED5: "amdgpu_code_object_version", i32 500 +// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 +// LINKED6-LABEL: bar +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef + +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef + +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef +// LINKED6: "amdgpu_code_object_version", i32 600 + #ifdef DEVICELIB __device__ void bar(int *x, int *y, int *z) { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 3cb6632fc0b63..d3450a105df33 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -9,6 +9,9 @@ // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=5 -o - %s | FileCheck -check-prefix=V5 %s +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ +// RUN: -mcode-object-version=6 -o - %s | FileCheck -check-prefix=V6 %s + // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=none -o - %s | FileCheck %s -check-prefix=NONE @@ -17,5 +20,6 @@ // V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400} // V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500} +// V6: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 600} // NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", // INV: error: invalid value '4.1' in '-mcode-object-version=4.1' diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index 0c846e0936b58..f42b69f492ff8 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -7,6 +7,10 @@ // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COV5 %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \ +// RUN: | FileCheck -check-prefix=COV5 %s + // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=COVNONE %s diff --git a/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip index af5f9a3da21df..5b1ded6e74047 100644 --- a/clang/test/Driver/hip-code-object-version.hip +++ b/clang/test/Driver/hip-code-object-version.hip @@ -23,6 +23,19 @@ // V5: "-mllvm" "--amdhsa-code-object-version=5" // V5: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906" +// Check bundle ID for code object version 6. + +// RUN: not %clang -### --target=x86_64-linux-gnu \ +// RUN: -mcode-object-version=6 \ +// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=V6 %s + +// V6: warning: code object v6 is still in development and not ready for production use yet; use at your own risk +// V6: "-mcode-object-version=6" +// V6: "-mllvm" "--amdhsa-code-object-version=6" +// V6: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906" + + // Check bundle ID for code object version default // RUN: %clang -### --target=x86_64-linux-gnu \ diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 6ac5778721ba5..a998db531d668 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -187,13 +187,26 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 -// Test -mcode-object-version=5 with old device library without abi_version_400.bc +// Test -mcode-object-version=5 with old device library without abi_version_500.bc // RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: -mcode-object-version=5 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 +// Test -mcode-object-version=6 +// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=6 \ +// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6 + +// Test -mcode-object-version=6 with old device library without abi_version_600.bc +// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ +// RUN: -mcode-object-version=6 \ +// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ +// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI6 + // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} @@ -237,7 +250,10 @@ // ABI4: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc" // ABI5-NOT: error: // ABI5: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc" +// ABI6-NOT: error: +// ABI6: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_600.bc" // NOABI4-NOT: error: // NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc" // NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc" // NOABI5: error: cannot find ROCm device libraryfor ABI version 5; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library +// NOABI6: error: cannot find ROCm device libraryfor ABI version 6; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c index c587337da5933..07a75046d4e01 100644 --- a/clang/test/Misc/warning-flags.c +++ b/clang/test/Misc/warning-flags.c @@ -18,7 +18,7 @@ This test serves two purposes: The list of warnings below should NEVER grow. It should gradually shrink to 0. -CHECK: Warnings without flags (65): +CHECK: Warnings without flags (66): CHECK-NEXT: ext_expected_semi_decl_list CHECK-NEXT: ext_explicit_specialization_storage_class @@ -43,6 +43,7 @@ CHECK-NEXT: warn_collection_expr_type CHECK-NEXT: warn_conflicting_variadic CHECK-NEXT: warn_delete_array_type CHECK-NEXT: warn_double_const_requires_fp64 +CHECK-NEXT: warn_drv_amdgpu_cov6 CHECK-NEXT: warn_drv_assuming_mfloat_abi_is CHECK-NEXT: warn_drv_clang_unsupported CHECK-NEXT: warn_drv_pch_not_first_include diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index a3c41fb4611f5..ffde7f50087e5 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -284,6 +284,8 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, if (const llvm::opt::Arg *a = args.getLastArg( clang::driver::options::OPT_mcode_object_version_EQ)) { llvm::StringRef s = a->getValue(); + if (s == "6") + opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_6; if (s == "5") opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_5; if (s == "4") diff --git a/flang/test/Lower/AMD/code-object-version.f90 b/flang/test/Lower/AMD/code-object-version.f90 index 7cb9dc079724e..455f454725282 100644 --- a/flang/test/Lower/AMD/code-object-version.f90 +++ b/flang/test/Lower/AMD/code-object-version.f90 @@ -3,11 +3,12 @@ !RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s !RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s !RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s +!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=6 %s -o - | FileCheck --check-prefix=COV_6 %s !COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 !COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 !COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 !COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32 +!COV_6: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(600 : i32) {addr_space = 4 : i32} : i32 subroutine target_simple end subroutine target_simple - diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index 650744db7dee3..d9440acec9dda 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -25,6 +25,7 @@ class AMDGPU final : public TargetInfo { private: uint32_t calcEFlagsV3() const; uint32_t calcEFlagsV4() const; + uint32_t calcEFlagsV6() const; public: AMDGPU(); @@ -106,6 +107,24 @@ uint32_t AMDGPU::calcEFlagsV4() const { return retMach | retXnack | retSramEcc; } +uint32_t AMDGPU::calcEFlagsV6() const { + uint32_t flags = calcEFlagsV4(); + + uint32_t genericVersion = + getEFlags(ctx.objectFiles[0]) & EF_AMDGPU_GENERIC_VERSION; + + // Verify that all input files have compatible generic version. + for (InputFile *f : ArrayRef(ctx.objectFiles).slice(1)) { + if (genericVersion != (getEFlags(f) & EF_AMDGPU_GENERIC_VERSION)) { + error("incompatible generic version: " + toString(f)); + return 0; + } + } + + flags |= genericVersion; + return flags; +} + uint32_t AMDGPU::calcEFlags() const { if (ctx.objectFiles.empty()) return 0; @@ -121,6 +140,8 @@ uint32_t AMDGPU::calcEFlags() const { case ELFABIVERSION_AMDGPU_HSA_V4: case ELFABIVERSION_AMDGPU_HSA_V5: return calcEFlagsV4(); + case ELFABIVERSION_AMDGPU_HSA_V6: + return calcEFlagsV6(); default: error("unknown abi version: " + Twine(abiVersion)); return 0; diff --git a/lld/test/ELF/amdgpu-tid.s b/lld/test/ELF/amdgpu-tid.s index 6623443a4541d..ee0062eb750c8 100644 --- a/lld/test/ELF/amdgpu-tid.s +++ b/lld/test/ELF/amdgpu-tid.s @@ -43,3 +43,19 @@ # SRAMECC-OFF: EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800) # SRAMECC-ON: EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00) # SRAMECC-INCOMPATIBLE: incompatible sramecc: + +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_0.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_1.o +# RUN: ld.lld -shared %t-genericv1_0.o %t-genericv1_1.o -o %t-genericv1_2.so +# RUN: llvm-readobj --file-headers %t-genericv1_2.so | FileCheck --check-prefix=GENERICV1 %s + +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_0.o +# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_1.o +# RUN: ld.lld -shared %t-genericv2_0.o %t-genericv2_1.o -o %t-genericv2_2.so +# RUN: llvm-readobj --file-headers %t-genericv2_2.so | FileCheck --check-prefix=GENERICV2 %s + +# RUN: not ld.lld -shared %t-genericv1_0.o %t-genericv2_0.o -o /dev/null 2>&1 | FileCheck --check-prefix=GENERIC-INCOMPATIBLE %s + +# GENERICV1: EF_AMDGPU_GENERIC_VERSION_V1 (0x1000000) +# GENERICV2: EF_AMDGPU_GENERIC_VERSION_V2 (0x2000000) +# GENERIC-INCOMPATIBLE: incompatible generic version diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 81cdd39afc6ba..efd41f9812baa 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -375,7 +375,8 @@ enum { ELFABIVERSION_AMDGPU_HSA_V2 = 0, ELFABIVERSION_AMDGPU_HSA_V3 = 1, ELFABIVERSION_AMDGPU_HSA_V4 = 2, - ELFABIVERSION_AMDGPU_HSA_V5 = 3 + ELFABIVERSION_AMDGPU_HSA_V5 = 3, + ELFABIVERSION_AMDGPU_HSA_V6 = 4, }; #define ELF_RELOC(name, value) name = value, @@ -842,6 +843,12 @@ enum : unsigned { EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800, // SRAMECC is on. EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00, + + // Generic target versioning. This is contained in the list byte of EFLAGS. + EF_AMDGPU_GENERIC_VERSION = 0xff000000, + EF_AMDGPU_GENERIC_VERSION_OFFSET = 24, + EF_AMDGPU_GENERIC_VERSION_MIN = 1, + EF_AMDGPU_GENERIC_VERSION_MAX = 0xff, }; // ELF Relocation types for AMDGPU diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h index 2dae6feac0889..d5e0f4031b0f6 100644 --- a/llvm/include/llvm/Support/AMDGPUMetadata.h +++ b/llvm/include/llvm/Support/AMDGPUMetadata.h @@ -44,8 +44,15 @@ constexpr uint32_t VersionMajorV5 = 1; /// HSA metadata minor version for code object V5. constexpr uint32_t VersionMinorV5 = 2; +/// HSA metadata major version for code object V6. +constexpr uint32_t VersionMajorV6 = 1; +/// HSA metadata minor version for code object V6. +constexpr uint32_t VersionMinorV6 = 3; + /// Old HSA metadata beginning assembler directive for V2. This is only used for /// diagnostics now. + +/// HSA metadata beginning assembler directive. constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata"; /// Access qualifiers. diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h index aaaed3f5ceac6..596b73bd27e49 100644 --- a/llvm/include/llvm/Support/ScopedPrinter.h +++ b/llvm/include/llvm/Support/ScopedPrinter.h @@ -160,8 +160,8 @@ class ScopedPrinter { template void printFlags(StringRef Label, T Value, ArrayRef> Flags, TFlag EnumMask1 = {}, TFlag EnumMask2 = {}, - TFlag EnumMask3 = {}) { - SmallVector SetFlags; + TFlag EnumMask3 = {}, ArrayRef ExtraFlags = {}) { + SmallVector SetFlags(ExtraFlags.begin(), ExtraFlags.end()); for (const auto &Flag : Flags) { if (Flag.Value == 0) diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 7df8010d55c70..f64cb06b2d77f 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -129,6 +129,7 @@ namespace llvm { COV_3 = 300, // Unsupported. COV_4 = 400, COV_5 = 500, + COV_6 = 600, }; class TargetOptions { diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 31e90fea6e46a..1436e920c0112 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -620,6 +620,15 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(EF_AMDGPU_FEATURE_XNACK_V3); BCase(EF_AMDGPU_FEATURE_SRAMECC_V3); break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V6: + for (unsigned K = ELF::EF_AMDGPU_GENERIC_VERSION_MIN; + K <= ELF::EF_AMDGPU_GENERIC_VERSION_MAX; ++K) { + std::string Key = "EF_AMDGPU_GENERIC_VERSION_V" + std::to_string(K); + IO.maskedBitSetCase(Value, Key.c_str(), + K << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET, + ELF::EF_AMDGPU_GENERIC_VERSION); + } + [[fallthrough]]; case ELF::ELFABIVERSION_AMDGPU_HSA_V4: case ELF::ELFABIVERSION_AMDGPU_HSA_V5: BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 279ef8ca2751a..db81e1ee9e389 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -335,6 +335,9 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) { case AMDGPU::AMDHSA_COV5: HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5()); break; + case AMDGPU::AMDHSA_COV6: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV6()); + break; default: report_fatal_error("Unexpected code object version"); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 186fa58524b9f..c20fdd51607a5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -678,6 +678,16 @@ void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); } +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV6 +//===----------------------------------------------------------------------===// + +void MetadataStreamerMsgPackV6::emitVersion() { + auto Version = HSAMetadataDoc->getArrayNode(); + Version.push_back(Version.getDocument()->getNode(VersionMajorV6)); + Version.push_back(Version.getDocument()->getNode(VersionMinorV6)); + getRootMetadata("amdhsa.version") = Version; +} } // end namespace HSAMD } // end namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 6d6bd86711b13..26229af638f22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -135,7 +135,7 @@ class MetadataStreamerMsgPackV4 : public MetadataStreamer { const SIProgramInfo &ProgramInfo) override; }; -class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 { +class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 { protected: void emitVersion() override; void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, @@ -147,6 +147,15 @@ class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 { ~MetadataStreamerMsgPackV5() = default; }; +class MetadataStreamerMsgPackV6 final : public MetadataStreamerMsgPackV5 { +protected: + void emitVersion() override; + +public: + MetadataStreamerMsgPackV6() = default; + ~MetadataStreamerMsgPackV6() = default; +}; + } // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index d7e8ab76d5ffe..5e9b1674d87dc 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/TargetParser/TargetParser.h" @@ -36,6 +37,12 @@ using namespace llvm::AMDGPU; // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// +static cl::opt + ForceGenericVersion("amdgpu-force-generic-version", + cl::desc("Force a specific generic_v flag to be " + "added. For testing purposes only."), + cl::ReallyHidden, cl::init(0)); + bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { msgpack::Document HSAMetadataDoc; if (!HSAMetadataDoc.fromYAML(HSAMetadataString)) @@ -575,6 +582,8 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() { unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { assert(isHsaAbi(STI)); + if (CodeObjectVersion >= 6) + return getEFlagsV6(); return getEFlagsV4(); } @@ -646,6 +655,23 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV4() { return EFlagsV4; } +unsigned AMDGPUTargetELFStreamer::getEFlagsV6() { + unsigned Flags = getEFlagsV4(); + + unsigned Version = ForceGenericVersion; + + // Versions start at 1. + if (Version) { + if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX) + report_fatal_error("Cannot encode generic code object version " + + Twine(Version) + + " - no ELF flag can represent this version!"); + Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET); + } + + return Flags; +} + void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {} void diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 7f8ddc42b2eef..ad5f27a33fcbd 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -173,6 +173,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { unsigned getEFlagsV3(); unsigned getEFlagsV4(); + unsigned getEFlagsV6(); public: AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 89c066613bd91..33335ac75df76 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -195,6 +195,8 @@ uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) { return ELF::ELFABIVERSION_AMDGPU_HSA_V4; case 5: return ELF::ELFABIVERSION_AMDGPU_HSA_V5; + case 6: + return ELF::ELFABIVERSION_AMDGPU_HSA_V6; default: report_fatal_error("Unsupported AMDHSA Code Object Version " + Twine(CodeObjectVersion)); @@ -206,6 +208,7 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { case AMDHSA_COV4: return 48; case AMDHSA_COV5: + case AMDHSA_COV6: default: return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; } @@ -219,6 +222,7 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { case AMDHSA_COV4: return 24; case AMDHSA_COV5: + case AMDHSA_COV6: default: return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; } @@ -229,6 +233,7 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { case AMDHSA_COV4: return 32; case AMDHSA_COV5: + case AMDHSA_COV6: default: return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET; } @@ -239,6 +244,7 @@ unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { case AMDHSA_COV4: return 40; case AMDHSA_COV5: + case AMDHSA_COV6: default: return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index c0be034ff0ebd..f24b9f0e3615d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -42,7 +42,7 @@ namespace AMDGPU { struct IsaVersion; -enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 }; +enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 }; /// \returns True if \p STI is AMDHSA. bool isHsaAbi(const MCSubtargetInfo &STI); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index 4bdbe6604782a..03374e62e7e9f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -1,9 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { ; GFX8V4-LABEL: addrspacecast: diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index bae693ba2fa3b..2e43f685fd70a 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,5 +1,6 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s diff --git a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll index 07b230d8f974f..7404015891c82 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll @@ -2,6 +2,7 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV4 %s ; RUN: not llc --crash -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV6 %s ; AMDGPUAttributor deletes the function "by accident" so it's never ; codegened with optimizations. @@ -11,6 +12,7 @@ ; OPT-NEXT: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; COV4-NEXT: .amdhsa_code_object_version 4 ; COV5-NEXT: .amdhsa_code_object_version 5 +; COV6-NEXT: .amdhsa_code_object_version 6 ; OPT-NEXT: .amdgpu_metadata ; OPT-NEXT: --- ; OPT-NEXT: amdhsa.kernels: [] @@ -19,6 +21,7 @@ ; OPT-NEXT: - 1 ; COV4: - 1 ; COV5: - 2 +; COV6: - 3 ; OPT: ... define internal i32 @func() { ret i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll index f8fc3e1e76480..8178fecbbbe5f 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll @@ -7,6 +7,9 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s ; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s ; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s +; RUN: llc -filetype=obj -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s +; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s +; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s ; RUN: llc -filetype=obj -mtriple=amdgcn--amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s ; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s ; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s @@ -18,6 +21,7 @@ ; HSA: OS/ABI: AMDGPU_HSA (0x40) ; HSA4: ABIVersion: 2 ; HSA5: ABIVersion: 3 +; HSA6: ABIVersion: 4 ; PAL: OS/ABI: AMDGPU_PAL (0x41) ; PAL: ABIVersion: 0 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll index 22f90682aa973..d91c899a27ebf 100644 --- a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll +++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll @@ -1,3 +1,4 @@ +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll index 9e6c0ef86906d..30fe4a80e693b 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll @@ -1,9 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { ; GFX8V4-LABEL: addrspacecast: diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll index d5590754d78bc..a8263a317baac 100644 --- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V6 %s declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 @@ -122,6 +123,15 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) % ; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 ; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 ; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40 +; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V6-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -149,6 +159,15 @@ define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) % ; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 ; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 ; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 +; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V6-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -176,6 +195,15 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr ; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 ; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 ; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 +; V6-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 +; V6-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 +; V6-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -203,6 +231,15 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr ; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 ; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 ; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 +; V6-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16 +; V6-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16 +; V6-NEXT: ret void ; call void @use_everything_else()%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -234,7 +271,16 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo ; V5: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V5: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. +; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; V6: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +;. ; V4: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 400} ;. ; V5: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500} ;. +; V6: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 600} +;. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll index f4c55e602c64c..ebbbe8aaa3a11 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -1,3 +1,4 @@ +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll index ff06f98df5637..494ace8a641e8 100644 --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE-V5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=MUBUF,ASSUME1024 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=MUBUF,ASSUME1024 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch | FileCheck -check-prefixes=FLATSCR,DEFAULTSIZE %s diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll index 95c1a085ee8cf..ccf30b5a593f7 100644 --- a/llvm/test/CodeGen/AMDGPU/recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/recursion.ll @@ -1,5 +1,6 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s ; CHECK-LABEL: {{^}}recursive: ; CHECK: ScratchSize: 16 diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll index c30089a8dd32a..503b334875797 100644 --- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll @@ -1,5 +1,6 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN-V5 %s ; Make sure there's no assertion when trying to report the resource ; usage for a function which becomes dead during codegen. diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll index 41311abb6983f..4faaf60ef1131 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x12C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll index 3f380a97240e5..2079db73c1e46 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x22) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22) ; ELF-NEXT: ] diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll index da3f5640e6182..5fa49c53eb9f3 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll index d458f34891293..0d0a8d80dfddc 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll index 5c23e1ef5b42f..c29fb1f0adf6c 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll index e3635ba5c2acb..8f6a4ff8639f1 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll index 1b7c65a9151d8..f24e0b23f52c2 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll index bd74574746030..1493004cd4fb4 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll index 18b118fb5739c..f0af6ca864524 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x22) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22) ; ELF-NEXT: ] diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll index db6e8923165b4..5501ce92e0789 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll index 0725c779cc66b..4cec639436df4 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll @@ -1,10 +1,14 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s + ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -12,10 +16,12 @@ ; ASM: - 1 ; ASM4: - 1 ; ASM5: - 2 +; ASM6: - 3 ; ELF: OS/ABI: AMDGPU_HSA (0x40) ; ELF4: ABIVersion: 2 ; ELF5: ABIVersion: 3 +; ELF6: ABIVersion: 4 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) diff --git a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s index 248890391a6b8..4c8849e8540ba 100644 --- a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s +++ b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s @@ -3,6 +3,11 @@ // RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s // RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + // READOBJ: Section Headers // READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 // READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 diff --git a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test similarity index 100% rename from llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test rename to llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s new file mode 100644 index 0000000000000..337938e2a57ba --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s @@ -0,0 +1,16 @@ +; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -o %t.o +; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V1 + +; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=4 -o %t.o +; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V4 + +; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=32 -o %t.o +; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V32 + +; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=255 -o %t.o +; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V255 + +; V1: generic_v1 +; V4: generic_v4 +; V32: generic_v32 +; V255: generic_v255 diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test new file mode 100644 index 0000000000000..ae7f96c92c266 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test @@ -0,0 +1,26 @@ +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V1 +# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V1 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V32 +# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V32 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V126 +# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V126 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V255 +# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V255 + +# V1: generic_v1 +# V32: generic_v32 +# V126: generic_v126 +# V255: generic_v255 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + OSABI: ELFOSABI_AMDGPU_HSA + ABIVersion: [[ABI_VERSION]] + Type: ET_REL + Machine: EM_AMDGPU + Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, [[GENERICVER]] ] diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ce33b15b099aa..82bb12f95d3a3 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1558,134 +1558,79 @@ const EnumEntry ElfHeaderMipsFlags[] = { ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6") }; +// clang-format off +#define AMDGPU_MACH_ENUM_ENTS \ + ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201") +// clang-format on + const EnumEntry ElfHeaderAMDGPUFlagsABIVersion3[] = { - ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), - ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), - ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), - ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), - ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), - ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), - ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), - ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), + AMDGPU_MACH_ENUM_ENTS, ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_V3, "xnack"), ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_V3, "sramecc"), }; const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { - ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), - ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), - ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), - ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), - ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), - ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), - ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), - ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), - ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), - ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), + AMDGPU_MACH_ENUM_ENTS, ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ANY_V4, "xnack"), ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_OFF_V4, "xnack-"), ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ON_V4, "xnack+"), @@ -3678,6 +3623,18 @@ template void GNUELFDumper::printFileHeaders() { unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4), unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4)); break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V6: { + ElfFlags = + printFlags(e.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion4), + unsigned(ELF::EF_AMDGPU_MACH), + unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4), + unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4)); + if (auto GenericV = e.e_flags & ELF::EF_AMDGPU_GENERIC_VERSION) { + ElfFlags += + ", generic_v" + + to_string(GenericV >> ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET); + } + } break; } } Str = "0x" + utohexstr(e.e_flags); @@ -6949,6 +6906,25 @@ template void LLVMELFDumper::printFileHeaders() { unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4), unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4)); break; + case ELF::ELFABIVERSION_AMDGPU_HSA_V6: { + std::optional VerFlagEntry; + // The string needs to remain alive from the moment we create a + // FlagEntry until printFlags is done. + std::string FlagStr; + if (auto VersionFlag = E.e_flags & ELF::EF_AMDGPU_GENERIC_VERSION) { + unsigned Version = + VersionFlag >> ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET; + FlagStr = "EF_AMDGPU_GENERIC_VERSION_V" + std::to_string(Version); + VerFlagEntry = FlagEntry(FlagStr, VersionFlag); + } + W.printFlags( + "Flags", E.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion4), + unsigned(ELF::EF_AMDGPU_MACH), + unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4), + unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4), + VerFlagEntry ? ArrayRef(*VerFlagEntry) : ArrayRef()); + break; + } } } else if (E.e_machine == EM_RISCV) W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderRISCVFlags));