diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 8bb03249b7f83..ba3acd0295871 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -83,7 +83,7 @@ class TargetOptions { enum CodeObjectVersionKind { COV_None, COV_2 = 200, // Unsupported. - COV_3 = 300, + COV_3 = 300, // Unsupported. COV_4 = 400, COV_5 = 500, }; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5219f90650d07..36052511203f6 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4681,9 +4681,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, Visibility<[ClangOption, CC1Option]>, - Values<"none,3,4,5">, + Values<"none,4,5">, NormalizedValuesScope<"TargetOptions">, - NormalizedValues<["COV_None", "COV_3", "COV_4", "COV_5"]>, + NormalizedValues<["COV_None", "COV_4", "COV_5"]>, MarshallingInfoEnum, "COV_4">; defm cumode : SimpleMFlag<"cumode", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index ad012d3d0d4b4..e19f1829c9fa8 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2349,7 +2349,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) { void tools::checkAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - const unsigned MinCodeObjVer = 3; + const unsigned MinCodeObjVer = 4; const unsigned MaxCodeObjVer = 5; if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) { diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 0ddd63faf46f2..ff5deaf9ab850 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -3,9 +3,6 @@ // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -o - %s | FileCheck %s -check-prefix=V4 -// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s - // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s @@ -18,7 +15,6 @@ // RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV -// V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300} // V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400} // V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500} // NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip index 33559b6576e7d..af5f9a3da21df 100644 --- a/clang/test/Driver/hip-code-object-version.hip +++ b/clang/test/Driver/hip-code-object-version.hip @@ -1,20 +1,5 @@ // REQUIRES: amdgpu-registered-target -// Check bundle ID for code object v3. - -// RUN: not %clang -### --target=x86_64-linux-gnu \ -// RUN: -mcode-object-version=3 \ -// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ -// RUN: %s 2>&1 | FileCheck -check-prefix=V3 %s - -// RUN: not %clang -### --target=x86_64-linux-gnu \ -// RUN: -mcode-object-version=4 -mcode-object-version=3 \ -// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ -// RUN: %s 2>&1 | FileCheck -check-prefix=V3 %s - -// V3: "-mcode-object-version=3" -// V3: "-mllvm" "--amdhsa-code-object-version=3" -// V3: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906" // Check bundle ID for code object version 4. @@ -62,6 +47,13 @@ // INVALID_2: error: invalid integral value '2' in '-mcode-object-version=2' // INVALID_2-NOT: error: invalid integral value +// RUN: not %clang -### --target=x86_64-linux-gnu \ +// RUN: -mcode-object-version=3 \ +// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID_3 %s +// INVALID_3: error: invalid integral value '3' in '-mcode-object-version=3' +// INVALID_3-NOT: error: invalid integral value + // Check LLVM code object version option --amdhsa-code-object-version // is passed to -cc1 and -cc1as, and -mcode-object-version is passed // to -cc1 but not -cc1as. diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index 71d9554da696b..6ac5778721ba5 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -168,12 +168,6 @@ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 -// Test -mcode-object-version=3 -// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ -// RUN: -mcode-object-version=3 \ -// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 - // Test -mcode-object-version=4 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: -mcode-object-version=4 \ diff --git a/lld/test/ELF/amdgpu-abi-version.s b/lld/test/ELF/amdgpu-abi-version.s index 455a52aec9210..72b67fdaeb1a1 100644 --- a/lld/test/ELF/amdgpu-abi-version.s +++ b/lld/test/ELF/amdgpu-abi-version.s @@ -1,11 +1,3 @@ -# REQUIRES: amdgpu -# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj %s -o %t.o -# RUN: ld.lld -shared %t.o -o %t.so -# RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV3 %s - -# COV3: OS/ABI: AMDGPU_HSA (0x40) -# COV3: ABIVersion: 1 - # RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -filetype=obj %s -o %t.o # RUN: ld.lld -shared %t.o -o %t.so # RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV4 %s diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index a3a2f86acfe89..3148d4bebb96b 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1462,12 +1462,10 @@ The AMDGPU backend uses the following ELF header: object conforms: * ``ELFABIVERSION_AMDGPU_HSA_V2`` is used to specify the version of AMD HSA - runtime ABI for code object V2. Specify using the Clang option - ``-mcode-object-version=2``. + runtime ABI for code object V2. Can no longer be emitted by this version of LLVM. * ``ELFABIVERSION_AMDGPU_HSA_V3`` is used to specify the version of AMD HSA - runtime ABI for code object V3. Specify using the Clang option - ``-mcode-object-version=3``. + runtime ABI for code object V3. Can no longer be emitted by this version of LLVM. * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA runtime ABI for code object V4. Specify using the Clang option @@ -3455,8 +3453,7 @@ Code Object V3 Metadata +++++++++++++++++++++++ .. warning:: - Code object V3 is not the default code object version emitted by this version - of LLVM. + Code object V3 generation is no longer supported by this version of LLVM. Code object V3 and above metadata is specified by the ``NT_AMDGPU_METADATA`` note record (see :ref:`amdgpu-note-records-v3-onwards`). diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index c0bb305d19a6b..eb30f31af6d6b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -327,9 +327,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) { if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV3: - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); - break; case AMDGPU::AMDHSA_COV4: HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4()); break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 5060cd3aec581..b51a876750b58 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -49,14 +49,14 @@ namespace AMDGPU { namespace HSAMD { //===----------------------------------------------------------------------===// -// HSAMetadataStreamerV3 +// HSAMetadataStreamerV4 //===----------------------------------------------------------------------===// -void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const { +void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; } -void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const { +void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { errs() << "AMDGPU HSA Metadata Parser Test: "; msgpack::Document FromHSAMetadataString; @@ -78,7 +78,7 @@ void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const { } std::optional -MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const { +MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { return StringSwitch>(AccQual) .Case("read_only", StringRef("read_only")) .Case("write_only", StringRef("write_only")) @@ -86,7 +86,7 @@ MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const { .Default(std::nullopt); } -std::optional MetadataStreamerMsgPackV3::getAddressSpaceQualifier( +std::optional MetadataStreamerMsgPackV4::getAddressSpaceQualifier( unsigned AddressSpace) const { switch (AddressSpace) { case AMDGPUAS::PRIVATE_ADDRESS: @@ -107,7 +107,7 @@ std::optional MetadataStreamerMsgPackV3::getAddressSpaceQualifier( } StringRef -MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual, +MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const { if (TypeQual.contains("pipe")) return "pipe"; @@ -134,7 +134,7 @@ MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual, : "by_value"); } -std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty, +std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, bool Signed) const { switch (Ty->getTypeID()) { case Type::IntegerTyID: { @@ -173,7 +173,7 @@ std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty, } msgpack::ArrayDocNode -MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const { +MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { auto Dims = HSAMetadataDoc->getArrayNode(); if (Node->getNumOperands() != 3) return Dims; @@ -184,14 +184,20 @@ MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const { return Dims; } -void MetadataStreamerMsgPackV3::emitVersion() { +void MetadataStreamerMsgPackV4::emitVersion() { auto Version = HSAMetadataDoc->getArrayNode(); - Version.push_back(Version.getDocument()->getNode(VersionMajorV3)); - Version.push_back(Version.getDocument()->getNode(VersionMinorV3)); + Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); + Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); getRootMetadata("amdhsa.version") = Version; } -void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) { +void MetadataStreamerMsgPackV4::emitTargetID( + const IsaInfo::AMDGPUTargetID &TargetID) { + getRootMetadata("amdhsa.target") = + HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); +} + +void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); if (!Node) return; @@ -204,7 +210,7 @@ void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) { getRootMetadata("amdhsa.printf") = Printf; } -void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func, +void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern) { // TODO: What about other languages? auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); @@ -223,7 +229,7 @@ void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func, Kern[".language_version"] = LanguageVersion; } -void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func, +void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) { if (auto Node = Func.getMetadata("reqd_work_group_size")) @@ -248,7 +254,7 @@ void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func, Kern[".kind"] = Kern.getDocument()->getNode("fini"); } -void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF, +void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern) { auto &Func = MF.getFunction(); unsigned Offset = 0; @@ -261,7 +267,7 @@ void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF, Kern[".args"] = Args; } -void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg, +void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args) { auto Func = Arg.getParent(); @@ -326,7 +332,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg, AccQual, TypeQual); } -void MetadataStreamerMsgPackV3::emitKernelArg( +void MetadataStreamerMsgPackV4::emitKernelArg( const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName, StringRef BaseTypeName, @@ -375,7 +381,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg( Args.push_back(Arg); } -void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( +void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { auto &Func = MF.getFunction(); const GCNSubtarget &ST = MF.getSubtarget(); @@ -448,9 +454,10 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( } } -msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( - const MachineFunction &MF, const SIProgramInfo &ProgramInfo, - unsigned CodeObjectVersion) const { +msgpack::MapDocNode +MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo, + unsigned CodeObjectVersion) const { const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); const Function &F = MF.getFunction(); @@ -495,18 +502,19 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( return Kern; } -bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { +bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); } -void MetadataStreamerMsgPackV3::begin(const Module &Mod, +void MetadataStreamerMsgPackV4::begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) { emitVersion(); + emitTargetID(TargetID); emitPrintf(Mod); getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); } -void MetadataStreamerMsgPackV3::end() { +void MetadataStreamerMsgPackV4::end() { std::string HSAMetadataString; raw_string_ostream StrOS(HSAMetadataString); HSAMetadataDoc->toYAML(StrOS); @@ -517,7 +525,7 @@ void MetadataStreamerMsgPackV3::end() { verify(StrOS.str()); } -void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, +void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && @@ -542,31 +550,6 @@ void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, Kernels.push_back(Kern); } -//===----------------------------------------------------------------------===// -// HSAMetadataStreamerV4 -//===----------------------------------------------------------------------===// - -void MetadataStreamerMsgPackV4::emitVersion() { - auto Version = HSAMetadataDoc->getArrayNode(); - Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); - Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); - getRootMetadata("amdhsa.version") = Version; -} - -void MetadataStreamerMsgPackV4::emitTargetID( - const IsaInfo::AMDGPUTargetID &TargetID) { - getRootMetadata("amdhsa.target") = - HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); -} - -void MetadataStreamerMsgPackV4::begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) { - emitVersion(); - emitTargetID(TargetID); - emitPrintf(Mod); - getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); -} - //===----------------------------------------------------------------------===// // HSAMetadataStreamerV5 //===----------------------------------------------------------------------===// @@ -680,7 +663,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern) { - MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern); + MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern); if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index d2b3b8917ce0f..18a7b5d7a9633 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -62,7 +62,7 @@ class MetadataStreamer { msgpack::MapDocNode Kern) = 0; }; -class MetadataStreamerMsgPackV3 : public MetadataStreamer { +class MetadataStreamerMsgPackV4 : public MetadataStreamer { protected: std::unique_ptr HSAMetadataDoc = std::make_unique(); @@ -89,6 +89,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer { void emitVersion() override; + void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID); + void emitPrintf(const Module &Mod); void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern); @@ -120,8 +122,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer { } public: - MetadataStreamerMsgPackV3() = default; - ~MetadataStreamerMsgPackV3() = default; + MetadataStreamerMsgPackV4() = default; + ~MetadataStreamerMsgPackV4() = default; bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; @@ -134,19 +136,6 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer { const SIProgramInfo &ProgramInfo) override; }; -class MetadataStreamerMsgPackV4 : public MetadataStreamerMsgPackV3 { -protected: - void emitVersion() override; - void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID); - -public: - MetadataStreamerMsgPackV4() = default; - ~MetadataStreamerMsgPackV4() = default; - - void begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) override; -}; - class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 { protected: void emitVersion() override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 7518387aeebc6..7bf411bd6a07c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6469,11 +6469,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return legalizeTrapEndpgm(MI, MRI, B); - const Module *M = B.getMF().getFunction().getParent(); - unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); - if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3) - return legalizeTrapHsaQueuePtr(MI, MRI, B); - return ST.supportsGetDoorbellID() ? legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 70350b83849aa..e635a9182fc08 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -424,7 +424,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( switch (CodeObjectVersion) { default: break; - case AMDGPU::AMDHSA_COV3: case AMDGPU::AMDHSA_COV4: case AMDGPU::AMDHSA_COV5: if (getTargetID()->isXnackSupported()) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5c46d81f57af6..cff7e4bc66218 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6079,11 +6079,6 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return lowerTrapEndpgm(Op, DAG); - const Module *M = DAG.getMachineFunction().getFunction().getParent(); - unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); - if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3) - return lowerTrapHsaQueuePtr(Op, DAG); - return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 6060e39afc5a5..7b8a37532c9fa 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -128,8 +128,6 @@ std::optional getHsaAbiVersion(const MCSubtargetInfo *STI) { return std::nullopt; switch (AmdhsaCodeObjectVersion) { - case 3: - return ELF::ELFABIVERSION_AMDGPU_HSA_V3; case 4: return ELF::ELFABIVERSION_AMDGPU_HSA_V4; case 5: @@ -140,12 +138,6 @@ std::optional getHsaAbiVersion(const MCSubtargetInfo *STI) { } } -bool isHsaAbiVersion3(const MCSubtargetInfo *STI) { - if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) - return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3; - return false; -} - bool isHsaAbiVersion4(const MCSubtargetInfo *STI) { if (std::optional HsaAbiVer = getHsaAbiVersion(STI)) return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4; @@ -174,7 +166,6 @@ unsigned getCodeObjectVersion(const Module &M) { unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 48; case AMDHSA_COV5: @@ -188,7 +179,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { // central TD file. unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 24; case AMDHSA_COV5: @@ -199,7 +189,6 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 32; case AMDHSA_COV5: @@ -210,7 +199,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { switch (CodeObjectVersion) { - case AMDHSA_COV3: case AMDHSA_COV4: return 40; case AMDHSA_COV5: @@ -774,15 +762,6 @@ std::string AMDGPUTargetID::toString() const { std::string Features; if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV3: - // xnack. - if (isXnackOnOrAny()) - Features += "+xnack"; - // In code object v2 and v3, "sramecc" feature was spelled with a - // hyphen ("sram-ecc"). - if (isSramEccOnOrAny()) - Features += "+sram-ecc"; - break; case AMDGPU::AMDHSA_COV4: case AMDGPU::AMDHSA_COV5: // sramecc. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index bb2964f592f66..1e0994d0862cf 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -42,19 +42,12 @@ namespace AMDGPU { struct IsaVersion; -enum { - AMDHSA_COV3 = 3, - AMDHSA_COV4 = 4, - AMDHSA_COV5 = 5 -}; +enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 }; /// \returns True if \p STI is AMDHSA. bool isHsaAbi(const MCSubtargetInfo &STI); /// \returns HSA OS ABI Version identification. std::optional getHsaAbiVersion(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 3, -/// false otherwise. -bool isHsaAbiVersion3(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 4, /// false otherwise. bool isHsaAbiVersion4(const MCSubtargetInfo *STI); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index c25ecafa1f7c0..4bdbe6604782a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -1,38 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { -; GFX8V3-LABEL: addrspacecast: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40 -; GFX8V3-NEXT: v_mov_b32_e32 v2, 1 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_mov_b32 s4, s0 -; GFX8V3-NEXT: s_mov_b32 s5, s3 -; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V3-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 -; GFX8V3-NEXT: s_mov_b32 s6, s1 -; GFX8V3-NEXT: s_mov_b32 s7, s2 -; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V3-NEXT: s_cselect_b64 s[0:1], s[6:7], 0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V3-NEXT: flat_store_dword v[0:1], v2 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v2, 2 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_store_dword v[0:1], v2 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: addrspacecast: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -82,30 +55,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: addrspacecast: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base -; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base -; GFX9V3-NEXT: v_mov_b32_e32 v2, 1 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_mov_b32 s2, s0 -; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX9V3-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 -; GFX9V3-NEXT: s_mov_b32 s4, s1 -; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s2 -; GFX9V3-NEXT: s_cselect_b64 s[0:1], s[4:5], 0 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s3 -; GFX9V3-NEXT: flat_store_dword v[0:1], v2 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: v_mov_b32_e32 v2, 2 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX9V3-NEXT: flat_store_dword v[0:1], v2 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: addrspacecast: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -161,19 +110,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr } define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_shared: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_shared: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -200,18 +136,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_shared: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_shared_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3 -; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_shared: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -242,19 +166,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { } define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_private: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_private: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -281,18 +192,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_private: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3 -; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_private: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -323,11 +222,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { } define amdgpu_kernel void @llvm_trap() { -; GFX8V3-LABEL: llvm_trap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX8V3-NEXT: s_trap 2 -; ; GFX8V4-LABEL: llvm_trap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5] @@ -339,11 +233,6 @@ define amdgpu_kernel void @llvm_trap() { ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0) ; GFX8V5-NEXT: s_trap 2 ; -; GFX9V3-LABEL: llvm_trap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX9V3-NEXT: s_trap 2 -; ; GFX9V4-LABEL: llvm_trap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 2 @@ -356,10 +245,6 @@ define amdgpu_kernel void @llvm_trap() { } define amdgpu_kernel void @llvm_debugtrap() { -; GFX8V3-LABEL: llvm_debugtrap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_trap 3 -; ; GFX8V4-LABEL: llvm_debugtrap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_trap 3 @@ -368,10 +253,6 @@ define amdgpu_kernel void @llvm_debugtrap() { ; GFX8V5: ; %bb.0: ; GFX8V5-NEXT: s_trap 3 ; -; GFX9V3-LABEL: llvm_debugtrap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_trap 3 -; ; GFX9V4-LABEL: llvm_debugtrap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 3 @@ -384,32 +265,6 @@ define amdgpu_kernel void @llvm_debugtrap() { } define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: v_mov_b32_e32 v0, s6 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s7 -; GFX8V3-NEXT: s_add_u32 s0, s8, 8 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_addc_u32 s1, s9, 0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s10 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s11 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v3, s1 -; GFX8V3-NEXT: v_mov_b32_e32 v2, s0 -; GFX8V3-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: v_mov_b32_e32 v0, s6 @@ -460,23 +315,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: v_mov_b32_e32 v2, 0 -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, s10 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s11 -; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7 -; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll deleted file mode 100644 index 20d0aea61f276..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll +++ /dev/null @@ -1,148 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -; CHECK-LABEL: {{^}}min_64_max_64: -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 0 -; CHECK: NumSGPRsForWavesPerEU: 1 -; CHECK: NumVGPRsForWavesPerEU: 1 -define amdgpu_kernel void @min_64_max_64() #0 { -entry: - ret void -} -attributes #0 = {"amdgpu-flat-work-group-size"="64,64"} - -; CHECK-LABEL: {{^}}min_64_max_128: -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 0 -; CHECK: NumSGPRsForWavesPerEU: 1 -; CHECK: NumVGPRsForWavesPerEU: 1 -define amdgpu_kernel void @min_64_max_128() #1 { -entry: - ret void -} -attributes #1 = {"amdgpu-flat-work-group-size"="64,128"} - -; CHECK-LABEL: {{^}}min_128_max_128: -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 0 -; CHECK: NumSGPRsForWavesPerEU: 1 -; CHECK: NumVGPRsForWavesPerEU: 1 -define amdgpu_kernel void @min_128_max_128() #2 { -entry: - ret void -} -attributes #2 = {"amdgpu-flat-work-group-size"="128,128"} - -; CHECK-LABEL: {{^}}min_1024_max_1024 -; CHECK: SGPRBlocks: 0 -; CHECK: VGPRBlocks: 10 -; CHECK: NumSGPRsForWavesPerEU: 2{{$}} -; CHECK: NumVGPRsForWavesPerEU: 43 -@var = addrspace(1) global float 0.0 -define amdgpu_kernel void @min_1024_max_1024() #3 { - %val0 = load volatile float, ptr addrspace(1) @var - %val1 = load volatile float, ptr addrspace(1) @var - %val2 = load volatile float, ptr addrspace(1) @var - %val3 = load volatile float, ptr addrspace(1) @var - %val4 = load volatile float, ptr addrspace(1) @var - %val5 = load volatile float, ptr addrspace(1) @var - %val6 = load volatile float, ptr addrspace(1) @var - %val7 = load volatile float, ptr addrspace(1) @var - %val8 = load volatile float, ptr addrspace(1) @var - %val9 = load volatile float, ptr addrspace(1) @var - %val10 = load volatile float, ptr addrspace(1) @var - %val11 = load volatile float, ptr addrspace(1) @var - %val12 = load volatile float, ptr addrspace(1) @var - %val13 = load volatile float, ptr addrspace(1) @var - %val14 = load volatile float, ptr addrspace(1) @var - %val15 = load volatile float, ptr addrspace(1) @var - %val16 = load volatile float, ptr addrspace(1) @var - %val17 = load volatile float, ptr addrspace(1) @var - %val18 = load volatile float, ptr addrspace(1) @var - %val19 = load volatile float, ptr addrspace(1) @var - %val20 = load volatile float, ptr addrspace(1) @var - %val21 = load volatile float, ptr addrspace(1) @var - %val22 = load volatile float, ptr addrspace(1) @var - %val23 = load volatile float, ptr addrspace(1) @var - %val24 = load volatile float, ptr addrspace(1) @var - %val25 = load volatile float, ptr addrspace(1) @var - %val26 = load volatile float, ptr addrspace(1) @var - %val27 = load volatile float, ptr addrspace(1) @var - %val28 = load volatile float, ptr addrspace(1) @var - %val29 = load volatile float, ptr addrspace(1) @var - %val30 = load volatile float, ptr addrspace(1) @var - %val31 = load volatile float, ptr addrspace(1) @var - %val32 = load volatile float, ptr addrspace(1) @var - %val33 = load volatile float, ptr addrspace(1) @var - %val34 = load volatile float, ptr addrspace(1) @var - %val35 = load volatile float, ptr addrspace(1) @var - %val36 = load volatile float, ptr addrspace(1) @var - %val37 = load volatile float, ptr addrspace(1) @var - %val38 = load volatile float, ptr addrspace(1) @var - %val39 = load volatile float, ptr addrspace(1) @var - %val40 = load volatile float, ptr addrspace(1) @var - - store volatile float %val0, ptr addrspace(1) @var - store volatile float %val1, ptr addrspace(1) @var - store volatile float %val2, ptr addrspace(1) @var - store volatile float %val3, ptr addrspace(1) @var - store volatile float %val4, ptr addrspace(1) @var - store volatile float %val5, ptr addrspace(1) @var - store volatile float %val6, ptr addrspace(1) @var - store volatile float %val7, ptr addrspace(1) @var - store volatile float %val8, ptr addrspace(1) @var - store volatile float %val9, ptr addrspace(1) @var - store volatile float %val10, ptr addrspace(1) @var - store volatile float %val11, ptr addrspace(1) @var - store volatile float %val12, ptr addrspace(1) @var - store volatile float %val13, ptr addrspace(1) @var - store volatile float %val14, ptr addrspace(1) @var - store volatile float %val15, ptr addrspace(1) @var - store volatile float %val16, ptr addrspace(1) @var - store volatile float %val17, ptr addrspace(1) @var - store volatile float %val18, ptr addrspace(1) @var - store volatile float %val19, ptr addrspace(1) @var - store volatile float %val20, ptr addrspace(1) @var - store volatile float %val21, ptr addrspace(1) @var - store volatile float %val22, ptr addrspace(1) @var - store volatile float %val23, ptr addrspace(1) @var - store volatile float %val24, ptr addrspace(1) @var - store volatile float %val25, ptr addrspace(1) @var - store volatile float %val26, ptr addrspace(1) @var - store volatile float %val27, ptr addrspace(1) @var - store volatile float %val28, ptr addrspace(1) @var - store volatile float %val29, ptr addrspace(1) @var - store volatile float %val30, ptr addrspace(1) @var - store volatile float %val31, ptr addrspace(1) @var - store volatile float %val32, ptr addrspace(1) @var - store volatile float %val33, ptr addrspace(1) @var - store volatile float %val34, ptr addrspace(1) @var - store volatile float %val35, ptr addrspace(1) @var - store volatile float %val36, ptr addrspace(1) @var - store volatile float %val37, ptr addrspace(1) @var - store volatile float %val38, ptr addrspace(1) @var - store volatile float %val39, ptr addrspace(1) @var - store volatile float %val40, ptr addrspace(1) @var - - ret void -} -attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} - -; CHECK: amdhsa.kernels: -; CHECK: .max_flat_workgroup_size: 64 -; CHECK: .name: min_64_max_64 -; CHECK: .max_flat_workgroup_size: 128 -; CHECK: .name: min_64_max_128 -; CHECK: .max_flat_workgroup_size: 128 -; CHECK: .name: min_128_max_128 -; CHECK: .max_flat_workgroup_size: 1024 -; CHECK: .name: min_1024_max_1024 -; CHECK: amdhsa.version: -; CHECK: - 1 -; CHECK: - 0 - -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll deleted file mode 100644 index 6c553e3726abf..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll +++ /dev/null @@ -1,168 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1034 < %s | FileCheck --check-prefixes=V3-GFX1034 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1035 < %s | FileCheck --check-prefixes=V3-GFX1035 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 < %s | FileCheck --check-prefixes=V3-GFX1036 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=V3-GFX1100 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 < %s | FileCheck --check-prefixes=V3-GFX1101 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 < %s | FileCheck --check-prefixes=V3-GFX1102 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 < %s | FileCheck --check-prefixes=V3-GFX1103 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1150 < %s | FileCheck --check-prefixes=V3-GFX1150 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 < %s | FileCheck --check-prefixes=V3-GFX1151 %s - -; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" -; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" -; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" -; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" -; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701" -; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702" -; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703" -; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704" -; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705" -; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801" -; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack" -; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" -; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803" -; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805" -; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810" -; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" -; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" -; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" -; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" -; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack" -; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" -; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" -; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" -; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc" -; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack" -; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc" -; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908" -; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc" -; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack" -; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc" -; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909" -; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack" -; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" -; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack" -; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc" -; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" -; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" -; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" -; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011" -; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack" -; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" -; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack" -; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013" -; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack" -; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" -; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" -; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" -; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033" -; V3-GFX1034: .amdgcn_target "amdgcn-amd-amdhsa--gfx1034" -; V3-GFX1035: .amdgcn_target "amdgcn-amd-amdhsa--gfx1035" -; V3-GFX1036: .amdgcn_target "amdgcn-amd-amdhsa--gfx1036" -; V3-GFX1100: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" -; V3-GFX1101: .amdgcn_target "amdgcn-amd-amdhsa--gfx1101" -; V3-GFX1102: .amdgcn_target "amdgcn-amd-amdhsa--gfx1102" -; V3-GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103" -; V3-GFX1150: .amdgcn_target "amdgcn-amd-amdhsa--gfx1150" -; V3-GFX1151: .amdgcn_target "amdgcn-amd-amdhsa--gfx1151" - - - -define amdgpu_kernel void @directive_amdgcn_target() { - ret void -} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll similarity index 98% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll index 37b124e7f59a0..042abe382283a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll @@ -142,7 +142,7 @@ define amdgpu_kernel void @test_no_default_queue(i8 %a) #3 ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; CHECK-NOT: amdhsa.printf: attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" } @@ -151,7 +151,7 @@ attributes #2 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-implici attributes #3 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !1 = !{i32 0} !2 = !{!"none"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll index 8e8023aa16f13..fb08fd2c45085 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll @@ -39,4 +39,4 @@ define internal void @bar.5() { ; PARSER: AMDGPU HSA Metadata Parser Test: PASS !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll index 69efc47008e6a..dc3a6e8b633b2 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1739,14 +1739,14 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr) ; CHECK-NEXT: - '2:1:8:%g\n' ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" } attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !llvm.printf.fmts = !{!100, !101} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll index 47b882494c919..f4892ebdc9c93 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll @@ -286,7 +286,7 @@ entry: ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; We don't have a use of llvm.amdgcn.implicitarg.ptr, so optnone to ; avoid optimizing out the implicit argument allocation. @@ -298,4 +298,4 @@ attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll similarity index 96% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll index cb3ae289721bc..22c6e14776220 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll @@ -35,12 +35,12 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0 ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll index a3f8c5cff95df..8f90025fe8e29 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll @@ -296,4 +296,4 @@ attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" } attributes #4 = { noinline } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll similarity index 98% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll index b7f58bbb51bb2..6d49f22eb429b 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll @@ -96,10 +96,10 @@ define amdgpu_kernel void @test(ptr addrspace(1) %a, ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} !1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll similarity index 80% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll index 8117037baaffc..fc5e6e2731253 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll @@ -5,9 +5,9 @@ ; CHECK: --- ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; CHECK: ... !opencl.ocl.version = !{} !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll similarity index 81% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll index ea744863a9b88..1ec79c95bc2a3 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll @@ -5,10 +5,10 @@ ; CHECK: --- ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 ; CHECK: ... !opencl.ocl.version = !{!0} !llvm.module.flags = !{!1} !0 = !{i32 1} -!1 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll index 1999c7b065e68..82ee23dca3f65 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -158,11 +158,11 @@ define amdgpu_kernel void @num_spilled_vgprs() #1 { ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 +; CHECK-NEXT: - 1 attributes #0 = { "amdgpu-num-sgpr"="14" } attributes #1 = { "amdgpu-num-vgpr"="20" } attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll index 9760e93eb48e6..9e6c0ef86906d 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll @@ -1,36 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { -; GFX8V3-LABEL: addrspacecast: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40 -; GFX8V3-NEXT: v_mov_b32_e32 v4, 1 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V3-NEXT: s_cselect_b32 s3, s3, 0 -; GFX8V3-NEXT: s_cselect_b32 s0, s0, 0 -; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s3 -; GFX8V3-NEXT: s_cselect_b32 s0, s2, 0 -; GFX8V3-NEXT: s_cselect_b32 s1, s1, 0 -; GFX8V3-NEXT: v_mov_b32_e32 v2, s1 -; GFX8V3-NEXT: v_mov_b32_e32 v3, s0 -; GFX8V3-NEXT: flat_store_dword v[0:1], v4 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, 2 -; GFX8V3-NEXT: flat_store_dword v[2:3], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: addrspacecast: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -77,30 +52,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: addrspacecast: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base -; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base -; GFX9V3-NEXT: v_mov_b32_e32 v4, 1 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX9V3-NEXT: s_cselect_b32 s2, s3, 0 -; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0 -; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s2 -; GFX9V3-NEXT: s_cselect_b32 s0, s5, 0 -; GFX9V3-NEXT: s_cselect_b32 s1, s1, 0 -; GFX9V3-NEXT: v_mov_b32_e32 v2, s1 -; GFX9V3-NEXT: v_mov_b32_e32 v3, s0 -; GFX9V3-NEXT: flat_store_dword v[0:1], v4 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, 2 -; GFX9V3-NEXT: flat_store_dword v[2:3], v0 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: addrspacecast: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -156,18 +107,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr } define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_shared: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40 -; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_shared: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x40 @@ -192,18 +131,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_shared: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4 -; GFX9V3-NEXT: s_mov_b64 s[0:1], src_shared_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1 -; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_shared: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4 @@ -234,18 +161,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { } define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_is_private: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44 -; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4 -; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0 -; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX8V3-NEXT: flat_store_dword v[0:1], v0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_is_private: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x44 @@ -270,18 +185,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_is_private: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4 -; GFX9V3-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1 -; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; GFX9V3-NEXT: global_store_dword v[0:1], v0, off -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_is_private: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4 @@ -312,11 +215,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { } define amdgpu_kernel void @llvm_trap() { -; GFX8V3-LABEL: llvm_trap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX8V3-NEXT: s_trap 2 -; ; GFX8V4-LABEL: llvm_trap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5] @@ -328,11 +226,6 @@ define amdgpu_kernel void @llvm_trap() { ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0) ; GFX8V5-NEXT: s_trap 2 ; -; GFX9V3-LABEL: llvm_trap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; GFX9V3-NEXT: s_trap 2 -; ; GFX9V4-LABEL: llvm_trap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 2 @@ -345,10 +238,6 @@ define amdgpu_kernel void @llvm_trap() { } define amdgpu_kernel void @llvm_debugtrap() { -; GFX8V3-LABEL: llvm_debugtrap: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: s_trap 3 -; ; GFX8V4-LABEL: llvm_debugtrap: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: s_trap 3 @@ -357,10 +246,6 @@ define amdgpu_kernel void @llvm_debugtrap() { ; GFX8V5: ; %bb.0: ; GFX8V5-NEXT: s_trap 3 ; -; GFX9V3-LABEL: llvm_debugtrap: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: s_trap 3 -; ; GFX9V4-LABEL: llvm_debugtrap: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: s_trap 3 @@ -373,31 +258,6 @@ define amdgpu_kernel void @llvm_debugtrap() { } define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { -; GFX8V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX8V3: ; %bb.0: -; GFX8V3-NEXT: v_mov_b32_e32 v0, s6 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s7 -; GFX8V3-NEXT: s_add_u32 s0, s8, 8 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_addc_u32 s1, s9, 0 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s4 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s5 -; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX8V3-NEXT: v_mov_b32_e32 v2, s10 -; GFX8V3-NEXT: v_mov_b32_e32 v3, s11 -; GFX8V3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s1 -; GFX8V3-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8V3-NEXT: s_waitcnt vmcnt(0) -; GFX8V3-NEXT: s_endpgm -; ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX8V4: ; %bb.0: ; GFX8V4-NEXT: v_mov_b32_e32 v0, s6 @@ -446,23 +306,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: s_endpgm ; -; GFX9V3-LABEL: llvm_amdgcn_queue_ptr: -; GFX9V3: ; %bb.0: -; GFX9V3-NEXT: v_mov_b32_e32 v2, 0 -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc -; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc -; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: v_mov_b32_e32 v0, s10 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s11 -; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7 -; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5 -; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9V3-NEXT: s_waitcnt vmcnt(0) -; GFX9V3-NEXT: s_endpgm -; ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr: ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll index 0353e7ee49ab9..a04fe28dbffff 100644 --- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll +++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll @@ -1,17 +1,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA %s declare void @llvm.trap() #0 -; HSA: .amdhsa_kernel trap -; HSA-NEXT: .amdhsa_group_segment_fixed_size 0 -; HSA-NEXT: .amdhsa_private_segment_fixed_size 0 -; HSA-NEXT: .amdhsa_kernarg_size 8 -; HSA-NEXT: .amdhsa_user_sgpr_count 8 -; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -; HSA: .end_amdhsa_kernel - ; DOORBELL: .amdhsa_kernel trap ; DOORBELL-NEXT: .amdhsa_group_segment_fixed_size 0 ; DOORBELL-NEXT: .amdhsa_private_segment_fixed_size 0 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll index 792ec2675247f..9ed896c148e64 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -317,4 +317,4 @@ attributes #1 = { nounwind "stackrealign" } attributes #2 = { nounwind alignstack=128 } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll index 03ea582698486..54a15513cf0a5 100644 --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -1,101 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s -; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s +; RUN: llc %s -o - -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900 %s +; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803 %s +; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900 %s +; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900 %s declare void @llvm.trap() #0 declare void @llvm.debugtrap() #1 define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { -; NOHSA-TRAP-GFX900-V3-LABEL: trap: -; NOHSA-TRAP-GFX900-V3: ; %bb.0: -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; NOHSA-TRAP-GFX900-V4-LABEL: trap: -; NOHSA-TRAP-GFX900-V4: ; %bb.0: -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V3-LABEL: trap: -; HSA-TRAP-GFX803-V3: ; %bb.0: -; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s2 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s3 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX803-V4-LABEL: trap: -; HSA-TRAP-GFX803-V4: ; %bb.0: -; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s2 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s3 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V3-LABEL: trap: -; HSA-TRAP-GFX900-V3: ; %bb.0: -; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[2:3] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V4-LABEL: trap: -; HSA-TRAP-GFX900-V4: ; %bb.0: -; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: s_trap 2 -; -; HSA-NOTRAP-GFX900-V3-LABEL: trap: -; HSA-NOTRAP-GFX900-V3: ; %bb.0: -; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V4-LABEL: trap: -; HSA-NOTRAP-GFX900-V4: ; %bb.0: -; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-LABEL: trap: +; NOHSA-TRAP-GFX900: ; %bb.0: +; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-LABEL: trap: +; HSA-TRAP-GFX803: ; %bb.0: +; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s2 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s3 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-LABEL: trap: +; HSA-TRAP-GFX900: ; %bb.0: +; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_trap 2 +; +; HSA-NOTRAP-GFX900-LABEL: trap: +; HSA-NOTRAP-GFX900: ; %bb.0: +; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: s_endpgm store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable @@ -104,150 +57,77 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { } define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { -; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap: -; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; NOHSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 -; NOHSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; NOHSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap: -; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 -; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; NOHSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V3-LABEL: non_entry_trap: -; HSA-TRAP-GFX803-V3: ; %bb.0: ; %entry -; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V3-NEXT: flat_load_dword v0, v[0:1] glc -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V3-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX803-V3-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 3 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_endpgm -; HSA-TRAP-GFX803-V3-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX803-V4-LABEL: non_entry_trap: -; HSA-TRAP-GFX803-V4: ; %bb.0: ; %entry -; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V4-NEXT: flat_load_dword v0, v[0:1] glc -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 -; HSA-TRAP-GFX803-V4-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX803-V4-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 3 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_endpgm -; HSA-TRAP-GFX803-V4-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX803-V4-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V3-LABEL: non_entry_trap: -; HSA-TRAP-GFX900-V3: ; %bb.0: ; %entry -; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_endpgm -; HSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5] -; HSA-TRAP-GFX900-V3-NEXT: s_trap 2 -; -; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap: -; HSA-TRAP-GFX900-V4: ; %bb.0: ; %entry -; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: s_endpgm -; HSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap -; HSA-TRAP-GFX900-V4-NEXT: s_trap 2 -; -; HSA-NOTRAP-GFX900-V3-LABEL: non_entry_trap: -; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; %entry -; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-NOTRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3 -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; HSA-NOTRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap: -; HSA-NOTRAP-GFX900-V4: ; %bb.0: ; %entry -; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 -; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2 -; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3 -; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm -; HSA-NOTRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-LABEL: non_entry_trap: +; NOHSA-TRAP-GFX900: ; %bb.0: ; %entry +; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; NOHSA-TRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2 +; NOHSA-TRAP-GFX900-NEXT: ; %bb.1: ; %ret +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3 +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-NEXT: .LBB1_2: ; %trap +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-LABEL: non_entry_trap: +; HSA-TRAP-GFX803: ; %bb.0: ; %entry +; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-NEXT: flat_load_dword v0, v[0:1] glc +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0 +; HSA-TRAP-GFX803-NEXT: s_cbranch_vccz .LBB1_2 +; HSA-TRAP-GFX803-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 3 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_endpgm +; HSA-TRAP-GFX803-NEXT: .LBB1_2: ; %trap +; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-GFX803-NEXT: s_trap 2 +; +; HSA-TRAP-GFX900-LABEL: non_entry_trap: +; HSA-TRAP-GFX900: ; %bb.0: ; %entry +; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-TRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2 +; HSA-TRAP-GFX900-NEXT: ; %bb.1: ; %ret +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3 +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_endpgm +; HSA-TRAP-GFX900-NEXT: .LBB1_2: ; %trap +; HSA-TRAP-GFX900-NEXT: s_trap 2 +; +; HSA-NOTRAP-GFX900-LABEL: non_entry_trap: +; HSA-NOTRAP-GFX900: ; %bb.0: ; %entry +; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1 +; HSA-NOTRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2 +; HSA-NOTRAP-GFX900-NEXT: ; %bb.1: ; %ret +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3 +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: s_endpgm +; HSA-NOTRAP-GFX900-NEXT: .LBB1_2: ; %trap +; HSA-NOTRAP-GFX900-NEXT: s_endpgm entry: %tmp29 = load volatile i32, ptr addrspace(1) %arg0 %cmp = icmp eq i32 %tmp29, -1 @@ -263,115 +143,60 @@ ret: } define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) { -; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap: -; NOHSA-TRAP-GFX900-V3: ; %bb.0: -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] -; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap: -; NOHSA-TRAP-GFX900-V4: ; %bb.0: -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] -; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V3-LABEL: debugtrap: -; HSA-TRAP-GFX803-V3: ; %bb.0: -; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v3, 2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_trap 3 -; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v3 -; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V3-NEXT: s_endpgm -; -; HSA-TRAP-GFX803-V4-LABEL: debugtrap: -; HSA-TRAP-GFX803-V4: ; %bb.0: -; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v3, 2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0 -; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_trap 3 -; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v3 -; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX803-V4-NEXT: s_endpgm -; -; HSA-TRAP-GFX900-V3-LABEL: debugtrap: -; HSA-TRAP-GFX900-V3: ; %bb.0: -; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_trap 3 -; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-TRAP-GFX900-V4-LABEL: debugtrap: -; HSA-TRAP-GFX900-V4: ; %bb.0: -; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: s_trap 3 -; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX900-V4-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap: -; HSA-NOTRAP-GFX900-V3: ; %bb.0: -; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm -; -; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap: -; HSA-NOTRAP-GFX900-V4: ; %bb.0: -; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 -; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2 -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] -; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) -; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm +; NOHSA-TRAP-GFX900-LABEL: debugtrap: +; NOHSA-TRAP-GFX900: ; %bb.0: +; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2 +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1] +; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; NOHSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-TRAP-GFX803-LABEL: debugtrap: +; HSA-TRAP-GFX803: ; %bb.0: +; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 1 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v3, 2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0 +; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_trap 3 +; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v3 +; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX803-NEXT: s_endpgm +; +; HSA-TRAP-GFX900-LABEL: debugtrap: +; HSA-TRAP-GFX900: ; %bb.0: +; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2 +; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_trap 3 +; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-TRAP-GFX900-NEXT: s_endpgm +; +; HSA-NOTRAP-GFX900-LABEL: debugtrap: +; HSA-NOTRAP-GFX900: ; %bb.0: +; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1 +; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2 +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1] +; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0) +; HSA-NOTRAP-GFX900-NEXT: s_endpgm store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.debugtrap() store volatile i32 2, ptr addrspace(1) %arg0 @@ -382,4 +207,4 @@ attributes #0 = { nounwind noreturn } attributes #1 = { nounwind } !llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s similarity index 94% rename from llvm/test/MC/AMDGPU/hsa-diag-v3.s rename to llvm/test/MC/AMDGPU/hsa-diag-v4.s index 369ac905ad2b2..f7a554aedb746 100644 --- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s @@ -1,18 +1,18 @@ -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL .text // GCN-LABEL: warning: test_target // GFX8-NOT: error: -// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack -// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1100 -// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810 +// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1010:xnack+ +// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1100 +// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown--gfx810 .warning "test_target" -.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" +.amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+" // GCN-LABEL: warning: test_amdhsa_kernel_no_name // GCN: error: unknown directive diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s deleted file mode 100644 index ba60000837cdc..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ /dev/null @@ -1,226 +0,0 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 -// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr -// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd -// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. -// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 -// special_sgpr -// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.p2align 8 -.type special_sgpr,@function -special_sgpr: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_shared_vgpr_count 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .amdhsa_shared_vgpr_count 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. -.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_private_segment_buffer 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_flat_scratch_init 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_wavefront_size32 1 - .amdhsa_system_sgpr_private_segment_wavefront_offset 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_workgroup_processor_mode 1 - .amdhsa_memory_ordered 1 - .amdhsa_forward_progress 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 15 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 -// ASM-NEXT: .amdhsa_memory_ordered 1 -// ASM-NEXT: .amdhsa_forward_progress 1 -// ASM-NEXT: .amdhsa_shared_vgpr_count 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -// Test that we are including special SGPR usage in the granulated count. -.p2align 6 -.amdhsa_kernel special_sgpr - // Same next_free_sgpr as "complete", but... - .amdhsa_next_free_sgpr 27 - // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from - // 3 granules to 4 - .amdhsa_reserve_flat_scratch 1 - - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_xnack_mask 1 - - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_next_free_vgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel special_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s deleted file mode 100644 index 7f885b457aa63..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s +++ /dev/null @@ -1,213 +0,0 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 -// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr -// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd -// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. -// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000 -// special_sgpr -// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx1100" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.p2align 8 -.type special_sgpr,@function -special_sgpr: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. -.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_wavefront_size32 1 - .amdhsa_enable_private_segment 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_reserve_vcc 0 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_workgroup_processor_mode 1 - .amdhsa_memory_ordered 1 - .amdhsa_forward_progress 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 9 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_enable_private_segment 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 -// ASM-NEXT: .amdhsa_memory_ordered 1 -// ASM-NEXT: .amdhsa_forward_progress 1 -// ASM-NEXT: .amdhsa_shared_vgpr_count 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -// Test that we are including special SGPR usage in the granulated count. -.p2align 6 -.amdhsa_kernel special_sgpr - // Same next_free_sgpr as "complete", but... - .amdhsa_next_free_sgpr 27 - // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from - // 3 granules to 4 - - .amdhsa_reserve_vcc 0 - - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_next_free_vgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel special_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 11 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s deleted file mode 100644 index fd84fab8af816..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s +++ /dev/null @@ -1,184 +0,0 @@ -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t -// RUN: llvm-readobj --elf-output-style=GNU --sections --symbols --relocations %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. -// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 c1500104 210f007f 7f008100 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_accum_offset 4 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM: .amdhsa_tg_split 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. -.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_user_sgpr_private_segment_buffer 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_flat_scratch_init 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_kernarg_preload_length 1 - .amdhsa_user_sgpr_kernarg_preload_offset 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_system_sgpr_private_segment_wavefront_offset 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_accum_offset 4 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_tg_split 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 16 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_tg_split 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 9 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s deleted file mode 100644 index 9624515ecd6fb..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s +++ /dev/null @@ -1,178 +0,0 @@ -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. -// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 -// OBJDUMP-NEXT: 0070 01510104 150f007f 5e008100 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_accum_offset 4 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM: .amdhsa_tg_split 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. -.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_kernarg_preload_length 1 - .amdhsa_user_sgpr_kernarg_preload_offset 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_enable_private_segment 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_accum_offset 4 - .amdhsa_reserve_vcc 0 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_tg_split 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 10 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_enable_private_segment 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_accum_offset 4 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_tg_split 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 9 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s deleted file mode 100644 index 9f854986d7bc4..0000000000000 --- a/llvm/test/MC/AMDGPU/hsa-v3.s +++ /dev/null @@ -1,304 +0,0 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t -// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s -// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s - -// READOBJ: Section Headers -// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 - -// READOBJ: Relocation section '.rela.rodata' at offset -// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 -// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 -// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 -// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310 - -// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: -// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal -// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete -// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr -// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr -// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd -// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd -// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd -// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd - -// OBJDUMP: Contents of section .rodata -// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. -// minimal -// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 -// complete -// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 -// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 -// special_sgpr -// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000 -// disabled_user_sgpr -// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000 - -.text -// ASM: .text - -.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" - -.p2align 8 -.type minimal,@function -minimal: - s_endpgm - -.p2align 8 -.type complete,@function -complete: - s_endpgm - -.p2align 8 -.type special_sgpr,@function -special_sgpr: - s_endpgm - -.p2align 8 -.type disabled_user_sgpr,@function -disabled_user_sgpr: - s_endpgm - -.rodata -// ASM: .rodata - -// Test that only specifying required directives is allowed, and that defaulted -// values are omitted. -.p2align 6 -.amdhsa_kernel minimal - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel minimal -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .end_amdhsa_kernel - -// Test that we can specify all available directives with non-default values. -.p2align 6 -.amdhsa_kernel complete - .amdhsa_group_segment_fixed_size 1 - .amdhsa_private_segment_fixed_size 1 - .amdhsa_kernarg_size 8 - .amdhsa_user_sgpr_private_segment_buffer 1 - .amdhsa_user_sgpr_dispatch_ptr 1 - .amdhsa_user_sgpr_queue_ptr 1 - .amdhsa_user_sgpr_kernarg_segment_ptr 1 - .amdhsa_user_sgpr_dispatch_id 1 - .amdhsa_user_sgpr_flat_scratch_init 1 - .amdhsa_user_sgpr_private_segment_size 1 - .amdhsa_system_sgpr_private_segment_wavefront_offset 1 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 1 - .amdhsa_system_sgpr_workgroup_id_z 1 - .amdhsa_system_sgpr_workgroup_info 1 - .amdhsa_system_vgpr_workitem_id 1 - .amdhsa_next_free_vgpr 9 - .amdhsa_next_free_sgpr 27 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_float_round_mode_32 1 - .amdhsa_float_round_mode_16_64 1 - .amdhsa_float_denorm_mode_32 1 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 1 - .amdhsa_exception_fp_ieee_invalid_op 1 - .amdhsa_exception_fp_denorm_src 1 - .amdhsa_exception_fp_ieee_div_zero 1 - .amdhsa_exception_fp_ieee_overflow 1 - .amdhsa_exception_fp_ieee_underflow 1 - .amdhsa_exception_fp_ieee_inexact 1 - .amdhsa_exception_int_div_zero 1 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel complete -// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 -// ASM-NEXT: .amdhsa_kernarg_size 8 -// ASM-NEXT: .amdhsa_user_sgpr_count 15 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 -// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 -// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 -// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 -// ASM-NEXT: .amdhsa_next_free_vgpr 9 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 1 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM-NEXT: .amdhsa_fp16_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 -// ASM-NEXT: .amdhsa_exception_int_div_zero 1 -// ASM-NEXT: .end_amdhsa_kernel - -// Test that we are including special SGPR usage in the granulated count. -.p2align 6 -.amdhsa_kernel special_sgpr - // Same next_free_sgpr as "complete", but... - .amdhsa_next_free_sgpr 27 - // ...on GFX9 this should require an additional 6 SGPRs, pushing us from - // 3 granules to 4 - .amdhsa_reserve_flat_scratch 1 - - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_xnack_mask 1 - - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_next_free_vgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel special_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 27 -// ASM-NEXT: .amdhsa_reserve_vcc 0 -// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM: .amdhsa_float_denorm_mode_16_64 0 -// ASM-NEXT: .amdhsa_dx10_clamp 0 -// ASM-NEXT: .amdhsa_ieee_mode 0 -// ASM: .end_amdhsa_kernel - -// Test that explicitly disabling user_sgpr's does not affect the user_sgpr -// count, i.e. this should produce the same descriptor as minimal. -.p2align 6 -.amdhsa_kernel disabled_user_sgpr - .amdhsa_user_sgpr_private_segment_buffer 0 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -// ASM: .amdhsa_kernel disabled_user_sgpr -// ASM: .amdhsa_next_free_vgpr 0 -// ASM-NEXT: .amdhsa_next_free_sgpr 0 -// ASM: .end_amdhsa_kernel - -.section .foo - -.byte .amdgcn.gfx_generation_number -// ASM: .byte 9 - -.byte .amdgcn.gfx_generation_minor -// ASM: .byte 0 - -.byte .amdgcn.gfx_generation_stepping -// ASM: .byte 4 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v7, s10 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 8 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 11 - -.set .amdgcn.next_free_vgpr, 0 -.set .amdgcn.next_free_sgpr, 0 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 0 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 0 - -v_mov_b32_e32 v16, s3 - -.byte .amdgcn.next_free_vgpr -// ASM: .byte 17 -.byte .amdgcn.next_free_sgpr -// ASM: .byte 4 - -// Metadata - -.amdgpu_metadata - amdhsa.version: - - 3 - - 0 - amdhsa.kernels: - - .name: amd_kernel_code_t_test_all - .symbol: amd_kernel_code_t_test_all@kd - .kernarg_segment_size: 8 - .group_segment_fixed_size: 16 - .private_segment_fixed_size: 32 - .kernarg_segment_align: 64 - .wavefront_size: 128 - .sgpr_count: 14 - .vgpr_count: 40 - .max_flat_workgroup_size: 256 - - .name: amd_kernel_code_t_minimal - .symbol: amd_kernel_code_t_minimal@kd - .kernarg_segment_size: 8 - .group_segment_fixed_size: 16 - .private_segment_fixed_size: 32 - .kernarg_segment_align: 64 - .wavefront_size: 128 - .sgpr_count: 14 - .vgpr_count: 40 - .max_flat_workgroup_size: 256 -.end_amdgpu_metadata - -// ASM: .amdgpu_metadata -// ASM: amdhsa.kernels: -// ASM: - .group_segment_fixed_size: 16 -// ASM: .kernarg_segment_align: 64 -// ASM: .kernarg_segment_size: 8 -// ASM: .max_flat_workgroup_size: 256 -// ASM: .name: amd_kernel_code_t_test_all -// ASM: .private_segment_fixed_size: 32 -// ASM: .sgpr_count: 14 -// ASM: .symbol: 'amd_kernel_code_t_test_all@kd' -// ASM: .vgpr_count: 40 -// ASM: .wavefront_size: 128 -// ASM: - .group_segment_fixed_size: 16 -// ASM: .kernarg_segment_align: 64 -// ASM: .kernarg_segment_size: 8 -// ASM: .max_flat_workgroup_size: 256 -// ASM: .name: amd_kernel_code_t_minimal -// ASM: .private_segment_fixed_size: 32 -// ASM: .sgpr_count: 14 -// ASM: .symbol: 'amd_kernel_code_t_minimal@kd' -// ASM: .vgpr_count: 40 -// ASM: .wavefront_size: 128 -// ASM: amdhsa.version: -// ASM-NEXT: - 3 -// ASM-NEXT: - 0 -// ASM: .end_amdgpu_metadata diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s index 63e532e0ffa37..7e3ae8424cc7b 100644 --- a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s +++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s +// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s .amdhsa_kernel implied_count_too_low_0 .amdhsa_user_sgpr_count 0 diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s index aa8970185eb04..950c514f786b2 100644 --- a/llvm/test/MC/AMDGPU/user-sgpr-count.s +++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s @@ -1,10 +1,10 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s .text // ASM: .text -.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc" +.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" // ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count