diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index a09c409f8f91a..46f99d0bbdd06 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -4403,6 +4403,7 @@ immediately after the name being declared. For example, this applies the GNU ``unused`` attribute to ``a`` and ``f``, and also applies the GNU ``noreturn`` attribute to ``f``. +Examples: .. code-block:: c++ [[gnu::unused]] int a, f [[gnu::noreturn]] (); @@ -4412,6 +4413,42 @@ Target-Specific Extensions Clang supports some language features conditionally on some targets. +AMDGPU Language Extensions +-------------------------- + +__builtin_amdgcn_fence +^^^^^^^^^^^^^^^^^^^^^^ + +``__builtin_amdgcn_fence`` emits a fence. Its arguments are: + +* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE`` +* ``const char *`` synchronization scope, e.g. ``workgroup`` +* Zero or more ``const char *`` address space names. + +Each address space argument must be one of the following string literals: + +* ``"local"`` +* ``"global"`` + +If one or more address space names are provided, the code generator will attempt +to emit potentially faster instructions that order access to at least those +address spaces. +Emitting such instructions may not always be possible and the compiler is free +to fence more aggressively. + +If no address space names are provided, all address spaces are fenced. + +.. code-block:: c++ + + // Fence all address spaces. + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent"); + + // Fence only requested address spaces. 
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local") + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global") + + ARM/AArch64 Language Extensions ------------------------------- @@ -5602,4 +5639,4 @@ Compiling different TUs depending on these flags (including use of ``std::hardware_constructive_interference`` or ``std::hardware_destructive_interference``) with different compilers, macro definitions, or architecture flags will lead to ODR violations and should be -avoided. \ No newline at end of file +avoided. diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index efa652eee9901..433c7795325f0 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -68,7 +68,7 @@ BUILTIN(__builtin_amdgcn_sched_group_barrier, "vIiIiIi", "n") BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n") BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n") BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n") -BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n") +BUILTIN(__builtin_amdgcn_fence, "vUicC*.", "n") BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n") BUILTIN(__builtin_amdgcn_wavefrontsize, "Ui", "nc") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0549afa12e430..5edf8c7970913 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -57,6 +57,7 @@ #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" +#include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -18327,6 +18328,29 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return nullptr; } +void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, + const CallExpr *E) { + constexpr const char *Tag = "amdgpu-as"; + + LLVMContext &Ctx = 
Inst->getContext(); + SmallVector MMRAs; + for (unsigned K = 2; K < E->getNumArgs(); ++K) { + llvm::Value *V = EmitScalarExpr(E->getArg(K)); + StringRef AS; + if (llvm::getConstantStringInfo(V, AS)) { + MMRAs.push_back({Tag, AS}); + // TODO: Delete the resulting unused constant? + continue; + } + CGM.Error(E->getExprLoc(), + "expected an address space name as a string literal"); + } + + llvm::sort(MMRAs); + MMRAs.erase(llvm::unique(MMRAs), MMRAs.end()); + Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs)); +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; @@ -18997,7 +19021,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fence: { ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), AO, SSID); - return Builder.CreateFence(AO, SSID); + FenceInst *Fence = Builder.CreateFence(AO, SSID); + if (E->getNumArgs() > 2) + AddAMDGPUFenceAddressSpaceMMRA(Fence, E); + return Fence; } case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 5f3ee7eb943f9..45585361a4fc9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4635,6 +4635,9 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); + + void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, + const CallExpr *E); void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID); diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp index 
630e416b893f4..3af5a21ba0cd5 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp @@ -1,22 +1,111 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: fence syncscope("workgroup") seq_cst +// CHECK-NEXT: fence syncscope("agent") acquire +// CHECK-NEXT: fence seq_cst +// CHECK-NEXT: fence syncscope("agent") acq_rel +// CHECK-NEXT: fence syncscope("workgroup") release +// CHECK-NEXT: ret void +// void test_memory_fence_success() { - // CHECK-LABEL: test_memory_fence_success - // CHECK: fence syncscope("workgroup") seq_cst __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); - // CHECK: fence syncscope("agent") acquire __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent"); - // CHECK: fence seq_cst __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); - // CHECK: fence syncscope("agent") acq_rel __builtin_amdgcn_fence(4, "agent"); - // CHECK: fence syncscope("workgroup") release __builtin_amdgcn_fence(3, "workgroup"); } + +// CHECK-LABEL: define dso_local void @_Z10test_localv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// CHECK-NEXT: fence seq_cst, !mmra [[META3]] +// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// CHECK-NEXT: ret void +// +void test_local() { + __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); + + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "local"); + + 
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "local"); + + __builtin_amdgcn_fence(4, "agent", "local"); + + __builtin_amdgcn_fence(3, "workgroup", "local"); +} + + +// CHECK-LABEL: define dso_local void @_Z11test_globalv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META4]] +// CHECK-NEXT: fence seq_cst, !mmra [[META4]] +// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] +// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] +// CHECK-NEXT: ret void +// +void test_global() { + __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global"); + + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "global"); + + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "global"); + + __builtin_amdgcn_fence(4, "agent", "global"); + + __builtin_amdgcn_fence(3, "workgroup", "global"); +} + +// CHECK-LABEL: define dso_local void @_Z10test_imagev( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// CHECK-NEXT: fence seq_cst, !mmra [[META3]] +// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// CHECK-NEXT: ret void +// +void test_image() { + __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); + + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "local"); + + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "local"); + + __builtin_amdgcn_fence(4, "agent", "local"); + + __builtin_amdgcn_fence(3, "workgroup", "local"); +} + +// CHECK-LABEL: define dso_local void @_Z10test_mixedv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// 
CHECK-NEXT: ret void +// +void test_mixed() { + __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global"); + __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local"); +} +//. +// CHECK: [[META3]] = !{!"amdgpu-as", !"local"} +// CHECK: [[META4]] = !{!"amdgpu-as", !"global"} +// CHECK: [[META5]] = !{[[META4]], [[META3]]} +//. diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl index b044763edcf00..7a550f026bc1b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl @@ -155,8 +155,8 @@ void test_ds_fmaxf(local float *out, float src, int a) { void test_fence() { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} - __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}} - __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}} + __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected at least 2, have 1}} + __builtin_amdgcn_fence(4, 4, 4); // expected-error {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}} __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}} __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 5); // expected-error {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}} const char ptr[] = "workgroup"; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 62306fa667b36..24f8788683ed7 100644 --- 
a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -18,9 +18,11 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/TargetParser/TargetParser.h" @@ -678,6 +680,49 @@ class SIMemoryLegalizer final : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; }; +static const StringMap<SIAtomicAddrSpace> ASNames = {{ + {"global", SIAtomicAddrSpace::GLOBAL}, + {"local", SIAtomicAddrSpace::LDS}, +}}; + +static void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) { + const MachineFunction *MF = MI.getMF(); + const Function &Fn = MF->getFunction(); + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "unknown address space '" << AS << "'; expected one of "; + ListSeparator LS; + for (const auto &[Name, Val] : ASNames) + OS << LS << '\'' << Name << '\''; + DiagnosticInfoUnsupported BadTag(Fn, Str.str(), MI.getDebugLoc(), DS_Warning); + Fn.getContext().diagnose(BadTag); +} + +/// Reads \p MI's MMRAs to parse the "amdgpu-as" MMRA. +/// If this tag isn't present, or if it has no meaningful values, returns \p +/// Default. Otherwise returns all the address spaces concerned by the MMRA. 
+static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI, + SIAtomicAddrSpace Default) { + static constexpr StringLiteral FenceASPrefix = "amdgpu-as"; + + auto MMRA = MMRAMetadata(MI.getMMRAMetadata()); + if (!MMRA) + return Default; + + SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE; + for (const auto &[Prefix, Suffix] : MMRA) { + if (Prefix != FenceASPrefix) + continue; + + if (auto It = ASNames.find(Suffix); It != ASNames.end()) + Result |= It->second; + else + diagnoseUnknownMMRAASName(MI, Suffix); + } + + return (Result != SIAtomicAddrSpace::NONE) ? Result : Default; +} + } // end namespace anonymous void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI, @@ -2535,12 +2580,17 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI, AtomicPseudoMIs.push_back(MI); bool Changed = false; + // Refine fenced address space based on MMRAs. + // + // TODO: Should we support this MMRA on other atomic operations? + SIAtomicAddrSpace OrderingAddrSpace = + getFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace()); + if (MOI.isAtomic()) { if (MOI.getOrdering() == AtomicOrdering::Acquire) - Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(), - SIMemOp::LOAD | SIMemOp::STORE, - MOI.getIsCrossAddressSpaceOrdering(), - Position::BEFORE); + Changed |= CC->insertWait( + MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE, + MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE); if (MOI.getOrdering() == AtomicOrdering::Release || MOI.getOrdering() == AtomicOrdering::AcquireRelease || @@ -2552,8 +2602,7 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI, /// generate a fence. Could add support in this file for /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally /// adding S_WAITCNT before a S_BARRIER. 
- Changed |= CC->insertRelease(MI, MOI.getScope(), - MOI.getOrderingAddrSpace(), + Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace, MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE); @@ -2565,8 +2614,7 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI, if (MOI.getOrdering() == AtomicOrdering::Acquire || MOI.getOrdering() == AtomicOrdering::AcquireRelease || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) - Changed |= CC->insertAcquire(MI, MOI.getScope(), - MOI.getOrderingAddrSpace(), + Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace, Position::BEFORE); return Changed; diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll new file mode 100644 index 0000000000000..da9bc6b331134 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll @@ -0,0 +1,1716 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 
-mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s + +define amdgpu_kernel void @workgroup_acquire_fence() { +; GFX6-LABEL: workgroup_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: 
+ fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @workgroup_release_fence() { +; GFX6-LABEL: workgroup_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @workgroup_acq_rel_fence() { +; GFX6-LABEL: workgroup_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: 
workgroup_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @workgroup_seq_cst_fence() { +; GFX6-LABEL: workgroup_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm 
+; +; GFX10-CU-LABEL: workgroup_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_acquire_fence() { +; GFX6-LABEL: workgroup_one_as_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_one_as_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_acquire_fence: +; 
SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_release_fence() { +; GFX6-LABEL: workgroup_one_as_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_one_as_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: +; 
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { +; GFX6-LABEL: workgroup_one_as_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; 
GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { +; GFX6-LABEL: workgroup_one_as_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: 
workgroup_one_as_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_acquire_fence() { +; GFX6-LABEL: agent_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: 
s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") acquire, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_release_fence() { +; GFX6-LABEL: agent_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: 
s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_acq_rel_fence() { +; GFX6-LABEL: agent_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_seq_cst_fence() { +; GFX6-LABEL: agent_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; 
GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; 
GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_one_as_acquire_fence() { +; GFX6-LABEL: agent_one_as_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: 
s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_one_as_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_one_as_release_fence() { +; GFX6-LABEL: agent_one_as_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: 
agent_one_as_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_one_as_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_one_as_acq_rel_fence() { +; GFX6-LABEL: agent_one_as_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: 
agent_one_as_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_one_as_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @agent_one_as_seq_cst_fence() { +; GFX6-LABEL: agent_one_as_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry 
+; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; 
GFX11-CU-LABEL: agent_one_as_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_acquire_fence() { +; GFX6-LABEL: system_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: 
s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence acquire, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_release_fence() { +; GFX6-LABEL: system_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_release_fence: +; GFX90A-TGSPLIT: 
; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_endpgm +entry: + fence release, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_acq_rel_fence() { +; GFX6-LABEL: system_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; 
SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence acq_rel, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_seq_cst_fence() { +; GFX6-LABEL: system_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt 
vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; 
GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence seq_cst, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_one_as_acquire_fence() { +; GFX6-LABEL: system_one_as_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: 
s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("one-as") acquire, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_one_as_release_fence() { +; GFX6-LABEL: system_one_as_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence: +; 
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("one-as") release, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_one_as_acq_rel_fence() { +; GFX6-LABEL: system_one_as_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_acq_rel_fence: +; GFX10-CU: ; 
%bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: 
s_endpgm +entry: + fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-as", !"global"} + ret void +} + +define amdgpu_kernel void @system_one_as_seq_cst_fence() { +; GFX6-LABEL: system_one_as_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_wbinvl1 +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_wbinvl1_vol +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 +; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 +; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 
sc1 +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) +; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-as", !"global"} + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll new file mode 100644 index 0000000000000..601a6a60fe7b4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll @@ -0,0 +1,1296 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 
-mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s + +define amdgpu_kernel void @workgroup_acquire_fence() { +; GFX6-LABEL: workgroup_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; 
GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @workgroup_release_fence() { +; GFX6-LABEL: workgroup_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence 
syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @workgroup_acq_rel_fence() { +; GFX6-LABEL: workgroup_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @workgroup_seq_cst_fence() { +; GFX6-LABEL: workgroup_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_acquire_fence() { +; GFX6-LABEL: workgroup_one_as_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; 
GFX10-CU-LABEL: workgroup_one_as_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_release_fence() { +; GFX6-LABEL: workgroup_one_as_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_one_as_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; 
GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { +; GFX6-LABEL: workgroup_one_as_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence 
syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { +; GFX6-LABEL: workgroup_one_as_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: workgroup_one_as_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_acquire_fence() { +; GFX6-LABEL: agent_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") acquire, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_release_fence() { +; GFX6-LABEL: agent_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_acq_rel_fence() { +; GFX6-LABEL: agent_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence: +; GFX90A-TGSPLIT: 
; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_seq_cst_fence() { +; GFX6-LABEL: agent_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence: +; 
GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_one_as_acquire_fence() { +; GFX6-LABEL: agent_one_as_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_one_as_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_one_as_release_fence() { +; GFX6-LABEL: agent_one_as_release_fence: +; GFX6: ; %bb.0: ; 
%entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_one_as_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_one_as_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_one_as_acq_rel_fence() { +; GFX6-LABEL: agent_one_as_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; 
%entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: agent_one_as_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @agent_one_as_seq_cst_fence() { +; GFX6-LABEL: agent_one_as_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: agent_one_as_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: agent_one_as_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: 
agent_one_as_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_acquire_fence() { +; GFX6-LABEL: system_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence acquire, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_release_fence() { +; GFX6-LABEL: system_release_fence: +; GFX6: ; %bb.0: ; 
%entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence release, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_acq_rel_fence() { +; GFX6-LABEL: system_acq_rel_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence acq_rel, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_seq_cst_fence() { +; GFX6-LABEL: system_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_endpgm +entry: + fence seq_cst, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_one_as_acquire_fence() { +; GFX6-LABEL: system_one_as_acquire_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_acquire_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_acquire_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_acquire_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: 
s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_acquire_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_acquire_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("one-as") acquire, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_one_as_release_fence() { +; GFX6-LABEL: system_one_as_release_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_release_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_release_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_release_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_release_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_release_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_release_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_release_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("one-as") release, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_one_as_acq_rel_fence() { +; GFX6-LABEL: system_one_as_acq_rel_fence: +; 
GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_acq_rel_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_acq_rel_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_acq_rel_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_acq_rel_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_acq_rel_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-as", !"local"} + ret void +} + +define amdgpu_kernel void @system_one_as_seq_cst_fence() { +; GFX6-LABEL: system_one_as_seq_cst_fence: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_endpgm +; +; GFX7-LABEL: system_one_as_seq_cst_fence: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_endpgm +; +; GFX10-WGP-LABEL: system_one_as_seq_cst_fence: +; GFX10-WGP: ; %bb.0: ; %entry +; GFX10-WGP-NEXT: s_endpgm +; +; GFX10-CU-LABEL: system_one_as_seq_cst_fence: +; GFX10-CU: ; %bb.0: ; %entry +; GFX10-CU-NEXT: s_endpgm +; +; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence: +; SKIP-CACHE-INV: ; %bb.0: ; %entry +; SKIP-CACHE-INV-NEXT: s_endpgm +; +; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: +; 
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: s_endpgm +; +; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX940-TGSPLIT: ; %bb.0: ; %entry +; GFX940-TGSPLIT-NEXT: s_endpgm +; +; GFX11-WGP-LABEL: system_one_as_seq_cst_fence: +; GFX11-WGP: ; %bb.0: ; %entry +; GFX11-WGP-NEXT: s_endpgm +; +; GFX11-CU-LABEL: system_one_as_seq_cst_fence: +; GFX11-CU: ; %bb.0: ; %entry +; GFX11-CU-NEXT: s_endpgm +entry: + fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-as", !"local"} + ret void +}