diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 24e35cea128e9..e5a1422fe8778 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -835,5 +835,15 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbI TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +// GFX12.5 128B cooperative atomics +TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_32x4B, "ii*IicC*", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_32x4B, "vi*iIicC*", "nc", "gfx1250-insts,wavefrontsize32") + +TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_16x8B, "V2iV2i*IicC*", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*", "nc", "gfx1250-insts,wavefrontsize32") + +TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B, "V4iV4i*IicC*", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c76385ad73a91..a591a25367301 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13609,4 +13609,6 @@ def warn_acc_var_referenced_lacks_op // AMDGCN builtins diagnostics def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">; def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">; + +def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">; } // end of sema component. 
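For reference, the prototype strings above decode (following Clang's builtin type encoding) to loads of the form T load(T *ptr, int order, const char *scope) returning int, int2 or int4, and stores of the form void store(T *ptr, T val, int order, const char *scope), where order must be an integer constant expression and scope a string literal. A minimal OpenCL sketch of the scalar 32x4B pair, assuming a gfx1250 target; the kernel name and the chosen ordering and scope are illustrative only:

kernel void coop_atomic_sketch(global int *p)
{
    // Acquire loads and release stores are accepted by the Sema check added
    // later in this patch; acq_rel is rejected for both directions.
    int v = __builtin_amdgcn_cooperative_atomic_load_32x4B(p, __ATOMIC_ACQUIRE, "agent");
    __builtin_amdgcn_cooperative_atomic_store_32x4B(p, v + 1, __ATOMIC_RELEASE, "agent");
}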
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h index d62c9bb65fadb..bac812a9d4fcf 100644 --- a/clang/include/clang/Sema/SemaAMDGPU.h +++ b/clang/include/clang/Sema/SemaAMDGPU.h @@ -26,6 +26,8 @@ class SemaAMDGPU : public SemaBase { bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore); + bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, unsigned NumDataArgs); diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 4c1953e4b8e34..87a46287c4022 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -701,6 +701,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitBuiltinWithOneOverloadedType<5>(*this, E, Intrinsic::amdgcn_load_to_lds); } + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: { + Intrinsic::ID IID; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B: + IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B; + break; + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B: + IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B; + break; + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B: + IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B; + break; + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B: + IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B; + break; + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B: + IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B; + break; + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: + IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B; + break; + } + + LLVMContext &Ctx = CGM.getLLVMContext(); + SmallVector Args; + // last argument is a MD string + const unsigned ScopeArg = E->getNumArgs() - 1; + for (unsigned i = 0; i != ScopeArg; ++i) + Args.push_back(EmitScalarExpr(E->getArg(i))); + StringRef Arg = cast(E->getArg(ScopeArg)->IgnoreParenCasts()) + ->getString(); + llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)}); + Args.push_back(llvm::MetadataAsValue::get(Ctx, MD)); + // Intrinsic is typed based on the pointer AS. Pointer is always the first + // argument. 
+ llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()}); + return Builder.CreateCall(F, {Args}); + } case AMDGPU::BI__builtin_amdgcn_get_fpenv: { Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv, {llvm::Type::getInt64Ty(getLLVMContext())}); diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 1913bb830ccd0..baba503239e9f 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Sema/Ownership.h" #include "clang/Sema/Sema.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/AtomicOrdering.h" #include @@ -100,6 +101,14 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6: case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6: return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7); + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B: + return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false); + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B: + case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: + return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true); default: return false; } @@ -145,6 +154,50 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, return false; } +bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) { + bool Fail = false; + + // First argument is a global or generic pointer. + Expr *PtrArg = TheCall->getArg(0); + QualType PtrTy = PtrArg->getType()->getPointeeType(); + unsigned AS = getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace()); + if (AS != llvm::AMDGPUAS::FLAT_ADDRESS && + AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) { + Fail = true; + Diag(TheCall->getBeginLoc(), diag::err_amdgcn_coop_atomic_invalid_as) + << PtrArg->getSourceRange(); + } + + // Check atomic ordering + Expr *AtomicOrdArg = TheCall->getArg(IsStore ? 2 : 1); + Expr::EvalResult AtomicOrdArgRes; + if (!AtomicOrdArg->EvaluateAsInt(AtomicOrdArgRes, getASTContext())) + llvm_unreachable("Intrinsic requires imm for atomic ordering argument!"); + auto Ord = + llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue()); + + // Atomic ordering cannot be acq_rel in any case, acquire for stores or + // release for loads. + if (!llvm::isValidAtomicOrderingCABI((unsigned)Ord) || + (Ord == llvm::AtomicOrderingCABI::acq_rel) || + Ord == (IsStore ? 
llvm::AtomicOrderingCABI::acquire + : llvm::AtomicOrderingCABI::release)) { + return Diag(AtomicOrdArg->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << 0 << AtomicOrdArg->getSourceRange(); + } + + // Last argument is a string literal + Expr *Arg = TheCall->getArg(TheCall->getNumArgs() - 1); + if (!isa(Arg->IgnoreParenImpCasts())) { + Fail = true; + Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) + << Arg->getSourceRange(); + } + + return Fail; +} + bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, unsigned NumDataArgs) { assert(NumDataArgs <= 2); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl new file mode 100644 index 0000000000000..8768f2f367654 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl @@ -0,0 +1,104 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s + +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v4i __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B( +// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[VAL]], i32 0, metadata [[META4:![0-9]+]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val) +{ + __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent"); +} + +// CHECK-LABEL: define dso_local i32 @test_amdgcn_cooperative_atomic_load_32x4B( +// CHECK-SAME: ptr noundef readonly captures(none) [[ADDR:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr [[ADDR]], i32 0, metadata [[META5:![0-9]+]]) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_cooperative_atomic_load_32x4B(int* addr) +{ + return __builtin_amdgcn_cooperative_atomic_load_32x4B(addr, __ATOMIC_RELAXED, ""); +} + +// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_16x8B( +// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <2 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p1(ptr addrspace(1) [[GADDR]], <2 x i32> [[VAL]], i32 0, metadata [[META5]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val) +{ + __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, ""); +} + +// CHECK-LABEL: define dso_local <2 x i32> @test_amdgcn_cooperative_atomic_load_16x8B( +// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META6:![0-9]+]]) +// CHECK-NEXT: ret <2 x i32> [[TMP0]] +// +v2i 
test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr) +{ + return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, "workgroup"); +} + +// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_8x16B( +// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <4 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p1(ptr addrspace(1) [[GADDR]], <4 x i32> [[VAL]], i32 0, metadata [[META7:![0-9]+]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val) +{ + __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, "singlethread"); +} + +// CHECK-LABEL: define dso_local <4 x i32> @test_amdgcn_cooperative_atomic_load_8x16B( +// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META4]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr) +{ + return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, "agent"); +} + +// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_truncated( +// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i64 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VAL]] to i32 +// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_cooperative_atomic_store_32x4B_truncated(global int* gaddr, long val) +{ + __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent"); +} + +// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_extended( +// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i8 noundef signext [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[VAL]] to i32 +// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_cooperative_atomic_store_32x4B_extended(global int* gaddr, char val) +{ + __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent"); +} + +//. +// CHECK: [[META4]] = !{!"agent"} +// CHECK: [[META5]] = !{!""} +// CHECK: [[META6]] = !{!"workgroup"} +// CHECK: [[META7]] = !{!"singlethread"} +//. 
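The widths in the builtin names presumably describe how one 128-byte cooperative access is split across a wave32 (32 lanes of 4 bytes, 16 lanes of 8 bytes, or 8 lanes of 16 bytes), which is why the value types are int, int2 and int4. The test above only exercises __ATOMIC_RELAXED; a short OpenCL sketch of the vector variants with the stronger orderings the Sema check still accepts (the function name and scope strings are illustrative only):

typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));

void coop_atomic_vector_sketch(global v2i *p2, global v4i *p4)
{
    // 16x8B variant: int2 value, acquire load paired with a release store.
    v2i a = __builtin_amdgcn_cooperative_atomic_load_16x8B(p2, __ATOMIC_ACQUIRE, "workgroup");
    __builtin_amdgcn_cooperative_atomic_store_16x8B(p2, a, __ATOMIC_RELEASE, "workgroup");

    // 8x16B variant: int4 value, seq_cst in both directions.
    v4i b = __builtin_amdgcn_cooperative_atomic_load_8x16B(p4, __ATOMIC_SEQ_CST, "agent");
    __builtin_amdgcn_cooperative_atomic_store_8x16B(p4, b, __ATOMIC_SEQ_CST, "agent");
}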
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl new file mode 100644 index 0000000000000..0ab9a5a43e718 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl @@ -0,0 +1,66 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s + +typedef int v2i __attribute__((ext_vector_type(2))); +typedef int v4i __attribute__((ext_vector_type(4))); + +void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val, const char* syncscope) +{ + __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}} +} + +int test_amdgcn_cooperative_atomic_load_32x4B(global int* gaddr, const char* syncscope) +{ + return __builtin_amdgcn_cooperative_atomic_load_32x4B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}} +} + +void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val, const char* syncscope) +{ + __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}} +} + +v2i test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr, const char* syncscope) +{ + return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}} +} + +void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val, const char* syncscope) +{ + __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}} +} + +v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr, const char* syncscope) +{ + return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}} +} + +v4i test_amdgcn_cooperative_atomic_load_8x16B_release(global v4i* gaddr) +{ + return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELEASE, ""); // expected-warning {{memory order argument to atomic operation is invalid}} +} + +v4i test_amdgcn_cooperative_atomic_load_8x16B_acq_rel(global v4i* gaddr) +{ + return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}} +} + +void test_amdgcn_cooperative_atomic_store_32x4B__sharedptr(local int* addr, int val) +{ + __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_RELAXED, ""); // expected-error {{cooperative atomic requires a global or generic pointer}} +} + +void test_amdgcn_cooperative_atomic_store_32x4B__ordering_not_imm(local int* addr, int ord, int val) +{ + __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, ord, ""); // expected-error {{argument to '__builtin_amdgcn_cooperative_atomic_store_32x4B' must be a constant integer}} +} + +void test_amdgcn_cooperative_atomic_store_32x4B__acquire(int* addr, int ord, int val) +{ + __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQUIRE, ""); // expected-warning {{memory order argument to atomic operation is invalid}} +} + +void test_amdgcn_cooperative_atomic_store_32x4B__acq_rel(int* addr, int 
ord, int val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 4a91b40f0a2e6..5bbc16f2dc743 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3769,6 +3769,36 @@ def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4">,
  DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty],
                        [IntrNoMem, IntrSpeculatable]>;

+class AMDGPUCooperativeAtomicStore<LLVMType Ty> : Intrinsic <
+  [],
+  [llvm_anyptr_ty,    // pointer to store to
+   Ty,                // value to store
+   llvm_i32_ty,       // C ABI Atomic Ordering ID
+   llvm_metadata_ty], // syncscope
+  [IntrWriteMem, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>,
+   IntrNoCallback, IntrNoFree, IntrConvergent],
+  "",
+  [SDNPMemOperand, SDNPMayStore]
+>;
+
+class AMDGPUCooperativeAtomicLoad<LLVMType Ty> : Intrinsic <
+  [Ty],
+  [llvm_anyptr_ty,    // pointer to load from
+   llvm_i32_ty,       // C ABI Atomic Ordering ID
+   llvm_metadata_ty], // syncscope
+  [IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+   IntrNoCallback, IntrNoFree, IntrConvergent],
+  "",
+  [SDNPMemOperand, SDNPMayLoad]
+>;
+
+def int_amdgcn_cooperative_atomic_load_32x4B : AMDGPUCooperativeAtomicLoad<llvm_i32_ty>;
+def int_amdgcn_cooperative_atomic_store_32x4B : AMDGPUCooperativeAtomicStore<llvm_i32_ty>;
+def int_amdgcn_cooperative_atomic_load_16x8B : AMDGPUCooperativeAtomicLoad<llvm_v2i32_ty>;
+def int_amdgcn_cooperative_atomic_store_16x8B : AMDGPUCooperativeAtomicStore<llvm_v2i32_ty>;
+def int_amdgcn_cooperative_atomic_load_8x16B : AMDGPUCooperativeAtomicLoad<llvm_v4i32_ty>;
+def int_amdgcn_cooperative_atomic_store_8x16B : AMDGPUCooperativeAtomicStore<llvm_v4i32_ty>;
+
//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index b4dd88a809d4e..ef88c9507c86d 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1968,6 +1968,12 @@ def atomic_load_nonext_64 :
  let MemoryVT = i64;
}

+def atomic_load_nonext_128 :
+  PatFrag<(ops node:$ptr), (atomic_load_nonext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i128;
+}
+
def atomic_load_zext_8 :
  PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
@@ -2199,6 +2205,13 @@ def atomic_store_64 :
  let MemoryVT = i64;
}

+def atomic_store_128 :
+  PatFrag<(ops node:$val, node:$ptr),
+          (atomic_store node:$val, node:$ptr)> {
+  let IsAtomic = true;
+  let MemoryVT = i128;
+}
+
//===----------------------------------------------------------------------===//
// Selection DAG Pattern Support.
//
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 48007be924bda..b28515083d1d2 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6787,6 +6787,28 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
          "invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
    break;
  }
+  case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
+    // Check we only use this intrinsic on the FLAT or GLOBAL address spaces.
+    Value *PtrArg = Call.getArgOperand(0);
+    const unsigned AS = PtrArg->getType()->getPointerAddressSpace();
+    Check(AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS,
+          "cooperative atomic intrinsics require a generic or global pointer",
+          &Call, PtrArg);
+
+    // Last argument must be a MD string
+    auto *Op = cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
+    MDNode *MD = cast<MDNode>(Op->getMetadata());
+    Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
+          "cooperative atomic intrinsics require that the last argument is a "
+          "metadata string",
+          &Call, Op);
+    break;
+  }
  case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
  case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
    Value *V = Call.getArgOperand(0);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index efcd87e466207..bd443b5b6f1e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -509,6 +509,10 @@ def atomic_load_nonext_64_#as : PatFrag<(ops node:$ptr), (atomic_load_nonext_64 node:$ptr)> {
  let IsAtomic = 1;
}

+def atomic_load_nonext_128_#as : PatFrag<(ops node:$ptr), (atomic_load_nonext_128 node:$ptr)> {
+  let IsAtomic = 1;
+}
+
def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext_8 node:$ptr)> {
  let IsAtomic = 1;
}
@@ -573,6 +577,8 @@ def atomic_store_32_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_32 node:$val, node:$ptr)>;
def atomic_store_64_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_64 node:$val, node:$ptr)>;
+def atomic_store_128_#as : PatFrag<(ops node:$val, node:$ptr),
+                                   (atomic_store_128 node:$val, node:$ptr)>;

} // End let IsAtomic = 1, AddressSpaces = ...
} // End foreach as diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 55a76f1172cb9..64ba53c5d45d5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -7819,6 +7819,20 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MI.eraseFromParent(); return true; } + case Intrinsic::amdgcn_cooperative_atomic_load_32x4B: + case Intrinsic::amdgcn_cooperative_atomic_load_16x8B: + case Intrinsic::amdgcn_cooperative_atomic_load_8x16B: + assert(MI.hasOneMemOperand() && "Expected IRTranslator to set MemOp!"); + B.buildLoad(MI.getOperand(0), MI.getOperand(2), **MI.memoperands_begin()); + MI.eraseFromParent(); + return true; + case Intrinsic::amdgcn_cooperative_atomic_store_32x4B: + case Intrinsic::amdgcn_cooperative_atomic_store_16x8B: + case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: + assert(MI.hasOneMemOperand() && "Expected IRTranslator to set MemOp!"); + B.buildStore(MI.getOperand(2), MI.getOperand(1), **MI.memoperands_begin()); + MI.eraseFromParent(); + return true; default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 69aef421bba45..1617f7954a5ee 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1944,6 +1944,7 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi defm : FlatLoadPats ; defm : FlatLoadPats ; defm : FlatLoadPats ; +defm : FlatLoadPats ; defm : FlatStorePats ; defm : FlatStorePats ; @@ -1968,6 +1969,7 @@ defm : FlatStorePats ; defm : FlatStorePats ; defm : FlatStorePats ; defm : FlatStorePats ; +defm : FlatStorePats ; defm : FlatStorePats ; defm : FlatStorePats ; @@ -2114,6 +2116,7 @@ defm : GlobalFLATStorePats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; @@ -2154,6 +2157,7 @@ defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; +defm : GlobalFLATStorePats ; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c5237893ed433..b254502dd6f53 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1269,20 +1269,55 @@ static unsigned getIntrMemWidth(unsigned IntrID) { case Intrinsic::amdgcn_global_load_async_to_lds_b32: case Intrinsic::amdgcn_cluster_load_async_to_lds_b32: case Intrinsic::amdgcn_global_store_async_from_lds_b32: + case Intrinsic::amdgcn_cooperative_atomic_load_32x4B: + case Intrinsic::amdgcn_cooperative_atomic_store_32x4B: return 32; case Intrinsic::amdgcn_global_load_async_to_lds_b64: case Intrinsic::amdgcn_cluster_load_async_to_lds_b64: case Intrinsic::amdgcn_global_store_async_from_lds_b64: + case Intrinsic::amdgcn_cooperative_atomic_load_16x8B: + case Intrinsic::amdgcn_cooperative_atomic_store_16x8B: return 64; case Intrinsic::amdgcn_global_load_async_to_lds_b128: case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: case Intrinsic::amdgcn_global_store_async_from_lds_b128: + case 
Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_8x16B:
    return 128;
  default:
    llvm_unreachable("Unknown width");
  }
}

+static void getCoopAtomicOperandsInfo(const CallInst &CI, bool IsLoad,
+                                      TargetLoweringBase::IntrinsicInfo &Info) {
+  Value *OrderingArg = CI.getArgOperand(IsLoad ? 1 : 2);
+  unsigned Ord = cast<ConstantInt>(OrderingArg)->getZExtValue();
+  switch (AtomicOrderingCABI(Ord)) {
+  case AtomicOrderingCABI::acquire:
+    Info.order = AtomicOrdering::Acquire;
+    break;
+  case AtomicOrderingCABI::release:
+    Info.order = AtomicOrdering::Release;
+    break;
+  case AtomicOrderingCABI::seq_cst:
+    Info.order = AtomicOrdering::SequentiallyConsistent;
+    break;
+  default:
+    Info.order = AtomicOrdering::Monotonic;
+    break;
+  }
+
+  Info.flags =
+      (IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore);
+  Info.flags |= MOCooperative;
+
+  MDNode *ScopeMD = cast<MDNode>(
+      cast<MetadataAsValue>(CI.getArgOperand(IsLoad ? 2 : 3))->getMetadata());
+  StringRef Scope = cast<MDString>(ScopeMD->getOperand(0))->getString();
+  Info.ssid = CI.getContext().getOrInsertSyncScopeID(Scope);
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                          const CallInst &CI,
                                          MachineFunction &MF,
@@ -1532,6 +1567,26 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  }
+  case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_8x16B: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
+    Info.ptrVal = CI.getOperand(0);
+    Info.align.reset();
+    getCoopAtomicOperandsInfo(CI, /*IsLoad=*/true, Info);
+    return true;
+  }
+  case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
+    Info.ptrVal = CI.getArgOperand(0);
+    Info.align.reset();
+    getCoopAtomicOperandsInfo(CI, /*IsLoad=*/false, Info);
+    return true;
+  }
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
@@ -10270,6 +10325,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
    auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
    return SDValue(NewMI, 0);
  }
+  case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_8x16B: {
+    MemIntrinsicSDNode *MII = cast<MemIntrinsicSDNode>(Op);
+    SDValue Chain = Op->getOperand(0);
+    SDValue Ptr = Op->getOperand(2);
+    EVT VT = Op->getValueType(0);
+    return DAG.getAtomicLoad(ISD::NON_EXTLOAD, DL, MII->getMemoryVT(), VT,
+                             Chain, Ptr, MII->getMemOperand());
+  }
  default:
    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
            AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -10902,6 +10967,16 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                   Op->getVTList(), Ops, M->getMemoryVT(),
                                   M->getMemOperand());
  }
+  case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
+    MemIntrinsicSDNode *MII = cast<MemIntrinsicSDNode>(Op);
+    SDValue Chain = Op->getOperand(0);
+    SDValue Ptr = Op->getOperand(2);
+    SDValue Val = Op->getOperand(3);
+    return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MII->getMemoryVT(), Chain, Val,
+                         Ptr, MII->getMemOperand());
+  }
  default: {
    if (const
AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 85981bc22603e..fe849cafb65d1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -9567,6 +9567,7 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const { { {MONoClobber, "amdgpu-noclobber"}, {MOLastUse, "amdgpu-last-use"}, + {MOCooperative, "amdgpu-cooperative"}, }; return ArrayRef(TargetFlags); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1070d4824aa14..c964d02ee2b97 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -48,6 +48,10 @@ static const MachineMemOperand::Flags MONoClobber = static const MachineMemOperand::Flags MOLastUse = MachineMemOperand::MOTargetFlag2; +/// Mark the MMO of cooperative load/store atomics. +static const MachineMemOperand::Flags MOCooperative = + MachineMemOperand::MOTargetFlag3; + /// Utility to store machine instructions worklist. struct SIInstrWorklist { SIInstrWorklist() = default; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 3006eaf468bdd..6ab4eb4bde97c 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -103,6 +103,7 @@ class SIMemOpInfo final { bool IsVolatile = false; bool IsNonTemporal = false; bool IsLastUse = false; + bool IsCooperative = false; SIMemOpInfo( AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent, @@ -112,14 +113,15 @@ class SIMemOpInfo final { bool IsCrossAddressSpaceOrdering = true, AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent, bool IsVolatile = false, bool IsNonTemporal = false, - bool IsLastUse = false) + bool IsLastUse = false, bool IsCooperative = false) : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope), OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace), IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering), IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal), - IsLastUse(IsLastUse) { + IsLastUse(IsLastUse), IsCooperative(IsCooperative) { if (Ordering == AtomicOrdering::NotAtomic) { + assert(!IsCooperative && "Cannot be cooperative & non-atomic!"); assert(Scope == SIAtomicScope::NONE && OrderingAddrSpace == SIAtomicAddrSpace::NONE && !IsCrossAddressSpaceOrdering && @@ -209,6 +211,9 @@ class SIMemOpInfo final { /// create this SIMemOpInfo is last use, false otherwise. bool isLastUse() const { return IsLastUse; } + /// \returns True if this is a cooperative load or store atomic. + bool isCooperative() const { return IsCooperative; } + /// \returns True if ordering constraint of the machine instruction used to /// create this SIMemOpInfo is unordered or higher, false otherwise. bool isAtomic() const { @@ -325,6 +330,12 @@ class SICacheControl { return false; }; + /// Handle cooperative load/store atomics. + virtual bool handleCooperativeAtomic(MachineInstr &MI) const { + llvm_unreachable( + "cooperative atomics are not available on this architecture"); + } + /// Inserts any necessary instructions at position \p Pos relative /// to instruction \p MI to ensure memory instructions before \p Pos of kind /// \p Op associated with address spaces \p AddrSpace have completed. 
Used @@ -612,6 +623,8 @@ class SIGfx12CacheControl : public SIGfx11CacheControl { bool finalizeStore(MachineInstr &MI, bool Atomic) const override; + virtual bool handleCooperativeAtomic(MachineInstr &MI) const override; + bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, Position Pos) const override; @@ -812,6 +825,7 @@ std::optional SIMemOpAccess::constructFromMIWithMMO( bool IsNonTemporal = true; bool IsVolatile = false; bool IsLastUse = false; + bool IsCooperative = false; // Validator should check whether or not MMOs cover the entire set of // locations accessed by the memory instruction. @@ -819,6 +833,7 @@ std::optional SIMemOpAccess::constructFromMIWithMMO( IsNonTemporal &= MMO->isNonTemporal(); IsVolatile |= MMO->isVolatile(); IsLastUse |= MMO->getFlags() & MOLastUse; + IsCooperative |= MMO->getFlags() & MOCooperative; InstrAddrSpace |= toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace()); AtomicOrdering OpOrdering = MMO->getSuccessOrdering(); @@ -860,7 +875,7 @@ std::optional SIMemOpAccess::constructFromMIWithMMO( } return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace, IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile, - IsNonTemporal, IsLastUse); + IsNonTemporal, IsLastUse, IsCooperative); } std::optional @@ -2615,6 +2630,19 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const { return false; } +bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &MI) const { + if (!ST.hasGFX1250Insts()) + return false; + + // Cooperative atomics need to be SCOPE_DEV or higher. + MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol); + assert(CPol && "No CPol operand?"); + const unsigned Scope = CPol->getImm() & CPol::SCOPE; + if (Scope < CPol::SCOPE_DEV) + return setScope(MI, CPol::SCOPE_DEV); + return false; +} + bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { @@ -2679,6 +2707,11 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI, MOI.getOrderingAddrSpace()); } + // Handle cooperative atomics after cache bypass step, as it may override + // the scope of the instruction to a greater scope. + if (MOI.isCooperative()) + Changed |= CC->handleCooperativeAtomic(*MI); + if (Order == AtomicOrdering::SequentiallyConsistent) Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(), SIMemOp::LOAD | SIMemOp::STORE, @@ -2724,6 +2757,11 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI, MOI.getOrderingAddrSpace()); } + // Handle cooperative atomics after cache bypass step, as it may override + // the scope of the instruction to a greater scope. 
+ if (MOI.isCooperative()) + Changed |= CC->handleCooperativeAtomic(*MI); + if (MOI.getOrdering() == AtomicOrdering::Release || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= CC->insertRelease(MI, MOI.getScope(), diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll new file mode 100644 index 0000000000000..e3ec4d1f0f67a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll @@ -0,0 +1,521 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void 
@llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] 
scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; 
GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !0) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: 
test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata 
!1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; 
GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !1) + ret void +} + +!0 = !{ !"agent" } +!1 = !{ !"agent-one-as" } + +;; GFX1250: {{.*}} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX1250-GISEL: {{.*}} +; GFX1250-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-basic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-basic.ll new file mode 100644 index 0000000000000..1a2d2fe3f441c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-basic.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +; test behavior with unsupported 'consume' and 'acq_rel' ordering + +define i32 @test_load_consume(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_load_consume: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 1, metadata !0) + ret i32 %0 +} + +define i32 @test_load_acqrel(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_load_acqrel: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 4, metadata !0) + ret i32 %0 +} + +define void @test_store_acqrel(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: 
test_store_acqrel: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 4, metadata !0) + ret void +} + +!0 = !{ !"" } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX1250-GISEL: {{.*}} +; GFX1250-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll new file mode 100644 index 0000000000000..f97bced6364db --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll @@ -0,0 +1,479 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; 
GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: 
flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr 
%addr, <2 x i32> %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !0) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !1) + ret void +} + +define void 
@test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; 
GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; 
GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !1) + ret void +} + +!0 = !{ !"singlethread" } +!1 = !{ !"singlethread-one-as" } + +;; GFX1250: {{.*}} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX1250-GISEL: {{.*}} +; GFX1250-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-system.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-system.ll new file mode 100644 index 0000000000000..c4234cc0de06a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-system.ll @@ -0,0 +1,533 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; 
GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !0) + ret void +} 
+ +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !0) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: 
test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1) + ret <4 x i32> %0 +} + +define 
void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; 
GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_wb scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_storecnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !1) + ret void +} + +!0 = !{ !"" } +!1 = !{ !"one-as" } + +;; GFX1250: {{.*}} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GFX1250-GISEL: {{.*}} +; GFX1250-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-wavefront.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-wavefront.ll new file mode 100644 index 0000000000000..329b7612426dd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-wavefront.ll @@ -0,0 +1,479 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !0) + ret void +} + +define void 
@test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; 
%entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: 
s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !0) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> 
%val, i32 0, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !1) + ret void +} + +define i32 
@test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !1) + ret void +} + +!0 = !{ !"wavefront" } +!1 = !{ !"wavefront-one-as" } + +;; GFX1250: {{.*}} +;; NOTE: These prefixes are unused and the list is 
autogenerated. Do not add tests below this line: +; GFX1250-GISEL: {{.*}} +; GFX1250-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll new file mode 100644 index 0000000000000..e86f0e0083805 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll @@ -0,0 +1,479 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !0) + ret void +} + +define void 
@test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; 
%entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !0) + ret void +} + +define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0) + ret i32 %0 +} + +define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0) + ret <2 x i32> %0 +} + +define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0) + ret <4 x i32> %0 +} + +define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !0) + ret void +} + +define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: 
s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !0) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> 
%val, i32 0, metadata !1) + ret void +} + +define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !1) + ret void +} + +define i32 
@test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1) + ret i32 %0 +} + +define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1) + ret <2 x i32> %0 +} + +define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + %0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1) + ret <4 x i32> %0 +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !1) + ret void +} + +define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) { +; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !1) + ret void +} + +!0 = !{ !"workgroup" } +!1 = !{ !"workgroup-one-as" } + +;; GFX1250: {{.*}} +;; NOTE: These prefixes are unused and the list is 
autogenerated. Do not add tests below this line: +; GFX1250-GISEL: {{.*}} +; GFX1250-SDAG: {{.*}} diff --git a/llvm/test/Verifier/AMDGPU/llvm.amdgcn.cooperative.atomic.ll b/llvm/test/Verifier/AMDGPU/llvm.amdgcn.cooperative.atomic.ll new file mode 100644 index 0000000000000..33c8a59bb2aa4 --- /dev/null +++ b/llvm/test/Verifier/AMDGPU/llvm.amdgcn.cooperative.atomic.ll @@ -0,0 +1,46 @@ +; RUN: not opt -mtriple=amdgcn -mcpu=gfx1250 -passes=verify -disable-output <%s 2>&1 | FileCheck %s + +; CHECK: cooperative atomic intrinsics require a generic or global pointer +define i32 @load_local_as(ptr addrspace(3) noundef %addr) { +entry: + %res = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p3(ptr addrspace(3) %addr, i32 0, metadata !0) + ret i32 %res +} + +; CHECK: cooperative atomic intrinsics require a generic or global pointer +define i32 @load_private_as(ptr addrspace(5) noundef %addr) { +entry: + %res = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p5(ptr addrspace(5) %addr, i32 0, metadata !0) + ret i32 %res +} + +; CHECK: cooperative atomic intrinsics require a generic or global pointer +define void @store_local_as(ptr addrspace(3) noundef %addr, i32 noundef %val) { +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p3(ptr addrspace(3) %addr, i32 %val, i32 0, metadata !0) + ret void +} + +; CHECK: cooperative atomic intrinsics require a generic or global pointer +define void @store_private_as(ptr addrspace(5) noundef %addr, i32 noundef %val) { +entry: + tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p5(ptr addrspace(5) %addr, i32 %val, i32 0, metadata !0) + ret void +} + + +; CHECK: cooperative atomic intrinsics require that the last argument is a metadata string +define i32 @test_empty_md(ptr noundef readonly %addr) { +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 1, metadata !{}) + ret i32 %0 +} + +; CHECK: cooperative atomic intrinsics require that the last argument is a metadata string +define i32 @test_no_md_str(ptr noundef readonly %addr) { +entry: + %0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 1, metadata !{!{}}) + ret i32 %0 +} + +!0 = !{ !"" }
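The negative verifier tests above reject local/private pointers and non-string metadata; for contrast, here is a minimal hand-written sketch (illustrative only, not part of this patch) of IR the verifier should accept: a global (addrspace(1)) pointer, a legal ordering value, and a metadata string naming the synchronization scope as the final operand. The @load_global_as_ok name and the .p1 mangling are assumptions that follow the .p0/.p3/.p5 pattern used in the tests above.

; Illustrative sketch (not part of the patch): a well-formed use of the
; cooperative atomic load intrinsic -- global address space pointer,
; relaxed ordering (i32 0), and a metadata string scope as the last operand.
define i32 @load_global_as_ok(ptr addrspace(1) noundef %addr) {
entry:
  %res = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p1(ptr addrspace(1) %addr, i32 0, metadata !1)
  ret i32 %res
}

!1 = !{ !"workgroup" }

The scope string would be one of the names exercised by the codegen tests above ("wavefront", "workgroup", or their "-one-as" variants); the ordering operand follows the same encoding as those tests (0 = relaxed, 2 = acquire, 3 = release, 5 = seq_cst).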