diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 751cd9847bd31..cb883e8780e59 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -383,6 +383,30 @@ static llvm::AtomicOrdering mapCABIAtomicOrdering(unsigned AO) { llvm_unreachable("Unknown AtomicOrderingCABI enum"); } +// Map a __MEMORY_SCOPE_* integer constant to the AMDGPU-specific syncscope. +// Invalid scope values are mapped to system scope (empty string). +static StringRef getAMDGPUSyncScopeStr(CodeGenModule &CGM, unsigned ScopeInt, + llvm::AtomicOrdering AO) { + AtomicScopeGenericModel ScopeModel; + if (!ScopeModel.isValid(ScopeInt)) + return ""; + clang::SyncScope Scope = ScopeModel.map(ScopeInt); + return CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr(CGM.getLangOpts(), + Scope, AO); +} + +/// Convert a __MEMORY_SCOPE_* integer constant to a metadata node containing +/// the target-specific sync scope string. +static llvm::MetadataAsValue *emitScopeMD( + CodeGenFunction &CGF, unsigned ScopeInt, + llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent) { + StringRef ScopeStr = getAMDGPUSyncScopeStr(CGF.CGM, ScopeInt, AO); + llvm::LLVMContext &Ctx = CGF.CGM.getLLVMContext(); + llvm::MDNode *MD = + llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)}); + return llvm::MetadataAsValue::get(Ctx, MD); +} + // For processing memory ordering and memory scope arguments of various // amdgcn builtins. // \p Order takes a C++11 compatible memory-ordering specifier and converts @@ -407,33 +431,9 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, } // Older builtins had an enum argument for the memory scope. - const char *SSN = nullptr; - int scope = cast(Scope)->getZExtValue(); - switch (scope) { - case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM - SSID = llvm::SyncScope::System; - break; - case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE - SSN = getTarget().getTriple().isSPIRV() ? "device" : "agent"; - break; - case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP - SSN = "workgroup"; - break; - case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR - SSN = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster"; - break; - case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT - SSN = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront"; - break; - case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE - SSID = llvm::SyncScope::SingleThread; - break; - default: - SSID = llvm::SyncScope::System; - break; - } - if (SSN) - SSID = getLLVMContext().getOrInsertSyncScopeID(SSN); + unsigned scope = cast(Scope)->getZExtValue(); + StringRef SSN = getAMDGPUSyncScopeStr(CGM, scope, AO); + SSID = getLLVMContext().getOrInsertSyncScopeID(SSN); } void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, @@ -927,22 +927,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, break; } - LLVMContext &Ctx = CGM.getLLVMContext(); llvm::Type *LoadTy = ConvertType(E->getType()); llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); auto *AOExpr = cast(EmitScalarExpr(E->getArg(1))); auto *ScopeExpr = cast(EmitScalarExpr(E->getArg(2))); - - auto Scope = static_cast(ScopeExpr->getZExtValue()); llvm::AtomicOrdering AO = mapCABIAtomicOrdering(AOExpr->getZExtValue()); - StringRef ScopeStr = CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr( - CGM.getLangOpts(), Scope, AO); - - llvm::MDNode *MD = - llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)}); - llvm::Value *ScopeMD = llvm::MetadataAsValue::get(Ctx, MD); + llvm::Value *ScopeMD = emitScopeMD(*this, ScopeExpr->getZExtValue(), AO); llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy}); return Builder.CreateCall(F, {Addr, AOExpr, ScopeMD}); } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl index 8ecd6ba61a03e..4e285fcb217e2 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl @@ -64,3 +64,30 @@ v4i test_amdgcn_flat_load_monitor_b128(v4i* inptr) { return __builtin_amdgcn_flat_load_monitor_b128(inptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); } + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32_wavefront( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 0, metadata [[META12:![0-9]+]]) +// CHECK-GFX1250-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_global_load_monitor_b32_wavefront(global int* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b32(inptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32_single( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 0, metadata [[META13:![0-9]+]]) +// CHECK-GFX1250-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_global_load_monitor_b32_single(global int* inptr) +{ + return __builtin_amdgcn_global_load_monitor_b32(inptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); +} + +// CHECK-GFX1250: [[META8]] = !{!""} +// CHECK-GFX1250: [[META9]] = !{!"agent"} +// CHECK-GFX1250: [[META10]] = !{!"workgroup"} +// CHECK-GFX1250: [[META11]] = !{!"cluster"} +// CHECK-GFX1250: [[META12]] = !{!"wavefront"} +// CHECK-GFX1250: [[META13]] = !{!"singlethread"} diff --git a/clang/test/SemaHIP/incorrect-atomic-scope.hip b/clang/test/SemaHIP/incorrect-atomic-scope.hip index 07499b412aa30..1c5aaee710051 100644 --- a/clang/test/SemaHIP/incorrect-atomic-scope.hip +++ b/clang/test/SemaHIP/incorrect-atomic-scope.hip @@ -14,7 +14,7 @@ // // CHECK-LABEL: test_intrinsic_metadata // CHECK: call i32 @llvm.amdgcn.flat.load.monitor{{.*}} metadata [[SCOPE:![0-9]+]] -// CHECK: [[SCOPE]] = !{!"wavefront"} +// CHECK: [[SCOPE]] = !{!"singlethread"} __device__ void test_builtin_rmw(__attribute__((address_space(3))) float *out, float src) { *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT, false);