[AMDGPU][Clang] use ScopeModel to translate integer scopes [NFC]#198250
Conversation
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang-codegen Author: Sameer Sahasrabuddhe (ssahasra) ChangesAssisted-By: Claude Opus 4.6 Full diff: https://github.com/llvm/llvm-project/pull/198250.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 751cd9847bd31..ce4a53c768d35 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -383,6 +383,22 @@ static llvm::AtomicOrdering mapCABIAtomicOrdering(unsigned AO) {
llvm_unreachable("Unknown AtomicOrderingCABI enum");
}
+/// Convert a __MEMORY_SCOPE_* integer constant to a metadata node containing
+/// the target-specific sync scope string.
+static llvm::MetadataAsValue *
+emitScopeMD(CodeGenFunction &CGF, unsigned ScopeInt,
+ llvm::AtomicOrdering AO =
+ llvm::AtomicOrdering::SequentiallyConsistent) {
+ AtomicScopeGenericModel ScopeModel;
+ SyncScope Scope = ScopeModel.map(ScopeInt);
+ StringRef ScopeStr = CGF.CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr(
+ CGF.CGM.getLangOpts(), Scope, AO);
+ llvm::LLVMContext &Ctx = CGF.CGM.getLLVMContext();
+ llvm::MDNode *MD =
+ llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)});
+ return llvm::MetadataAsValue::get(Ctx, MD);
+}
+
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
// \p Order takes a C++11 compatible memory-ordering specifier and converts
@@ -927,22 +943,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
break;
}
- LLVMContext &Ctx = CGM.getLLVMContext();
llvm::Type *LoadTy = ConvertType(E->getType());
llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
auto *AOExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(1)));
auto *ScopeExpr = cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(2)));
-
- auto Scope = static_cast<SyncScope>(ScopeExpr->getZExtValue());
llvm::AtomicOrdering AO = mapCABIAtomicOrdering(AOExpr->getZExtValue());
- StringRef ScopeStr = CGM.getTargetCodeGenInfo().getLLVMSyncScopeStr(
- CGM.getLangOpts(), Scope, AO);
-
- llvm::MDNode *MD =
- llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, ScopeStr)});
- llvm::Value *ScopeMD = llvm::MetadataAsValue::get(Ctx, MD);
+ llvm::Value *ScopeMD = emitScopeMD(*this, ScopeExpr->getZExtValue(), AO);
llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
return Builder.CreateCall(F, {Addr, AOExpr, ScopeMD});
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
index 8ecd6ba61a03e..fcc246cecd1dd 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
@@ -64,3 +64,23 @@ v4i test_amdgcn_flat_load_monitor_b128(v4i* inptr)
{
return __builtin_amdgcn_flat_load_monitor_b128(inptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32_wavefront(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 0, metadata [[META12:![0-9]+]])
+// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
+//
+int test_amdgcn_global_load_monitor_b32_wavefront(global int* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b32(inptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32_single(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 0, metadata [[META13:![0-9]+]])
+// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
+//
+int test_amdgcn_global_load_monitor_b32_single(global int* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b32(inptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
Assisted-By: Claude Opus 4.6
2f966a9 to
39e36ba
Compare
|
This change can now be reviewed independently from #198245 |
|
This is now a general refactoring that replaces special handling of integer scopes with a call to |
| } | ||
|
|
||
| // Map a __MEMORY_SCOPE_* integer constant to the AMDGPU-specific syncscope. | ||
| // Invalid scope values are mapped to system scope (empty string). |
There was a problem hiding this comment.
I understand this is the current behavior, though I wonder whether we should error out instead,
There was a problem hiding this comment.
Maybe, but perhaps there is a better place to error out, such as the AMDGPU verifier. This function being refactored here might not occur on all paths. It's just providing a reasonable default within its area of influence that could be changed to unreachable if there was a verifier.
There was a problem hiding this comment.
But the verifier doesn't see the value right?
There was a problem hiding this comment.
I really haven't thought about it much. It's just a refactor that establishes a consistent handling of scopes for now.
The assumption here is that AMDGPU builtins (typically suffixed with
__builtin_amdgcn) use the__MEMORY_SCOPE_*enumeration, and not the__HIP_MEMORY_SCOPE_*enumeration (which is how it should be).Assisted-By: Claude Opus 4.6