-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[clang] Add support for cluster sync scope #162575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-amdgpu Author: None (macurtis-amd) Changes: Patch is 400.54 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/162575.diff 15 Files Affected:
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index b4a671e3cfa3c..ec2af2a6f569d 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -164,6 +164,8 @@ Predefined Macros
- Represents wavefront memory scope in HIP (value is 2).
* - ``__HIP_MEMORY_SCOPE_WORKGROUP``
- Represents workgroup memory scope in HIP (value is 3).
+ * - ``__HIP_MEMORY_SCOPE_CLUSTER``
+ - Represents cluster memory scope in HIP (value is 6).
* - ``__HIP_MEMORY_SCOPE_AGENT``
- Represents agent memory scope in HIP (value is 4).
* - ``__HIP_MEMORY_SCOPE_SYSTEM``
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 6bb99c757cd19..bef6e9c14b182 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4846,6 +4846,7 @@ currently supported:
* ``__MEMORY_SCOPE_SYSTEM``
* ``__MEMORY_SCOPE_DEVICE``
* ``__MEMORY_SCOPE_WRKGRP``
+* ``__MEMORY_SCOPE_CLUSTR``
* ``__MEMORY_SCOPE_WVFRNT``
* ``__MEMORY_SCOPE_SINGLE``
diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h
index 5a8d2a7dd02e5..614e5faa78696 100644
--- a/clang/include/clang/Basic/SyncScope.h
+++ b/clang/include/clang/Basic/SyncScope.h
@@ -43,11 +43,13 @@ enum class SyncScope {
SystemScope,
DeviceScope,
WorkgroupScope,
+ ClusterScope,
WavefrontScope,
SingleScope,
HIPSingleThread,
HIPWavefront,
HIPWorkgroup,
+ HIPCluster,
HIPAgent,
HIPSystem,
OpenCLWorkGroup,
@@ -65,6 +67,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
return "device_scope";
case SyncScope::WorkgroupScope:
return "workgroup_scope";
+ case SyncScope::ClusterScope:
+ return "cluster_scope";
case SyncScope::WavefrontScope:
return "wavefront_scope";
case SyncScope::SingleScope:
@@ -75,6 +79,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
return "hip_wavefront";
case SyncScope::HIPWorkgroup:
return "hip_workgroup";
+ case SyncScope::HIPCluster:
+ return "hip_cluster";
case SyncScope::HIPAgent:
return "hip_agent";
case SyncScope::HIPSystem:
@@ -180,7 +186,10 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
Workgroup = 3,
Agent = 4,
System = 5,
- Last = System
+ Cluster = 6,
+ End,
+ Last = End - 1,
+ Count = Last
};
AtomicScopeHIPModel() {}
@@ -193,10 +202,14 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
return SyncScope::HIPWavefront;
case Workgroup:
return SyncScope::HIPWorkgroup;
+ case Cluster:
+ return SyncScope::HIPCluster;
case Agent:
return SyncScope::HIPAgent;
case System:
return SyncScope::HIPSystem;
+ case End:
+ break;
}
llvm_unreachable("Invalid language sync scope value");
}
@@ -207,11 +220,12 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
}
ArrayRef<unsigned> getRuntimeValues() const override {
- static_assert(Last == System, "Does not include all sync scopes");
static const unsigned Scopes[] = {
static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront),
- static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent),
- static_cast<unsigned>(System)};
+ static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
+ static_cast<unsigned>(System), static_cast<unsigned>(Agent)};
+ static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+ "Does not include all sync scopes");
return llvm::ArrayRef(Scopes);
}
@@ -223,14 +237,17 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
/// Defines the generic atomic scope model.
class AtomicScopeGenericModel : public AtomicScopeModel {
public:
- /// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
+ /// The enum values match predefined built-in macros __MEMORY_SCOPE_*.
enum ID {
System = 0,
Device = 1,
Workgroup = 2,
Wavefront = 3,
Single = 4,
- Last = Single
+ Cluster = 5,
+ End,
+ Last = End - 1,
+ Count = End
};
AtomicScopeGenericModel() = default;
@@ -243,10 +260,14 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
return SyncScope::SystemScope;
case Workgroup:
return SyncScope::WorkgroupScope;
+ case Cluster:
+ return SyncScope::ClusterScope;
case Wavefront:
return SyncScope::WavefrontScope;
case Single:
return SyncScope::SingleScope;
+ case End:
+ break;
}
llvm_unreachable("Invalid language sync scope value");
}
@@ -256,11 +277,12 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
}
ArrayRef<unsigned> getRuntimeValues() const override {
- static_assert(Last == Single, "Does not include all sync scopes");
static const unsigned Scopes[] = {
- static_cast<unsigned>(Device), static_cast<unsigned>(System),
- static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
- static_cast<unsigned>(Single)};
+ static_cast<unsigned>(System), static_cast<unsigned>(Device),
+ static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
+ static_cast<unsigned>(Wavefront), static_cast<unsigned>(Single)};
+ static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+ "Does not include all sync scopes");
return llvm::ArrayRef(Scopes);
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec06199dc..97b5828011cd4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CGBuiltin.h"
+#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -242,33 +243,33 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
}
// Older builtins had an enum argument for the memory scope.
+ const char *ssn = nullptr;
int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
switch (scope) {
- case 0: // __MEMORY_SCOPE_SYSTEM
+ case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
SSID = llvm::SyncScope::System;
break;
- case 1: // __MEMORY_SCOPE_DEVICE
- if (getTarget().getTriple().isSPIRV())
- SSID = getLLVMContext().getOrInsertSyncScopeID("device");
- else
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
+ ssn = getTarget().getTriple().isSPIRV() ? "device" : "agent";
break;
- case 2: // __MEMORY_SCOPE_WRKGRP
- SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
+ case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
+ ssn = "workgroup";
break;
- case 3: // __MEMORY_SCOPE_WVFRNT
- if (getTarget().getTriple().isSPIRV())
- SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
- else
- SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+ case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
+ ssn = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
+ break;
+ case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
+ ssn = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
break;
- case 4: // __MEMORY_SCOPE_SINGLE
+ case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
SSID = llvm::SyncScope::SingleThread;
break;
default:
SSID = llvm::SyncScope::System;
break;
}
+ if (ssn)
+ SSID = getLLVMContext().getOrInsertSyncScopeID(ssn);
}
llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e458a34..c74a1b6098922 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -488,6 +488,10 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
case SyncScope::WavefrontScope:
Name = "wavefront";
break;
+ case SyncScope::HIPCluster:
+ case SyncScope::ClusterScope:
+ Name = "cluster";
+ break;
case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
case SyncScope::WorkgroupScope:
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa63143a66cd..fbf29186faf24 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -90,6 +90,8 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
case SyncScope::OpenCLSubGroup:
case SyncScope::WavefrontScope:
return "subgroup";
+ case SyncScope::HIPCluster:
+ case SyncScope::ClusterScope:
case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
case SyncScope::WorkgroupScope:
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b899fb9c6494a..21ab9dca8f0bd 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -616,6 +616,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
+ Builder.defineMacro("__HIP_MEMORY_SCOPE_CLUSTER", "6");
if (LangOpts.HIPStdPar) {
Builder.defineMacro("__HIPSTDPAR__");
if (LangOpts.HIPStdParInterposeAlloc) {
@@ -904,6 +905,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__MEMORY_SCOPE_WRKGRP", "2");
Builder.defineMacro("__MEMORY_SCOPE_WVFRNT", "3");
Builder.defineMacro("__MEMORY_SCOPE_SINGLE", "4");
+ Builder.defineMacro("__MEMORY_SCOPE_CLUSTR", "5");
// Define macros for the OpenCL memory scope.
// The values should match AtomicScopeOpenCLModel::ID enum.
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index 545a6c90892c2..c39048120a457 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -1,113 +1,772 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_DEF %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
-// AMDGCN-LABEL: define hidden i32 @fi1a(
-// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent") monotonic, align 4
-// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
-// SPIRV: define hidden spir_func i32 @fi1a(
-// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4
-// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4
-// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
+// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_DEF-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
+// AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP1]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP5]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP9]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP11]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = load i32, ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: ret i32 [[TMP12]]
+//
+// AMDGCN_CL_20-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
+// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: ret i32 [[TMP12]]
+//
+// SPIRV-LABEL: define hidden spir_func i32 @fi1a(
+// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
+// SPIRV-NEXT: [[V:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP1]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("device") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP3]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP5]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP7]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("subgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP9]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP11]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP12:%.*]] = load i32, ptr [[V]], align 4
+// SPIRV-NEXT: ret i32 [[TMP12]]
+//
int fi1a(int *i) {
int v;
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
+ __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
return v;
}
// AMDGCN-LABEL: define hidden i32 @fi1b(
-// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4
-// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent") monotonic, align 4
-// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_...
[truncated]
|
fe2e520
to
4e87322
Compare
6f436b1
to
f90b14c
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/27/builds/17824 Here is the relevant piece of the build log for reference:
|
From Sam Liu: