-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libomptarget][nextgen-plugin] Use SCRELEASE/SCACQUIRE in packet header #85952
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-libc @llvm/pr-subscribers-backend-amdgpu Author: Gheorghe-Teodor Bercea (doru1004) ChangesThis patch updates the construction of packet headers to replace the usage of ACQUIRE/RELEASE with SCACQUIRE/SCRELEASE which is now recommended. Full diff: https://github.com/llvm/llvm-project/pull/85952.diff 3 Files Affected:
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index e3911eda2bd82a..7fd45acfd47e97 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -276,7 +276,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
uint32_t header_word = header | (setup << 16u);
- __atomic_store_n((uint32_t *)&packet->header, header_word, __ATOMIC_RELEASE);
+ __atomic_store_n(reinterpret_cast<uint32_t *>(packet), header_word,
+ __ATOMIC_RELEASE);
hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);
// Wait until the kernel has completed execution on the device. Periodically
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h b/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
index 64a1d3308aed0b..0ac7661d7e0076 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
@@ -216,8 +216,8 @@ typedef enum { HSA_FENCE_SCOPE_SYSTEM = 2 } hsa_fence_scope_t;
typedef enum {
HSA_PACKET_HEADER_TYPE = 0,
- HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9,
- HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11
+ HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE = 9,
+ HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE = 11
} hsa_packet_header_t;
typedef enum {
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index fce7454bf2800d..c147cefe58e9b4 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -826,15 +826,15 @@ struct AMDGPUQueueTy {
/// Assumes the queue lock is acquired.
void publishKernelPacket(uint64_t PacketId, uint16_t Setup,
hsa_kernel_dispatch_packet_t *Packet) {
- uint32_t *PacketPtr = reinterpret_cast<uint32_t *>(Packet);
-
- uint16_t Header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
- Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
- Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+ uint16_t Header =
+ (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
+ (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
+ (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
// Publish the packet. Do not modify the package after this point.
uint32_t HeaderWord = Header | (Setup << 16u);
- __atomic_store_n(PacketPtr, HeaderWord, __ATOMIC_RELEASE);
+ __atomic_store_n(reinterpret_cast<uint32_t *>(Packet), HeaderWord,
+ __ATOMIC_RELEASE);
// Signal the doorbell about the published packet.
hsa_signal_store_relaxed(Queue->doorbell_signal, PacketId);
@@ -845,15 +845,16 @@ struct AMDGPUQueueTy {
/// barrier dependencies (signals) are satisfied. Assumes the queue is locked
void publishBarrierPacket(uint64_t PacketId,
hsa_barrier_and_packet_t *Packet) {
- uint32_t *PacketPtr = reinterpret_cast<uint32_t *>(Packet);
uint16_t Setup = 0;
- uint16_t Header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
- Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
- Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+ uint16_t Header =
+ (HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
+ (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
+ (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
// Publish the packet. Do not modify the package after this point.
uint32_t HeaderWord = Header | (Setup << 16u);
- __atomic_store_n(PacketPtr, HeaderWord, __ATOMIC_RELEASE);
+ __atomic_store_n(reinterpret_cast<uint32_t *>(Packet), HeaderWord,
+ __ATOMIC_RELEASE);
// Signal the doorbell about the published packet.
hsa_signal_store_relaxed(Queue->doorbell_signal, PacketId);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see, dynamic HSA. I always forget about that.
Personally I would've just fixed this with direct push that just adds these two enum members. |
Yep, had to revert the previous commit and had to open a new PR too. |
Self-reviewed PRs muddy the waters. If you're going to self review just do a git push and be done with it, I think that's the LLVM policy. |
This patch updates the construction of packet headers to replace the usage of ACQUIRE/RELEASE with SCACQUIRE/SCRELEASE which is now recommended.
The patch also ensures consistency between kernel dispatches.