Skip to content

Commit

Permalink
[SYCL][Reduction] Group counter should use at least memory_order::acq…
Browse files Browse the repository at this point in the history
…_rel

From https://en.cppreference.com/w/cpp/atomic/memory_order:

> Atomic operations tagged memory_order_relaxed are not synchronization
> operations; they do not impose an order among concurrent memory
> accesses. They only guarantee atomicity and modification order
> consistency.

Yet we want to use that counter exactly for the synchronization purposes
- to decide which WG finished last and should perform the final step.

The same issue had been fixed in DPC++ at intel/llvm#8058.
  • Loading branch information
aelovikov-intel committed Sep 11, 2023
1 parent 9081d36 commit 89a4234
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
4 changes: 2 additions & 2 deletions core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType,
value_count, reducer, false, wgroup_size);

if (local_id == 0) {
sycl::atomic_ref<unsigned, sycl::memory_order::relaxed,
sycl::atomic_ref<unsigned, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>
scratch_flags_ref(*scratch_flags);
Expand Down Expand Up @@ -260,7 +260,7 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType,
device_accessible_result_ptr, reducer, false, wgroup_size);

if (local_id == 0) {
sycl::atomic_ref<unsigned, sycl::memory_order::relaxed,
sycl::atomic_ref<unsigned, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>
scratch_flags_ref(*scratch_flags);
Expand Down
4 changes: 2 additions & 2 deletions core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType,
value_count, reducer, false, std::min(size, wgroup_size));

if (local_id == 0) {
sycl::atomic_ref<unsigned, sycl::memory_order::relaxed,
sycl::atomic_ref<unsigned, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>
scratch_flags_ref(*scratch_flags);
Expand Down Expand Up @@ -202,7 +202,7 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType,
std::min(size, wgroup_size));

if (local_id == 0) {
sycl::atomic_ref<unsigned, sycl::memory_order::relaxed,
sycl::atomic_ref<unsigned, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>
scratch_flags_ref(*scratch_flags);
Expand Down
4 changes: 2 additions & 2 deletions core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType,
item.get_local_range()[1]));

if (local_id == 0) {
sycl::atomic_ref<unsigned, sycl::memory_order::relaxed,
sycl::atomic_ref<unsigned, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>
scratch_flags_ref(*scratch_flags);
Expand Down Expand Up @@ -260,7 +260,7 @@ class Kokkos::Impl::ParallelReduce<CombinedFunctorReducerType,
item.get_local_range()[1]));

if (local_id == 0) {
sycl::atomic_ref<unsigned, sycl::memory_order::relaxed,
sycl::atomic_ref<unsigned, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>
scratch_flags_ref(*scratch_flags);
Expand Down

0 comments on commit 89a4234

Please sign in to comment.