Skip to content

Commit

Permalink
Get rid of ZeroMemset's silly trailing value argument (kokkos#6769)
Browse files Browse the repository at this point in the history
* ZeroMemset does not want that trailing value argument

* Prefer C array of `unsigned char` for the zero-initialized storage
  • Loading branch information
dalg24 committed Feb 1, 2024
1 parent af806fb commit 4c94f08
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 27 deletions.
4 changes: 2 additions & 2 deletions core/src/Cuda/Kokkos_Cuda_ZeroMemset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ namespace Impl {

template <class T, class... P>
struct ZeroMemset<Kokkos::Cuda, View<T, P...>> {
ZeroMemset(const Kokkos::Cuda& exec_space_instance, const View<T, P...>& dst,
typename View<T, P...>::const_value_type&) {
ZeroMemset(const Kokkos::Cuda& exec_space_instance,
const View<T, P...>& dst) {
KOKKOS_IMPL_CUDA_SAFE_CALL(
(exec_space_instance.impl_internal_space_instance()
->cuda_memset_async_wrapper(
Expand Down
3 changes: 1 addition & 2 deletions core/src/HIP/Kokkos_HIP_ZeroMemset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ namespace Impl {

template <class T, class... P>
struct ZeroMemset<HIP, View<T, P...>> {
ZeroMemset(const HIP& exec_space, const View<T, P...>& dst,
typename View<T, P...>::const_value_type&) {
ZeroMemset(const HIP& exec_space, const View<T, P...>& dst) {
KOKKOS_IMPL_HIP_SAFE_CALL(hipMemsetAsync(
dst.data(), 0, dst.size() * sizeof(typename View<T, P...>::value_type),
exec_space.hip_stream()));
Expand Down
17 changes: 8 additions & 9 deletions core/src/Kokkos_CopyViews.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,13 +1336,12 @@ inline void contiguous_fill(
// Default implementation for execution spaces that don't provide a definition
template <typename ExecutionSpace, class ViewType>
struct ZeroMemset {
ZeroMemset(const ExecutionSpace& exec_space, const ViewType& dst,
typename ViewType::const_value_type& value) {
contiguous_fill(exec_space, dst, value);
}

ZeroMemset(const ViewType& dst, typename ViewType::const_value_type& value) {
contiguous_fill(ExecutionSpace(), dst, value);
ZeroMemset(const ExecutionSpace& exec_space, const ViewType& dst) {
using ValueType = typename ViewType::value_type;
alignas(alignof(ValueType)) unsigned char
zero_initialized_storage[sizeof(ValueType)] = {};
contiguous_fill(exec_space, dst,
*reinterpret_cast<ValueType*>(zero_initialized_storage));
}
};

Expand All @@ -1360,7 +1359,7 @@ contiguous_fill_or_memset(
&& !std::is_same_v<ExecutionSpace, Kokkos::OpenMP>
#endif
)
ZeroMemset(exec_space, dst, value);
ZeroMemset(exec_space, dst);
else
contiguous_fill(exec_space, dst, value);
}
Expand Down Expand Up @@ -1392,7 +1391,7 @@ contiguous_fill_or_memset(
// leading to the significant performance issues
#ifndef KOKKOS_ARCH_A64FX
if (Impl::is_zero_byte(value))
ZeroMemset(exec, dst, value);
ZeroMemset(exec, dst);
else
#endif
contiguous_fill(exec, dst, value);
Expand Down
3 changes: 1 addition & 2 deletions core/src/SYCL/Kokkos_SYCL_ZeroMemset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ namespace Impl {
template <class T, class... P>
struct ZeroMemset<Kokkos::Experimental::SYCL, View<T, P...>> {
ZeroMemset(const Kokkos::Experimental::SYCL& exec_space,
const View<T, P...>& dst,
typename View<T, P...>::const_value_type&) {
const View<T, P...>& dst) {
auto event = exec_space.impl_internal_space_instance()->m_queue->memset(
dst.data(), 0, dst.size() * sizeof(typename View<T, P...>::value_type));
#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES
Expand Down
3 changes: 1 addition & 2 deletions core/src/Serial/Kokkos_Serial_ZeroMemset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ struct ZeroMemset<
std::conditional_t<!std::is_same<Serial, DefaultHostExecutionSpace>::value,
Serial, DummyExecutionSpace>,
View<T, P...>> {
ZeroMemset(const Serial&, const View<T, P...>& dst,
typename View<T, P...>::const_value_type&) {
ZeroMemset(const Serial&, const View<T, P...>& dst) {
using ValueType = typename View<T, P...>::value_type;
std::memset(dst.data(), 0, sizeof(ValueType) * dst.size());
}
Expand Down
3 changes: 1 addition & 2 deletions core/src/impl/Kokkos_HostSpace_ZeroMemset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ namespace Impl {

template <class T, class... P>
struct ZeroMemset<HostSpace::execution_space, View<T, P...>> {
ZeroMemset(const HostSpace::execution_space& exec, const View<T, P...>& dst,
typename View<T, P...>::const_value_type&) {
ZeroMemset(const HostSpace::execution_space& exec, const View<T, P...>& dst) {
// Host spaces, except for HPX, are synchronous and we need to fence for HPX
// since we can't properly enqueue a std::memset otherwise.
// We can't use exec.fence() directly since we don't have a full definition
Expand Down
12 changes: 4 additions & 8 deletions core/src/impl/Kokkos_ViewMapping.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2918,10 +2918,8 @@ struct ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> {
Kokkos::Profiling::Experimental::device_id(space), &kpID);
}
(void)ZeroMemset(
space,
Kokkos::View<ValueType*, typename DeviceType::memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>(ptr, n),
value);
space, Kokkos::View<ValueType*, typename DeviceType::memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>(ptr, n));

if (Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelFor(kpID);
Expand Down Expand Up @@ -3050,10 +3048,8 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> {
}

(void)ZeroMemset(
space,
Kokkos::View<ValueType*, typename DeviceType::memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>(ptr, n),
value);
space, Kokkos::View<ValueType*, typename DeviceType::memory_space,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>(ptr, n));

if (Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelFor(kpID);
Expand Down

0 comments on commit 4c94f08

Please sign in to comment.