Skip to content

Commit

Permalink
Passing size==0 to DeepCopy/memcpy/omp_target_memcpy (kokkos#6273)
Browse files Browse the repository at this point in the history
* In deep_copy(...), don't call Impl::DeepCopy when nbytes == 0

* Don't pass 0-length ranges to std::memcpy

* Added protections around omp_target_memcpy so as not to pass
0-length ranges, as per this comment in
core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.hpp

      // In the Release and RelWithDebInfo builds, the size of the memcpy should
      // be greater than zero to avoid error.

* Removed unnecessary size check in Kokkos_HIP_Instance.cpp
Removed unnecessary size check in Kokkos_Profiling.cpp
Fixed size check in Kokkos_HostSpace_deepcopy.cpp
Fixed size check in Kokkos_OpenMPTarget_Parallel_Common.hpp
  • Loading branch information
nliber committed Aug 4, 2023
1 parent fcd6132 commit f557d3d
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 44 deletions.
4 changes: 2 additions & 2 deletions core/src/Kokkos_CopyViews.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1759,7 +1759,7 @@ inline void deep_copy(
Kokkos::fence(
"Kokkos::deep_copy: copy between contiguous views, pre view equality "
"check");
if ((void*)dst.data() != (void*)src.data()) {
if ((void*)dst.data() != (void*)src.data() && 0 < nbytes) {
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
dst.data(), src.data(), nbytes);
Kokkos::fence(
Expand Down Expand Up @@ -2906,7 +2906,7 @@ inline void deep_copy(
((dst_type::rank < 7) || (dst.stride_6() == src.stride_6())) &&
((dst_type::rank < 8) || (dst.stride_7() == src.stride_7()))) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
if ((void*)dst.data() != (void*)src.data()) {
if ((void*)dst.data() != (void*)src.data() && 0 < nbytes) {
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space, ExecSpace>(
exec_space, dst.data(), src.data(), nbytes);
}
Expand Down
7 changes: 4 additions & 3 deletions core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,10 @@ int* OpenMPTargetExec::get_lock_array(int num_teams) {

for (int i = 0; i < lock_array_elem; ++i) h_lock_array[i] = 0;

KOKKOS_IMPL_OMPT_SAFE_CALL(
omp_target_memcpy(m_lock_array, h_lock_array, m_lock_size, 0, 0,
omp_get_default_device(), omp_get_initial_device()));
if (0 < m_lock_size)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
m_lock_array, h_lock_array, m_lock_size, 0, 0,
omp_get_default_device(), omp_get_initial_device()));

omp_target_free(h_lock_array, omp_get_initial_device());
}
Expand Down
7 changes: 4 additions & 3 deletions core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,10 @@ UniqueToken<Kokkos::Experimental::OpenMPTarget,
Kokkos::kokkos_malloc<Kokkos::Experimental::OpenMPTargetSpace>(
"Kokkos::OpenMPTarget::m_uniquetoken_ptr", size));
std::vector<uint32_t> h_buf(count, 0);
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(ptr, h_buf.data(), size, 0, 0,
omp_get_default_device(),
omp_get_initial_device()));
if (0 < size)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(ptr, h_buf.data(), size, 0,
0, omp_get_default_device(),
omp_get_initial_device()));

Kokkos::Impl::OpenMPTargetExec::m_uniquetoken_ptr = ptr;
}
Expand Down
81 changes: 46 additions & 35 deletions core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ struct ParallelReduceCopy {
static void memcpy_result(PointerType dest, PointerType src, size_t size,
bool ptr_on_device) {
if (ptr_on_device) {
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(dest, src, size, 0, 0,
omp_get_default_device(),
omp_get_initial_device()));
if (0 < size) {
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(dest, src, size, 0, 0,
omp_get_default_device(),
omp_get_initial_device()));
}

} else {
*dest = *src;
}
Expand Down Expand Up @@ -231,14 +234,16 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>,
final_reducer.init(scratch_ptr);
final_reducer.final(scratch_ptr);
}
if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
if (0 < value_count) {
if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
}

return;
}
Expand Down Expand Up @@ -310,14 +315,16 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>,
} while (tree_neighbor_offset < max_teams);

// If the result view is on the host, copy back the values via memcpy.
if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
if (0 < value_count) {
if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
}
}
};

Expand Down Expand Up @@ -596,14 +603,16 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>,
final_reducer.final(scratch_ptr);
}

if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
if (0 < value_count) {
if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
}

return;
}
Expand Down Expand Up @@ -658,14 +667,16 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>,
} while (tree_neighbor_offset < nteams);

// If the result view is on the host, copy back the values via memcpy.
if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
if (0 < value_count) {
if (!ptr_on_device)
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_initial_device(), omp_get_default_device()));
else
KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy(
ptr, scratch_ptr, value_count * sizeof(ValueType), 0, 0,
omp_get_default_device(), omp_get_default_device()));
}
}
};

Expand Down
2 changes: 1 addition & 1 deletion core/src/impl/Kokkos_HostSpace_deepcopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void hostspace_parallel_deepcopy_async(const DefaultHostExecutionSpace& exec,
constexpr int host_deep_copy_serial_limit = 10 * 8192;
if ((n < host_deep_copy_serial_limit) ||
(DefaultHostExecutionSpace().concurrency() == 1)) {
std::memcpy(dst, src, n);
if (0 < n) std::memcpy(dst, src, n);
return;
}

Expand Down

0 comments on commit f557d3d

Please sign in to comment.