Skip to content

Commit

Permalink
[HIP] Improve heuristic deciding the number of blocks used in parallel_reduce (kokkos#6160)
Browse files Browse the repository at this point in the history

* Improve heuristic deciding the number of blocks used in parallel_reduce

* Remove commented code

* Use auto

* Simplify constructor

* Improve comment

Co-authored-by: Christian Trott <crtrott@sandia.gov>

* Fix format

---------

Co-authored-by: Christian Trott <crtrott@sandia.gov>
  • Loading branch information
Rombur and crtrott committed Jun 9, 2023
1 parent 43a797b commit e200ba1
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions core/src/HIP/Kokkos_HIP_Parallel_Range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,24 @@ class ParallelReduce<CombinedFunctorReducerType, Kokkos::RangePolicy<Traits...>,
dim3 block(1, block_size, 1);
// use a slightly less constrained, but still well bounded limit for
// scratch
uint32_t nblocks = static_cast<uint32_t>((nwork + block.y - 1) / block.y);
nblocks = std::min(nblocks, 4096u);
m_scratch_space = ::Kokkos::Impl::hip_internal_scratch_space(
int nblocks = (nwork + block.y - 1) / block.y;
// Heuristic deciding the value of nblocks. The values for the light
// weight case have been chosen using a vector product benchmark on MI250.
constexpr auto light_weight =
Kokkos::Experimental::WorkItemProperty::HintLightWeight;
constexpr typename Policy::work_item_property property;
if ((property & light_weight) == light_weight) {
if (nblocks < block_size) {
// Keep nblocks as is
} else if (nblocks < 16 * block_size) {
nblocks = block_size;
} else {
nblocks = 4 * block_size;
}
} else {
nblocks = std::min(nblocks, 4096);
}
m_scratch_space = ::Kokkos::Impl::hip_internal_scratch_space(
m_policy.space(), reducer.value_size() * nblocks);
m_scratch_flags = ::Kokkos::Impl::hip_internal_scratch_flags(
m_policy.space(), sizeof(size_type));
Expand Down

0 comments on commit e200ba1

Please sign in to comment.