Skip to content

Commit

Permalink
Initialize m_num_scratch_locks for Cuda parallel_for TeamPolicy
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Arndt authored and masterleinad committed Sep 12, 2023
1 parent 9081d36 commit 6979f67
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
7 changes: 4 additions & 3 deletions core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,9 +554,10 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
m_shmem_size =
(m_policy.scratch_size(0, m_team_size) +
FunctorTeamShmemSize<FunctorType>::value(m_functor, m_team_size));
m_scratch_size[0] = m_policy.scratch_size(0, m_team_size);
m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
m_scratch_locks = internal_space_instance->m_scratch_locks;
m_scratch_size[0] = m_policy.scratch_size(0, m_team_size);
m_scratch_size[1] = m_policy.scratch_size(1, m_team_size);
m_scratch_locks = internal_space_instance->m_scratch_locks;
m_num_scratch_locks = internal_space_instance->m_num_scratch_locks;

// Functor's reduce memory, team scan memory, and team shared memory depend
// upon team size.
Expand Down
18 changes: 18 additions & 0 deletions core/unit_test/TestTeamScratch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,23 @@ TEST(TEST_CATEGORY, multi_level_scratch) {
#endif
}

// No-op functor for team-policy parallel_for launches. The call operator
// accepts the team member handle and deliberately ignores it, so a launch
// using this functor exercises only the dispatch path, not any user work.
struct DummyTeamParallelForFunctor {
  KOKKOS_FUNCTION void operator()(
      Kokkos::TeamPolicy<TEST_EXECSPACE>::member_type /*team_member*/) const {
    // Intentionally empty.
  }
};

TEST(TEST_CATEGORY, team_scratch_memory_index_parallel_for) {
  // Regression test: asking for per-team scratch memory with a fairly large
  // number of teams used to yield an incorrectly computed scratch pointer,
  // because the maximum number of scratch pad indices was not initialized in
  // the Cuda backend.
  using policy_type = Kokkos::TeamPolicy<TEST_EXECSPACE>;

  const int num_teams        = 7535;
  const int per_team_scratch = 4896;

  policy_type policy(num_teams, Kokkos::AUTO);
  // Request the scratch allocation on scratch level 1.
  policy.set_scratch_size(1, Kokkos::PerTeam(per_team_scratch));

  // The functor body is empty; the launch itself is what is being tested.
  Kokkos::parallel_for("kernel", policy, DummyTeamParallelForFunctor());
}

} // namespace Test
#endif

0 comments on commit 6979f67

Please sign in to comment.