Skip to content

Commit

Permalink
Use team_size_max to fix "Team size too large" error in reducer test (#…
Browse files Browse the repository at this point in the history
…6725)

* Fix test to not use team size too large

For some configurations, one of the reducer test was setting a team size too large.

* Remove team_size calculation from test not using it

* Remove unused typedef

* Use int in for-loop to avoid warning

Co-authored-by: Damien L-G <dalg24+github@gmail.com>

* Fix teamsize request for OPENMPTARGET

* Cleanup test and match TestTeam.hpp

Use 32 as a minimum team size like in TestTeam.hpp. Better match layout
in the 3 tests here.

---------

Co-authored-by: Damien L-G <dalg24+github@gmail.com>
  • Loading branch information
tcclevenger and dalg24 committed Jan 22, 2024
1 parent 6912b39 commit 407e18d
Showing 1 changed file with 83 additions and 36 deletions.
119 changes: 83 additions & 36 deletions core/unit_test/TestReducers.hpp
Expand Up @@ -56,6 +56,28 @@ struct TestReducers {
}
};

struct TeamSumNestedFunctor {
using member_type = typename Kokkos::TeamPolicy<ExecSpace>::member_type;

SumFunctor f;
int M, N;
Kokkos::View<Scalar*, ExecSpace> result;

TeamSumNestedFunctor(SumFunctor& f_, const int M_, const int N_,
Kokkos::View<Scalar*, ExecSpace> result_)
: f(f_), M(M_), N(N_), result(result_) {}

KOKKOS_INLINE_FUNCTION
void operator()(const member_type& m) const {
const int i = m.league_rank();
Scalar local_scalar;
Kokkos::Sum<Scalar, typename ExecSpace::memory_space> reducer_scalar(
local_scalar);
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(m, N), f, reducer_scalar);
result(i) = local_scalar;
}
};

struct ProdFunctor {
Kokkos::View<const Scalar*, ExecSpace> values;

Expand Down Expand Up @@ -351,53 +373,78 @@ struct TestReducers {
}
#endif

using member_type = typename Kokkos::TeamPolicy<ExecSpace>::member_type;

Scalar sum_scalar;
Kokkos::View<Scalar, ExecSpace> sum_view("result");
Kokkos::deep_copy(sum_view, Scalar(1));

constexpr int num_teams = get_num_teams();
TeamSumFunctor tf;
// Test team policy reduction
{
constexpr int num_teams = get_num_teams();
TeamSumFunctor tf;
// FIXME_OPENMPTARGET temporary restriction for team size to be at least
// 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
auto team_pol = Kokkos::TeamPolicy<ExecSpace>(num_teams, Kokkos::AUTO);
int team_size =
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32
: 1;
#else
auto team_pol = Kokkos::TeamPolicy<ExecSpace>(num_teams, 1);
int team_size = 1;
#endif
Kokkos::parallel_reduce(team_pol, tf, sum_view);
Kokkos::deep_copy(sum_scalar, sum_view);
ASSERT_EQ(sum_scalar, Scalar{num_teams}) << "num_teams: " << num_teams;
auto team_pol = Kokkos::TeamPolicy<ExecSpace>(num_teams, team_size);
Kokkos::parallel_reduce(team_pol, tf, sum_view);
Kokkos::deep_copy(sum_scalar, sum_view);
ASSERT_EQ(sum_scalar, Scalar{num_teams}) << "num_teams: " << num_teams;
}

Kokkos::parallel_for(
// Test TeamThreadRange level reduction with 0 work produces 0 result
{
const int league_size = 1;
Kokkos::View<Scalar*, ExecSpace> result("result", league_size);
TeamSumNestedFunctor tnf(f, league_size, 0, result);
// FIXME_OPENMPTARGET temporary restriction for team size to be at least
// 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
Kokkos::TeamPolicy<ExecSpace>(1, Kokkos::AUTO),
int team_size =
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32
: 1;
#else
Kokkos::TeamPolicy<ExecSpace>(1, 1),
int team_size = 1;
#endif
KOKKOS_LAMBDA(member_type team_member) {
Scalar local_scalar;
Kokkos::Sum<Scalar, typename ExecSpace::memory_space> reducer_scalar(
local_scalar);
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team_member, 0), f,
reducer_scalar);
sum_view() = local_scalar;
});
Kokkos::deep_copy(sum_scalar, sum_view);
ASSERT_EQ(sum_scalar, Scalar{0}) << "N: " << N;

auto team_size = std::min(128, TEST_EXECSPACE().concurrency());
Kokkos::parallel_for(
Kokkos::TeamPolicy<ExecSpace>(10, team_size),
KOKKOS_LAMBDA(member_type team_member) {
Scalar local_scalar;
Kokkos::Sum<Scalar, typename ExecSpace::memory_space> reducer_scalar(
local_scalar);
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team_member, N), f,
reducer_scalar);
sum_view() = local_scalar;
});
Kokkos::deep_copy(sum_scalar, sum_view);
ASSERT_EQ(sum_scalar, reference_sum) << "N: " << N;
auto team_pol = Kokkos::TeamPolicy<ExecSpace>(1, team_size);
Kokkos::parallel_for(team_pol, tnf);
auto result_h =
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), result);
ASSERT_EQ(result_h(0), Scalar{0}) << "N: " << N;
}

// Same test as above, but with inner reduction over N, and league_size=10
{
const int league_size = 10;
Kokkos::View<Scalar*, ExecSpace> result("result", league_size);
TeamSumNestedFunctor tnf(f, league_size, N, result);
// FIXME_OPENMPTARGET temporary restriction for team size to be at least
// 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
int initial_team_size =
std::is_same_v<ExecSpace, Kokkos::Experimental::OpenMPTarget> ? 32
: 1;
#else
int initial_team_size = 1;
#endif
auto team_size_max =
Kokkos::TeamPolicy<ExecSpace>(league_size, initial_team_size)
.team_size_max(tnf, Kokkos::ParallelForTag());
auto team_size = std::min(team_size_max, TEST_EXECSPACE().concurrency());
auto team_pol = Kokkos::TeamPolicy<ExecSpace>(league_size, team_size);
Kokkos::parallel_for(team_pol, tnf);
auto result_h =
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), result);
for (int i = 0; i < result_h.extent_int(0); ++i) {
ASSERT_EQ(result_h(i), reference_sum) << "N: " << N;
}
}
}

static void test_sum(int N) {
Expand Down

0 comments on commit 407e18d

Please sign in to comment.