Skip to content

Commit

Permalink
OpenMP: Fix TeamThreadRange parallel_scan with return value for team_size > 1
Browse files (browse the repository at this point in the history)
  • Loading branch information
masterleinad committed Sep 27, 2023
1 parent 41cf2e5 commit b610a28
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
6 changes: 5 additions & 1 deletion core/src/impl/Kokkos_HostThreadTeam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -885,14 +885,18 @@ KOKKOS_INLINE_FUNCTION
  closure(i, accum, false);
  }

+ auto team_member = loop_boundaries.thread;
+
  // 'accum' output is the exclusive prefix sum
- accum = loop_boundaries.thread.team_scan(accum);
+ accum = team_member.team_scan(accum);

  for (iType i = loop_boundaries.start; i < loop_boundaries.end;
  i += loop_boundaries.increment) {
  closure(i, accum, true);
  }

+ team_member.team_broadcast(accum, team_member.team_size() - 1);
+
  return_val = accum;
  }

Expand Down
9 changes: 8 additions & 1 deletion core/unit_test/TestTeamScan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,14 @@ struct TestTeamScanRetVal {
  a_r = view_2d_type("a_r", M, N);
  a_s = view_1d_type("a_s", M);

- Kokkos::parallel_for(policy_type(M, Kokkos::AUTO), *this);
+ // Set team size explicitly to check whether non-power-of-two team sizes can
+ // be used.
+ if (ExecutionSpace().concurrency() > 10000)
+ Kokkos::parallel_for(policy_type(M, 127), *this);
+ else if (ExecutionSpace().concurrency() > 2)
+ Kokkos::parallel_for(policy_type(M, 3), *this);
+ else
+ Kokkos::parallel_for(policy_type(M, 1), *this);

  Kokkos::fence();
  auto a_i = Kokkos::create_mirror_view(a_d);
Expand Down

0 comments on commit b610a28

Please sign in to comment.