Skip to content

Commit

Permalink
Add parallel_scan overloads with value for Threads
Browse files Browse the repository at this point in the history
  • Loading branch information
thearusable authored and cz4rs committed Oct 4, 2023
1 parent c63f125 commit 495b1cc
Showing 1 changed file with 23 additions and 4 deletions.
27 changes: 23 additions & 4 deletions core/src/Threads/Kokkos_ThreadsTeam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1038,17 +1038,20 @@ KOKKOS_INLINE_FUNCTION void parallel_scan(
* final==true. Scan_val will be set to the final sum value over all vector
* lanes.
*/
template <typename iType, class FunctorType>
template <typename iType, class FunctorType, typename ValueType>
KOKKOS_INLINE_FUNCTION void parallel_scan(
const Impl::ThreadVectorRangeBoundariesStruct<
iType, Impl::ThreadsExecTeamMember>& loop_boundaries,
const FunctorType& lambda) {
using value_type =
const FunctorType& lambda, ValueType& return_val) {
// Extract ValueType from the Closure
using closure_value_type =
typename Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN,
TeamPolicy<Threads>, FunctorType,
void>::value_type;
static_assert(std::is_same<closure_value_type, ValueType>::value,
"Non-matching value types of closure and return type");

value_type scan_val = value_type();
ValueType scan_val = ValueType();

#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
Expand All @@ -1057,6 +1060,22 @@ KOKKOS_INLINE_FUNCTION void parallel_scan(
i += loop_boundaries.increment) {
lambda(i, scan_val, true);
}

return_val = scan_val;
}

/** \brief  Intra-thread vector parallel scan that discards the final total.
 *
 * Convenience overload for callers that only need the side effects of the
 * closure and not the accumulated value. It derives the scan value type from
 * the closure via FunctorAnalysis, declares a local accumulator of that type,
 * and forwards to the parallel_scan overload that takes a return value.
 *
 * \param loop_boundaries  Vector-range boundaries forwarded unchanged.
 * \param lambda           Scan closure forwarded unchanged.
 */
template <typename iType, class FunctorType>
KOKKOS_INLINE_FUNCTION void parallel_scan(
    const Impl::ThreadVectorRangeBoundariesStruct<
        iType, Impl::ThreadsExecTeamMember>& loop_boundaries,
    const FunctorType& lambda) {
  // Value type the closure scans over, as reported by FunctorAnalysis.
  using scan_analysis =
      Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN,
                            TeamPolicy<Threads>, FunctorType, void>;
  using value_type = typename scan_analysis::value_type;

  // Receives the value written by the forwarding target; never read here.
  value_type discarded_total;
  parallel_scan(loop_boundaries, lambda, discarded_total);
}

/** \brief Intra-thread vector parallel scan with reducer
Expand Down

0 comments on commit 495b1cc

Please sign in to comment.