Skip to content

Commit

Permalink
kokkos#5635: Add parallel_scan overload with return value for ThreadVectorRange
Browse files Browse the repository at this point in the history
  • Loading branch information
thearusable committed Jul 28, 2023
1 parent 2f12ebb commit 7290d52
Showing 1 changed file with 27 additions and 6 deletions.
33 changes: 27 additions & 6 deletions core/src/impl/Kokkos_HostThreadTeam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -893,16 +893,21 @@ KOKKOS_INLINE_FUNCTION
}
}

template <typename iType, class ClosureType, class Member>
template <typename iType, class ClosureType, class Member, typename ValueType>
KOKKOS_INLINE_FUNCTION
std::enable_if_t<Impl::is_host_thread_team_member<Member>::value>
std::enable_if_t<!Kokkos::is_reducer<ValueType>::value &&
Impl::is_host_thread_team_member<Member>::value>
parallel_scan(Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const&
loop_boundaries,
ClosureType const& closure) {
using value_type = typename Kokkos::Impl::FunctorAnalysis<
Impl::FunctorPatternInterface::SCAN, void, ClosureType, void>::value_type;
ClosureType const& closure, ValueType& return_val) {
// Extract ValueType from the Closure
using ClosureValueType = typename Kokkos::Impl::FunctorAnalysis<
Kokkos::Impl::FunctorPatternInterface::SCAN, void, ClosureType,
void>::value_type;
static_assert(std::is_same<ClosureValueType, ValueType>::value,
"Non-matching value types of closure and return type");

value_type scan_val = value_type();
ValueType scan_val = ValueType();

#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
Expand All @@ -911,6 +916,22 @@ KOKKOS_INLINE_FUNCTION
i += loop_boundaries.increment) {
closure(i, scan_val, true);
}

return_val = scan_val;
}

// parallel_scan over a ThreadVectorRange for callers that do not need the
// scan result.
//
// The scan value type is deduced from the closure through FunctorAnalysis,
// a local of that type is declared, and the work is forwarded to the
// parallel_scan overload that takes an additional value argument. The local
// is never read after the call, so whatever that overload stores in it is
// dropped when this function returns.
//
// Only participates in overload resolution when Member satisfies
// Impl::is_host_thread_team_member.
template <typename iType, class ClosureType, class Member>
KOKKOS_INLINE_FUNCTION
    std::enable_if_t<Impl::is_host_thread_team_member<Member>::value>
    parallel_scan(Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const&
                      loop_boundaries,
                  ClosureType const& closure) {
  // Ask FunctorAnalysis which value type the closure scans over.
  using ScanAnalysis = Kokkos::Impl::FunctorAnalysis<
      Impl::FunctorPatternInterface::SCAN, void, ClosureType, void>;
  using DeducedValueType = typename ScanAnalysis::value_type;

  // Placeholder passed by reference to the forwarding target; intentionally
  // left default-initialized here and not inspected afterwards.
  DeducedValueType discarded_result;
  parallel_scan(loop_boundaries, closure, discarded_result);
}

template <typename iType, class Lambda, typename ReducerType, typename Member>
Expand Down

0 comments on commit 7290d52

Please sign in to comment.