forked from kokkos/kokkos
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
741 additions
and
633 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
core/src/Threads/Kokkos_Threads_ParallelFor_MDRange.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#ifndef KOKKOS_THREADS_PARALLEL_FOR_MDRANGE_HPP | ||
#define KOKKOS_THREADS_PARALLEL_FOR_MDRANGE_HPP | ||
|
||
#include <Kokkos_Parallel.hpp> | ||
|
||
#include <KokkosExp_MDRangePolicy.hpp> | ||
namespace Kokkos { | ||
namespace Impl { | ||
|
||
template <class FunctorType, class... Traits> | ||
class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, | ||
Kokkos::Threads> { | ||
private: | ||
using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; | ||
using Policy = typename MDRangePolicy::impl_range_policy; | ||
|
||
using WorkTag = typename MDRangePolicy::work_tag; | ||
|
||
using WorkRange = typename Policy::WorkRange; | ||
using Member = typename Policy::member_type; | ||
|
||
using iterate_type = typename Kokkos::Impl::HostIterateTile< | ||
MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void>; | ||
|
||
const iterate_type m_iter; | ||
|
||
inline void exec_range(const Member ibeg, const Member iend) const { | ||
for (Member i = ibeg; i < iend; ++i) { | ||
m_iter(i); | ||
} | ||
} | ||
|
||
static void exec(ThreadsExec &exec, const void *arg) { | ||
exec_schedule<typename Policy::schedule_type::type>(exec, arg); | ||
} | ||
|
||
template <class Schedule> | ||
static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value> | ||
exec_schedule(ThreadsExec &exec, const void *arg) { | ||
const ParallelFor &self = *((const ParallelFor *)arg); | ||
|
||
auto const num_tiles = self.m_iter.m_rp.m_num_tiles; | ||
WorkRange range(Policy(0, num_tiles).set_chunk_size(1), exec.pool_rank(), | ||
exec.pool_size()); | ||
|
||
self.exec_range(range.begin(), range.end()); | ||
|
||
exec.fan_in(); | ||
} | ||
|
||
template <class Schedule> | ||
static std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value> | ||
exec_schedule(ThreadsExec &exec, const void *arg) { | ||
const ParallelFor &self = *((const ParallelFor *)arg); | ||
|
||
auto const num_tiles = self.m_iter.m_rp.m_num_tiles; | ||
WorkRange range(Policy(0, num_tiles).set_chunk_size(1), exec.pool_rank(), | ||
exec.pool_size()); | ||
|
||
exec.set_work_range(range.begin(), range.end(), 1); | ||
exec.reset_steal_target(); | ||
exec.barrier(); | ||
|
||
long work_index = exec.get_work_index(); | ||
|
||
while (work_index != -1) { | ||
const Member begin = static_cast<Member>(work_index); | ||
const Member end = begin + 1 < num_tiles ? begin + 1 : num_tiles; | ||
|
||
self.exec_range(begin, end); | ||
work_index = exec.get_work_index(); | ||
} | ||
|
||
exec.fan_in(); | ||
} | ||
|
||
public: | ||
inline void execute() const { | ||
ThreadsExec::start(&ParallelFor::exec, this); | ||
ThreadsExec::fence(); | ||
} | ||
|
||
ParallelFor(const FunctorType &arg_functor, const MDRangePolicy &arg_policy) | ||
: m_iter(arg_policy, arg_functor) {} | ||
|
||
template <typename Policy, typename Functor> | ||
static int max_tile_size_product(const Policy &, const Functor &) { | ||
/** | ||
* 1024 here is just our guess for a reasonable max tile size, | ||
* it isn't a hardware constraint. If people see a use for larger | ||
* tile size products, we're happy to change this. | ||
*/ | ||
return 1024; | ||
} | ||
}; | ||
|
||
} // namespace Impl | ||
} // namespace Kokkos | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#ifndef KOKKOS_THREADS_PARALLEL_FOR_RANGE_HPP | ||
#define KOKKOS_THREADS_PARALLEL_FOR_RANGE_HPP | ||
|
||
#include <Kokkos_Parallel.hpp> | ||
|
||
namespace Kokkos { | ||
namespace Impl { | ||
|
||
template <class FunctorType, class... Traits> | ||
class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, | ||
Kokkos::Threads> { | ||
private: | ||
using Policy = Kokkos::RangePolicy<Traits...>; | ||
using WorkTag = typename Policy::work_tag; | ||
using WorkRange = typename Policy::WorkRange; | ||
using Member = typename Policy::member_type; | ||
|
||
const FunctorType m_functor; | ||
const Policy m_policy; | ||
|
||
template <class TagType> | ||
inline static std::enable_if_t<std::is_void<TagType>::value> exec_range( | ||
const FunctorType &functor, const Member ibeg, const Member iend) { | ||
#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ | ||
defined(KOKKOS_ENABLE_PRAGMA_IVDEP) | ||
#pragma ivdep | ||
#endif | ||
for (Member i = ibeg; i < iend; ++i) { | ||
functor(i); | ||
} | ||
} | ||
|
||
template <class TagType> | ||
inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range( | ||
const FunctorType &functor, const Member ibeg, const Member iend) { | ||
const TagType t{}; | ||
#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ | ||
defined(KOKKOS_ENABLE_PRAGMA_IVDEP) | ||
#pragma ivdep | ||
#endif | ||
for (Member i = ibeg; i < iend; ++i) { | ||
functor(t, i); | ||
} | ||
} | ||
|
||
static void exec(ThreadsExec &exec, const void *arg) { | ||
exec_schedule<typename Policy::schedule_type::type>(exec, arg); | ||
} | ||
|
||
template <class Schedule> | ||
static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value> | ||
exec_schedule(ThreadsExec &exec, const void *arg) { | ||
const ParallelFor &self = *((const ParallelFor *)arg); | ||
|
||
WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); | ||
|
||
ParallelFor::template exec_range<WorkTag>(self.m_functor, range.begin(), | ||
range.end()); | ||
|
||
exec.fan_in(); | ||
} | ||
|
||
template <class Schedule> | ||
static std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value> | ||
exec_schedule(ThreadsExec &exec, const void *arg) { | ||
const ParallelFor &self = *((const ParallelFor *)arg); | ||
|
||
WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); | ||
|
||
exec.set_work_range(range.begin() - self.m_policy.begin(), | ||
range.end() - self.m_policy.begin(), | ||
self.m_policy.chunk_size()); | ||
exec.reset_steal_target(); | ||
exec.barrier(); | ||
|
||
long work_index = exec.get_work_index(); | ||
|
||
while (work_index != -1) { | ||
const Member begin = | ||
static_cast<Member>(work_index) * self.m_policy.chunk_size() + | ||
self.m_policy.begin(); | ||
const Member end = | ||
begin + self.m_policy.chunk_size() < self.m_policy.end() | ||
? begin + self.m_policy.chunk_size() | ||
: self.m_policy.end(); | ||
ParallelFor::template exec_range<WorkTag>(self.m_functor, begin, end); | ||
work_index = exec.get_work_index(); | ||
} | ||
|
||
exec.fan_in(); | ||
} | ||
|
||
public: | ||
inline void execute() const { | ||
ThreadsExec::start(&ParallelFor::exec, this); | ||
ThreadsExec::fence(); | ||
} | ||
|
||
ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) | ||
: m_functor(arg_functor), m_policy(arg_policy) {} | ||
}; | ||
|
||
} // namespace Impl | ||
} // namespace Kokkos | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#ifndef KOKKOS_THREADS_PARALLEL_FOR_TEAM_HPP | ||
#define KOKKOS_THREADS_PARALLEL_FOR_TEAM_HPP | ||
|
||
#include <Kokkos_Parallel.hpp> | ||
|
||
namespace Kokkos { | ||
namespace Impl { | ||
|
||
template <class FunctorType, class... Properties> | ||
class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, | ||
Kokkos::Threads> { | ||
private: | ||
using Policy = | ||
Kokkos::Impl::TeamPolicyInternal<Kokkos::Threads, Properties...>; | ||
using WorkTag = typename Policy::work_tag; | ||
using Member = typename Policy::member_type; | ||
|
||
const FunctorType m_functor; | ||
const Policy m_policy; | ||
const size_t m_shared; | ||
|
||
template <class TagType, class Schedule> | ||
inline static std::enable_if_t<std::is_void<TagType>::value && | ||
std::is_same<Schedule, Kokkos::Static>::value> | ||
exec_team(const FunctorType &functor, Member member) { | ||
for (; member.valid_static(); member.next_static()) { | ||
functor(member); | ||
} | ||
} | ||
|
||
template <class TagType, class Schedule> | ||
inline static std::enable_if_t<!std::is_void<TagType>::value && | ||
std::is_same<Schedule, Kokkos::Static>::value> | ||
exec_team(const FunctorType &functor, Member member) { | ||
const TagType t{}; | ||
for (; member.valid_static(); member.next_static()) { | ||
functor(t, member); | ||
} | ||
} | ||
|
||
template <class TagType, class Schedule> | ||
inline static std::enable_if_t<std::is_void<TagType>::value && | ||
std::is_same<Schedule, Kokkos::Dynamic>::value> | ||
exec_team(const FunctorType &functor, Member member) { | ||
for (; member.valid_dynamic(); member.next_dynamic()) { | ||
functor(member); | ||
} | ||
} | ||
|
||
template <class TagType, class Schedule> | ||
inline static std::enable_if_t<!std::is_void<TagType>::value && | ||
std::is_same<Schedule, Kokkos::Dynamic>::value> | ||
exec_team(const FunctorType &functor, Member member) { | ||
const TagType t{}; | ||
for (; member.valid_dynamic(); member.next_dynamic()) { | ||
functor(t, member); | ||
} | ||
} | ||
|
||
static void exec(ThreadsExec &exec, const void *arg) { | ||
const ParallelFor &self = *((const ParallelFor *)arg); | ||
|
||
ParallelFor::exec_team<WorkTag, typename Policy::schedule_type::type>( | ||
self.m_functor, Member(&exec, self.m_policy, self.m_shared)); | ||
|
||
exec.barrier(); | ||
exec.fan_in(); | ||
} | ||
template <typename Policy> | ||
Policy fix_policy(Policy policy) { | ||
if (policy.impl_vector_length() < 0) { | ||
policy.impl_set_vector_length(1); | ||
} | ||
if (policy.team_size() < 0) { | ||
policy.impl_set_team_size( | ||
policy.team_size_recommended(m_functor, ParallelForTag{})); | ||
} | ||
return policy; | ||
} | ||
|
||
public: | ||
inline void execute() const { | ||
ThreadsExec::resize_scratch( | ||
0, Policy::member_type::team_reduce_size() + m_shared); | ||
|
||
ThreadsExec::start(&ParallelFor::exec, this); | ||
|
||
ThreadsExec::fence(); | ||
} | ||
|
||
ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) | ||
: m_functor(arg_functor), | ||
m_policy(fix_policy(arg_policy)), | ||
m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) + | ||
FunctorTeamShmemSize<FunctorType>::value( | ||
arg_functor, m_policy.team_size())) {} | ||
}; | ||
|
||
} // namespace Impl | ||
} // namespace Kokkos | ||
|
||
#endif |
Oops, something went wrong.