Skip to content

Commit

Permalink
Split Kokkos_Threads_Parallel files
Browse files Browse the repository at this point in the history
  • Loading branch information
Rombur committed Oct 9, 2023
1 parent a601d81 commit cb22b80
Show file tree
Hide file tree
Showing 12 changed files with 741 additions and 633 deletions.
16 changes: 0 additions & 16 deletions core/src/Threads/Kokkos_Threads.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,21 +169,5 @@ struct MemorySpaceAccess<Kokkos::Threads::memory_space,
} // namespace Impl
} // namespace Kokkos

/*--------------------------------------------------------------------------*/

#include <Kokkos_ExecPolicy.hpp>
#include <Kokkos_Parallel.hpp>
#include <Threads/Kokkos_ThreadsExec.hpp>
#include <Threads/Kokkos_ThreadsTeam.hpp>
#include <Threads/Kokkos_Threads_Parallel_Range.hpp>
#include <Threads/Kokkos_Threads_Parallel_MDRange.hpp>
#include <Threads/Kokkos_Threads_Parallel_Team.hpp>
#include <Threads/Kokkos_Threads_UniqueToken.hpp>

#include <KokkosExp_MDRangePolicy.hpp>

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #if defined( KOKKOS_ENABLE_THREADS ) */
#endif /* #define KOKKOS_THREADS_HPP */
2 changes: 2 additions & 0 deletions core/src/Threads/Kokkos_ThreadsExec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
#include <impl/Kokkos_Spinwait.hpp>

#include <Kokkos_Atomic.hpp>
#include <Kokkos_Pair.hpp>

#include <impl/Kokkos_ConcurrentBitset.hpp>
#include <Threads/Kokkos_Threads.hpp>

//----------------------------------------------------------------------------

Expand Down
115 changes: 115 additions & 0 deletions core/src/Threads/Kokkos_Threads_ParallelFor_MDRange.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_THREADS_PARALLEL_FOR_MDRANGE_HPP
#define KOKKOS_THREADS_PARALLEL_FOR_MDRANGE_HPP

#include <Kokkos_Parallel.hpp>

#include <KokkosExp_MDRangePolicy.hpp>
namespace Kokkos {
namespace Impl {

template <class FunctorType, class... Traits>
class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
Kokkos::Threads> {
private:
using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>;
using Policy = typename MDRangePolicy::impl_range_policy;

using WorkTag = typename MDRangePolicy::work_tag;

using WorkRange = typename Policy::WorkRange;
using Member = typename Policy::member_type;

using iterate_type = typename Kokkos::Impl::HostIterateTile<
MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void>;

const iterate_type m_iter;

inline void exec_range(const Member ibeg, const Member iend) const {
for (Member i = ibeg; i < iend; ++i) {
m_iter(i);
}
}

static void exec(ThreadsExec &exec, const void *arg) {
exec_schedule<typename Policy::schedule_type::type>(exec, arg);
}

template <class Schedule>
static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value>
exec_schedule(ThreadsExec &exec, const void *arg) {
const ParallelFor &self = *((const ParallelFor *)arg);

auto const num_tiles = self.m_iter.m_rp.m_num_tiles;
WorkRange range(Policy(0, num_tiles).set_chunk_size(1), exec.pool_rank(),
exec.pool_size());

self.exec_range(range.begin(), range.end());

exec.fan_in();
}

template <class Schedule>
static std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value>
exec_schedule(ThreadsExec &exec, const void *arg) {
const ParallelFor &self = *((const ParallelFor *)arg);

auto const num_tiles = self.m_iter.m_rp.m_num_tiles;
WorkRange range(Policy(0, num_tiles).set_chunk_size(1), exec.pool_rank(),
exec.pool_size());

exec.set_work_range(range.begin(), range.end(), 1);
exec.reset_steal_target();
exec.barrier();

long work_index = exec.get_work_index();

while (work_index != -1) {
const Member begin = static_cast<Member>(work_index);
const Member end = begin + 1 < num_tiles ? begin + 1 : num_tiles;

self.exec_range(begin, end);
work_index = exec.get_work_index();
}

exec.fan_in();
}

public:
inline void execute() const {
ThreadsExec::start(&ParallelFor::exec, this);
ThreadsExec::fence();
}

ParallelFor(const FunctorType &arg_functor, const MDRangePolicy &arg_policy)
: m_iter(arg_policy, arg_functor) {}

template <typename Policy, typename Functor>
static int max_tile_size_product(const Policy &, const Functor &) {
/**
* 1024 here is just our guess for a reasonable max tile size,
* it isn't a hardware constraint. If people see a use for larger
* tile size products, we're happy to change this.
*/
return 1024;
}
};

} // namespace Impl
} // namespace Kokkos
#endif
122 changes: 122 additions & 0 deletions core/src/Threads/Kokkos_Threads_ParallelFor_Range.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_THREADS_PARALLEL_FOR_RANGE_HPP
#define KOKKOS_THREADS_PARALLEL_FOR_RANGE_HPP

#include <Kokkos_Parallel.hpp>

namespace Kokkos {
namespace Impl {

template <class FunctorType, class... Traits>
class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>,
Kokkos::Threads> {
private:
using Policy = Kokkos::RangePolicy<Traits...>;
using WorkTag = typename Policy::work_tag;
using WorkRange = typename Policy::WorkRange;
using Member = typename Policy::member_type;

const FunctorType m_functor;
const Policy m_policy;

template <class TagType>
inline static std::enable_if_t<std::is_void<TagType>::value> exec_range(
const FunctorType &functor, const Member ibeg, const Member iend) {
#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
#pragma ivdep
#endif
for (Member i = ibeg; i < iend; ++i) {
functor(i);
}
}

template <class TagType>
inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range(
const FunctorType &functor, const Member ibeg, const Member iend) {
const TagType t{};
#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
#pragma ivdep
#endif
for (Member i = ibeg; i < iend; ++i) {
functor(t, i);
}
}

static void exec(ThreadsExec &exec, const void *arg) {
exec_schedule<typename Policy::schedule_type::type>(exec, arg);
}

template <class Schedule>
static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value>
exec_schedule(ThreadsExec &exec, const void *arg) {
const ParallelFor &self = *((const ParallelFor *)arg);

WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size());

ParallelFor::template exec_range<WorkTag>(self.m_functor, range.begin(),
range.end());

exec.fan_in();
}

template <class Schedule>
static std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value>
exec_schedule(ThreadsExec &exec, const void *arg) {
const ParallelFor &self = *((const ParallelFor *)arg);

WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size());

exec.set_work_range(range.begin() - self.m_policy.begin(),
range.end() - self.m_policy.begin(),
self.m_policy.chunk_size());
exec.reset_steal_target();
exec.barrier();

long work_index = exec.get_work_index();

while (work_index != -1) {
const Member begin =
static_cast<Member>(work_index) * self.m_policy.chunk_size() +
self.m_policy.begin();
const Member end =
begin + self.m_policy.chunk_size() < self.m_policy.end()
? begin + self.m_policy.chunk_size()
: self.m_policy.end();
ParallelFor::template exec_range<WorkTag>(self.m_functor, begin, end);
work_index = exec.get_work_index();
}

exec.fan_in();
}

public:
inline void execute() const {
ThreadsExec::start(&ParallelFor::exec, this);
ThreadsExec::fence();
}

ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy)
: m_functor(arg_functor), m_policy(arg_policy) {}
};

} // namespace Impl
} // namespace Kokkos

#endif
118 changes: 118 additions & 0 deletions core/src/Threads/Kokkos_Threads_ParallelFor_Team.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_THREADS_PARALLEL_FOR_TEAM_HPP
#define KOKKOS_THREADS_PARALLEL_FOR_TEAM_HPP

#include <Kokkos_Parallel.hpp>

namespace Kokkos {
namespace Impl {

template <class FunctorType, class... Properties>
class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
Kokkos::Threads> {
private:
using Policy =
Kokkos::Impl::TeamPolicyInternal<Kokkos::Threads, Properties...>;
using WorkTag = typename Policy::work_tag;
using Member = typename Policy::member_type;

const FunctorType m_functor;
const Policy m_policy;
const size_t m_shared;

template <class TagType, class Schedule>
inline static std::enable_if_t<std::is_void<TagType>::value &&
std::is_same<Schedule, Kokkos::Static>::value>
exec_team(const FunctorType &functor, Member member) {
for (; member.valid_static(); member.next_static()) {
functor(member);
}
}

template <class TagType, class Schedule>
inline static std::enable_if_t<!std::is_void<TagType>::value &&
std::is_same<Schedule, Kokkos::Static>::value>
exec_team(const FunctorType &functor, Member member) {
const TagType t{};
for (; member.valid_static(); member.next_static()) {
functor(t, member);
}
}

template <class TagType, class Schedule>
inline static std::enable_if_t<std::is_void<TagType>::value &&
std::is_same<Schedule, Kokkos::Dynamic>::value>
exec_team(const FunctorType &functor, Member member) {
for (; member.valid_dynamic(); member.next_dynamic()) {
functor(member);
}
}

template <class TagType, class Schedule>
inline static std::enable_if_t<!std::is_void<TagType>::value &&
std::is_same<Schedule, Kokkos::Dynamic>::value>
exec_team(const FunctorType &functor, Member member) {
const TagType t{};
for (; member.valid_dynamic(); member.next_dynamic()) {
functor(t, member);
}
}

static void exec(ThreadsExec &exec, const void *arg) {
const ParallelFor &self = *((const ParallelFor *)arg);

ParallelFor::exec_team<WorkTag, typename Policy::schedule_type::type>(
self.m_functor, Member(&exec, self.m_policy, self.m_shared));

exec.barrier();
exec.fan_in();
}
template <typename Policy>
Policy fix_policy(Policy policy) {
if (policy.impl_vector_length() < 0) {
policy.impl_set_vector_length(1);
}
if (policy.team_size() < 0) {
policy.impl_set_team_size(
policy.team_size_recommended(m_functor, ParallelForTag{}));
}
return policy;
}

public:
inline void execute() const {
ThreadsExec::resize_scratch(
0, Policy::member_type::team_reduce_size() + m_shared);

ThreadsExec::start(&ParallelFor::exec, this);

ThreadsExec::fence();
}

ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy)
: m_functor(arg_functor),
m_policy(fix_policy(arg_policy)),
m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) +
FunctorTeamShmemSize<FunctorType>::value(
arg_functor, m_policy.team_size())) {}
};

} // namespace Impl
} // namespace Kokkos

#endif

0 comments on commit cb22b80

Please sign in to comment.