Skip to content

Commit

Permalink
Split files in HIP backend
Browse files Browse the repository at this point in the history
  • Loading branch information
Rombur committed Oct 30, 2023
1 parent b4f27c8 commit 6b4ee34
Show file tree
Hide file tree
Showing 10 changed files with 1,606 additions and 1,457 deletions.
173 changes: 173 additions & 0 deletions core/src/HIP/Kokkos_HIP_ParallelFor_MDRange.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_HIP_PARALLEL_FOR_MDRANGE_HPP
#define KOKKOS_HIP_PARALLEL_FOR_MDRANGE_HPP

#include <Kokkos_Parallel.hpp>

#include <HIP/Kokkos_HIP_BlockSize_Deduction.hpp>
#include <HIP/Kokkos_HIP_KernelLaunch.hpp>
#include <KokkosExp_MDRangePolicy.hpp>
#include <impl/KokkosExp_IterateTileGPU.hpp>

namespace Kokkos {
namespace Impl {

// ParallelFor
template <class FunctorType, class... Traits>
class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, HIP> {
public:
using Policy = Kokkos::MDRangePolicy<Traits...>;
using functor_type = FunctorType;

private:
using array_index_type = typename Policy::array_index_type;
using index_type = typename Policy::index_type;
using LaunchBounds = typename Policy::launch_bounds;

const FunctorType m_functor;
const Policy m_policy;

public:
ParallelFor() = delete;
ParallelFor(ParallelFor const&) = default;
ParallelFor& operator=(ParallelFor const&) = delete;

inline __device__ void operator()() const {
Kokkos::Impl::DeviceIterateTile<Policy::rank, Policy, FunctorType,
typename Policy::work_tag>(m_policy,
m_functor)
.exec_range();
}

inline void execute() const {
using ClosureType = ParallelFor<FunctorType, Policy, HIP>;
if (m_policy.m_num_tiles == 0) return;
auto const maxblocks = hip_internal_maximum_grid_count();
if (Policy::rank == 2) {
dim3 const block(m_policy.m_tile[0], m_policy.m_tile[1], 1);
dim3 const grid(
std::min<array_index_type>(
(m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) /
block.x,
maxblocks[0]),
std::min<array_index_type>(
(m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) /
block.y,
maxblocks[1]),
1);
hip_parallel_launch<ClosureType, LaunchBounds>(
*this, grid, block, 0,
m_policy.space().impl_internal_space_instance(), false);
} else if (Policy::rank == 3) {
dim3 const block(m_policy.m_tile[0], m_policy.m_tile[1],
m_policy.m_tile[2]);
dim3 const grid(
std::min<array_index_type>(
(m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) /
block.x,
maxblocks[0]),
std::min<array_index_type>(
(m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) /
block.y,
maxblocks[1]),
std::min<array_index_type>(
(m_policy.m_upper[2] - m_policy.m_lower[2] + block.z - 1) /
block.z,
maxblocks[2]));
hip_parallel_launch<ClosureType, LaunchBounds>(
*this, grid, block, 0,
m_policy.space().impl_internal_space_instance(), false);
} else if (Policy::rank == 4) {
// id0,id1 encoded within threadIdx.x; id2 to threadIdx.y; id3 to
// threadIdx.z
dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1],
m_policy.m_tile[2], m_policy.m_tile[3]);
dim3 const grid(
std::min<array_index_type>(
m_policy.m_tile_end[0] * m_policy.m_tile_end[1], maxblocks[0]),
std::min<array_index_type>(
(m_policy.m_upper[2] - m_policy.m_lower[2] + block.y - 1) /
block.y,
maxblocks[1]),
std::min<array_index_type>(
(m_policy.m_upper[3] - m_policy.m_lower[3] + block.z - 1) /
block.z,
maxblocks[2]));
hip_parallel_launch<ClosureType, LaunchBounds>(
*this, grid, block, 0,
m_policy.space().impl_internal_space_instance(), false);
} else if (Policy::rank == 5) {
// id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4
// to threadIdx.z
dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1],
m_policy.m_tile[2] * m_policy.m_tile[3],
m_policy.m_tile[4]);
dim3 const grid(
std::min<array_index_type>(
m_policy.m_tile_end[0] * m_policy.m_tile_end[1], maxblocks[0]),
std::min<array_index_type>(
m_policy.m_tile_end[2] * m_policy.m_tile_end[3], maxblocks[1]),
std::min<array_index_type>(
(m_policy.m_upper[4] - m_policy.m_lower[4] + block.z - 1) /
block.z,
maxblocks[2]));
hip_parallel_launch<ClosureType, LaunchBounds>(
*this, grid, block, 0,
m_policy.space().impl_internal_space_instance(), false);
} else if (Policy::rank == 6) {
// id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y;
// id4,id5 to threadIdx.z
dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1],
m_policy.m_tile[2] * m_policy.m_tile[3],
m_policy.m_tile[4] * m_policy.m_tile[5]);
dim3 const grid(
std::min<array_index_type>(
m_policy.m_tile_end[0] * m_policy.m_tile_end[1], maxblocks[0]),
std::min<array_index_type>(
m_policy.m_tile_end[2] * m_policy.m_tile_end[3], maxblocks[1]),
std::min<array_index_type>(
m_policy.m_tile_end[4] * m_policy.m_tile_end[5], maxblocks[2]));
hip_parallel_launch<ClosureType, LaunchBounds>(
*this, grid, block, 0,
m_policy.space().impl_internal_space_instance(), false);
} else {
Kokkos::abort("Kokkos::MDRange Error: Exceeded rank bounds with HIP\n");
}

} // end execute

ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy)
: m_functor(arg_functor), m_policy(arg_policy) {}

template <typename Policy, typename Functor>
static int max_tile_size_product(const Policy&, const Functor&) {
using closure_type =
ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, HIP>;
unsigned block_size = hip_get_max_blocksize<closure_type, LaunchBounds>();
if (block_size == 0)
Kokkos::Impl::throw_runtime_exception(
std::string("Kokkos::Impl::ParallelFor< HIP > could not find a valid "
"tile size."));
return block_size;
}
};

} // namespace Impl
} // namespace Kokkos

#endif
100 changes: 100 additions & 0 deletions core/src/HIP/Kokkos_HIP_ParallelFor_Range.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_HIP_PARALLEL_FOR_RANGE_HPP
#define KOKKOS_HIP_PARALLEL_FOR_RANGE_HPP

#include <Kokkos_Parallel.hpp>

#include <HIP/Kokkos_HIP_BlockSize_Deduction.hpp>
#include <HIP/Kokkos_HIP_KernelLaunch.hpp>

namespace Kokkos {
namespace Impl {

template <class FunctorType, class... Traits>
class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::HIP> {
public:
using Policy = Kokkos::RangePolicy<Traits...>;

private:
using Member = typename Policy::member_type;
using WorkTag = typename Policy::work_tag;
using LaunchBounds = typename Policy::launch_bounds;

const FunctorType m_functor;
const Policy m_policy;

template <class TagType>
inline __device__ std::enable_if_t<std::is_void<TagType>::value> exec_range(
const Member i) const {
m_functor(i);
}

template <class TagType>
inline __device__ std::enable_if_t<!std::is_void<TagType>::value> exec_range(
const Member i) const {
m_functor(TagType(), i);
}

public:
using functor_type = FunctorType;

ParallelFor() = delete;
ParallelFor(ParallelFor const&) = default;
ParallelFor& operator=(ParallelFor const&) = delete;

inline __device__ void operator()() const {
const Member work_stride = blockDim.y * gridDim.x;
const Member work_end = m_policy.end();

for (Member iwork =
m_policy.begin() + threadIdx.y + blockDim.y * blockIdx.x;
iwork < work_end;
iwork = iwork < work_end - work_stride ? iwork + work_stride
: work_end) {
this->template exec_range<WorkTag>(iwork);
}
}

inline void execute() const {
const typename Policy::index_type nwork = m_policy.end() - m_policy.begin();

using DriverType = ParallelFor<FunctorType, Policy, Kokkos::HIP>;
const int block_size =
Kokkos::Impl::hip_get_preferred_blocksize<DriverType, LaunchBounds>();
const dim3 block(1, block_size, 1);
const dim3 grid(
typename Policy::index_type((nwork + block.y - 1) / block.y), 1, 1);

if (block_size == 0) {
Kokkos::Impl::throw_runtime_exception(
std::string("Kokkos::Impl::ParallelFor< HIP > could not find a "
"valid execution configuration."));
}
Kokkos::Impl::hip_parallel_launch<DriverType, LaunchBounds>(
*this, grid, block, 0, m_policy.space().impl_internal_space_instance(),
false);
}

ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy)
: m_functor(arg_functor), m_policy(arg_policy) {}
};

} // namespace Impl
} // namespace Kokkos

#endif

0 comments on commit 6b4ee34

Please sign in to comment.