Skip to content

Commit

Permalink
fix impl
Browse files Browse the repository at this point in the history
  • Loading branch information
fnrizzi committed Oct 9, 2023
1 parent 1ebb3af commit 1f4e3d5
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 42 deletions.
40 changes: 24 additions & 16 deletions algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <Kokkos_Core.hpp>
#include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp"
#include "Kokkos_MustUseKokkosSingleInTeam.hpp"
#include <std_algorithms/Kokkos_Distance.hpp>
#include <string>

Expand Down Expand Up @@ -122,25 +123,32 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
return d_first;
}

// FIXME: there is no parallel_scan overload that accepts TeamThreadRange and
// return_value, so temporarily serial implementation is used here
const std::size_t num_elements = Kokkos::Experimental::distance(first, last);
std::size_t count = 0;
Kokkos::single(
Kokkos::PerTeam(teamHandle),
[=](std::size_t& lcount) {
lcount = 0;
for (std::size_t i = 0; i < num_elements; ++i) {
const auto& myval = first[i];
if (pred(myval)) {
d_first[lcount++] = myval;
if constexpr (stdalgo_must_use_kokkos_single_for_team_scan<
typename TeamHandleType::execution_space>::value) {
std::size_t count = 0;
Kokkos::single(
Kokkos::PerTeam(teamHandle),
[=](std::size_t& lcount) {
lcount = 0;
for (std::size_t i = 0; i < num_elements; ++i) {
const auto& myval = first[i];
if (pred(myval)) {
d_first[lcount++] = myval;
}
}
}
},
count);
// no barrier needed since single above broadcasts to all members
},
count);
// no barrier needed since single above broadcasts to all members
return d_first + count;

return d_first + count;
} else {
typename InputIterator::difference_type count = 0;
::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements),
StdCopyIfFunctor(first, d_first, pred), count);
// no barrier needed because of the scan accumulating into count
return d_first + count;
}
}

} // namespace Impl
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_STD_ALGORITHMS_MUSTUSEKOKKOSSINGLEINTEAM_HPP
#define KOKKOS_STD_ALGORITHMS_MUSTUSEKOKKOSSINGLEINTEAM_HPP

#include <Kokkos_Core.hpp>

namespace Kokkos::Experimental::Impl {

/// Compile-time trait keyed on a type T (specialized below for execution
/// space types).
///
/// The primary template derives from std::false_type. The specializations
/// that follow derive from std::true_type for the backends that do not
/// provide the team-level scan overload taking an "out" value in which the
/// scan result is stored. Each specialization is only compiled when the
/// corresponding backend is enabled.
template <typename T>
struct stdalgo_must_use_kokkos_single_for_team_scan : std::false_type {};

#ifdef KOKKOS_ENABLE_OPENACC
// FIXME_OPENACC: no team-level scan overload with an "out" value
template <>
struct stdalgo_must_use_kokkos_single_for_team_scan<
    Kokkos::Experimental::OpenACC> : std::true_type {};
#endif

#ifdef KOKKOS_ENABLE_OPENMPTARGET
// FIXME_OPENMPTARGET: no team-level scan overload with an "out" value
template <>
struct stdalgo_must_use_kokkos_single_for_team_scan<
    Kokkos::Experimental::OpenMPTarget> : std::true_type {};
#endif

#ifdef KOKKOS_ENABLE_HPX
// FIXME_HPX: no team-level scan overload with an "out" value
template <>
struct stdalgo_must_use_kokkos_single_for_team_scan<
    Kokkos::Experimental::HPX> : std::true_type {};
#endif

#ifdef KOKKOS_ENABLE_THREADS
// FIXME_THREADS: no team-level scan overload with an "out" value
template <>
struct stdalgo_must_use_kokkos_single_for_team_scan<Kokkos::Threads>
    : std::true_type {};
#endif

}  // namespace Kokkos::Experimental::Impl

#endif
61 changes: 35 additions & 26 deletions algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <Kokkos_Core.hpp>
#include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp"
#include "Kokkos_MustUseKokkosSingleInTeam.hpp"
#include "Kokkos_CopyCopyN.hpp"
#include <std_algorithms/Kokkos_Distance.hpp>
#include <string>
Expand Down Expand Up @@ -138,33 +139,41 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
} else if (num_elements == 1) {
d_first[0] = first[0];
return d_first + 1;
} else {
// FIXME: parallel_scan is what we used for the execution space impl,
// but parallel_scan does not support TeamThreadRange, so for the
// team-level impl we do this serially for now and later figure out
// if this can be done in parallel

std::size_t count = 0;
Kokkos::single(
Kokkos::PerTeam(teamHandle),
[=](std::size_t& lcount) {
lcount = 0;
for (std::size_t i = 0; i < num_elements - 1; ++i) {
const auto& val_i = first[i];
const auto& val_ip1 = first[i + 1];
if (!pred(val_i, val_ip1)) {
d_first[lcount++] = val_i;
}

else {
if constexpr (stdalgo_must_use_kokkos_single_for_team_scan<
typename TeamHandleType::execution_space>::value) {
std::size_t count = 0;
Kokkos::single(
Kokkos::PerTeam(teamHandle),
[=](std::size_t& lcount) {
lcount = 0;
for (std::size_t i = 0; i < num_elements - 1; ++i) {
const auto& val_i = first[i];
const auto& val_ip1 = first[i + 1];
if (!pred(val_i, val_ip1)) {
d_first[lcount++] = val_i;
}
}
}
// we need to copy the last element always
d_first[lcount++] = first[num_elements - 1];
},
count);
// no barrier needed since single above broadcasts to all members

// return the correct iterator: we need +1 here because we need to
// return iterator to the element past the last element copied
return d_first + count;
// we need to copy the last element always
d_first[lcount++] = first[num_elements - 1];
},
count);
// no barrier needed since single above broadcasts to all members

return d_first + count;
} else {
const auto scan_size = num_elements - 1;
std::size_t count = 0;
::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, scan_size),
StdUniqueCopyFunctor(first, last, d_first, pred),
count);
// no barrier needed since reducing into count

return Impl::copy_team_impl(teamHandle, first + scan_size, last,
d_first + count);
}
}
}

Expand Down

0 comments on commit 1f4e3d5

Please sign in to comment.