Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for rocThrust in sort when using HIP #6793

Merged
merged 4 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .jenkins
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ pipeline {
dockerfile {
filename 'Dockerfile.hipcc'
dir 'scripts/docker'
additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2'
additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2-complete'
label 'rocm-docker '
args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES'
}
Expand Down Expand Up @@ -181,7 +181,7 @@ pipeline {
dockerfile {
filename 'Dockerfile.hipcc'
dir 'scripts/docker'
additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.6'
additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.6-complete'
label 'rocm-docker'
args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES'
}
Expand Down
3 changes: 1 addition & 2 deletions algorithms/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,4 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
${CMAKE_CURRENT_SOURCE_DIR}
)



KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST)
42 changes: 42 additions & 0 deletions algorithms/src/sorting/impl/Kokkos_SortImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@

#endif

#if defined(KOKKOS_ENABLE_ROCTHRUST)
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif

#if defined(KOKKOS_ENABLE_ONEDPL)
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
Expand Down Expand Up @@ -184,6 +189,26 @@ void sort_cudathrust(const Cuda& space,
}
#endif

#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class DataType, class... Properties, class... MaybeComparator>
void sort_rocthrust(const HIP& space,
const Kokkos::View<DataType, Properties...>& view,
MaybeComparator&&... maybeComparator) {
using ViewType = Kokkos::View<DataType, Properties...>;
static_assert(ViewType::rank == 1,
"Kokkos::sort: currently only supports rank-1 Views.");

if (view.extent(0) <= 1) {
return;
}
const auto exec = thrust::hip::par.on(space.hip_stream());
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
thrust::sort(exec, first, last,
std::forward<MaybeComparator>(maybeComparator)...);
}
#endif

#if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties, class... MaybeComparator>
void sort_onedpl(const Kokkos::Experimental::SYCL& space,
Expand Down Expand Up @@ -274,6 +299,14 @@ void sort_device_view_without_comparator(
}
#endif

#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class DataType, class... Properties>
void sort_device_view_without_comparator(
const HIP& exec, const Kokkos::View<DataType, Properties...>& view) {
sort_rocthrust(exec, view);
}
#endif

#if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties>
void sort_device_view_without_comparator(
Expand Down Expand Up @@ -320,6 +353,15 @@ void sort_device_view_with_comparator(
}
#endif

#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class ComparatorType, class DataType, class... Properties>
void sort_device_view_with_comparator(
const HIP& exec, const Kokkos::View<DataType, Properties...>& view,
const ComparatorType& comparator) {
sort_rocthrust(exec, view, comparator);
}
#endif

#if defined(KOKKOS_ENABLE_ONEDPL)
template <class ComparatorType, class DataType, class... Properties>
void sort_device_view_with_comparator(
Expand Down
1 change: 1 addition & 0 deletions cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#cmakedefine KOKKOS_ENABLE_LIBDL
#cmakedefine KOKKOS_ENABLE_LIBQUADMATH
#cmakedefine KOKKOS_ENABLE_ONEDPL
#cmakedefine KOKKOS_ENABLE_ROCTHRUST
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to add that setting to HIP::print_configuration()

@masterleinad same comment about KOKKOS_ENABLE_ONEDPL and SYCL

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me do that after #6758 is merged.


#cmakedefine KOKKOS_ARCH_ARMV80
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
Expand Down
12 changes: 12 additions & 0 deletions cmake/Modules/FindTPLROCTHRUST.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# ROCm 5.6 and earlier set AMDGPU_TARGETS and GPU_TARGETS to all the supported
# architectures. Therefore, we end up compiling Kokkos for all the supported
# architecture. Starting with ROCm 5.7 AMDGPU_TARGETS and GPU_TARGETS are empty.
# It is the user's job to set the variables. Since we are injecting the
# architecture flag ourselves, we can let the variables empty. To replicate the
# behavior of ROCm 5.7 and later for earlier version of ROCm we set
# AMDGPU_TARGETS and GPU_TARGETS to empty and set the values in the cache. If
# the values are not cached, FIND_PACKAGE(rocthrust) will overwrite them.
SET(AMDGPU_TARGETS "" CACHE STRING "AMD GPU targets to compile for")
SET(GPU_TARGETS "" CACHE STRING "GPU targets to compile for")
FIND_PACKAGE(rocthrust REQUIRED)
dalg24 marked this conversation as resolved.
Show resolved Hide resolved
KOKKOS_CREATE_IMPORTED_TPL(ROCTHRUST INTERFACE LINK_LIBRARIES roc::rocthrust)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still have the arborx/ArborX#652 issue where the exported rocThrust package potentially adds flags to compile for all GPU architecture it knows about?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least for the version shipped with ROCm 5.7, it works fine. I see only one architecture.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to find out for sure because IIRC compile time blows up

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked at another PR it looks like the compile time is indeed 2x more for both 5.2 and 5.6 so we need to handle this and set the GPU_TARGETS before attempting to find rocThrust.

3 changes: 3 additions & 0 deletions cmake/kokkos_tpls.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ ELSE()
SET(ROCM_DEFAULT OFF)
ENDIF()
KOKKOS_TPL_OPTION(ROCM ${ROCM_DEFAULT})
KOKKOS_TPL_OPTION(ROCTHRUST ${KOKKOS_ENABLE_HIP})

IF(KOKKOS_ENABLE_SYCL AND NOT KOKKOS_HAS_TRILINOS)
SET(ONEDPL_DEFAULT ON)
ELSE()
Expand Down Expand Up @@ -83,6 +85,7 @@ IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
KOKKOS_IMPORT_TPL(ONEDPL INTERFACE)
ENDIF()
KOKKOS_IMPORT_TPL(LIBQUADMATH)
KOKKOS_IMPORT_TPL(ROCTHRUST)

IF (Kokkos_ENABLE_DESUL_ATOMICS_EXTERNAL)
find_package(desul REQUIRED COMPONENTS atomics)
Expand Down
7 changes: 7 additions & 0 deletions core/src/HIP/Kokkos_HIP_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@ void HIPInternal::print_configuration(std::ostream &s) const {
<< '\n';
#endif

s << "macro KOKKOS_ENABLE_ROCTHRUST : "
#if defined(KOKKOS_ENABLE_ROCTHRUST)
<< "defined\n";
#else
<< "undefined\n";
#endif

for (int i : get_visible_devices()) {
hipDeviceProp_t hipProp;
KOKKOS_IMPL_HIP_SAFE_CALL(hipGetDeviceProperties(&hipProp, i));
Expand Down
Loading