Skip to content

Commit

Permalink
Desul atomics: let relocatable device code mode be part of the config…
Browse files Browse the repository at this point in the history
…uration (kokkos#5991)

* Desul atomics: Make compilation fail if SEPARABLE_COMPILATION configuration is not compatible with the relocatable device code mode

* Desul atomics: Prefer #ifdef DESUL_ATOMICS_ENABLE_{CUDA,HIP}_SEPARABLE_DEVICE_CODE macro guards

* Desul atomics: add DESUL_ATOMICS_ENABLE_{CUDA,HIP}_SEPARABLE_COMPILATION macros

* Configure DESUL_ATOMICS_ENABLE_{CUDA,HIP}_SEPARABLE_COMPILATION in bundled version of the desul atomics

* Merge latest version

* Fixup Clang+CUDA defines __CLANG_RDC__ instead of __CUDACC_RDC__
  • Loading branch information
dalg24 committed Apr 11, 2023
1 parent 0702062 commit 48b34de
Show file tree
Hide file tree
Showing 9 changed files with 58 additions and 67 deletions.
10 changes: 10 additions & 0 deletions Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -1399,12 +1399,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA */")
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION")
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION */")
endif

ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP")
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP */")
endif
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION")
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION */")
endif

ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_SYCL")
Expand Down
6 changes: 6 additions & 0 deletions core/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@ IF (NOT desul_FOUND)
IF(KOKKOS_ENABLE_CUDA)
SET(DESUL_ATOMICS_ENABLE_CUDA ON)
ENDIF()
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
SET(DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION ON)
ENDIF()
IF(KOKKOS_ENABLE_HIP)
SET(DESUL_ATOMICS_ENABLE_HIP ON)
ENDIF()
IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
SET(DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION ON)
ENDIF()
IF(KOKKOS_ENABLE_SYCL)
SET(DESUL_ATOMICS_ENABLE_SYCL ON)
ENDIF()
Expand Down
2 changes: 2 additions & 0 deletions tpls/desul/Config.hpp.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ SPDX-License-Identifier: (BSD-3-Clause)
#define DESUL_ATOMICS_CONFIG_HPP_

#cmakedefine DESUL_ATOMICS_ENABLE_CUDA
#cmakedefine DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION
#cmakedefine DESUL_ATOMICS_ENABLE_HIP
#cmakedefine DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION
#cmakedefine DESUL_ATOMICS_ENABLE_SYCL
#cmakedefine DESUL_ATOMICS_ENABLE_OPENMP

Expand Down
8 changes: 4 additions & 4 deletions tpls/desul/include/desul/atomics/Lock_Array_CUDA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ void finalize_lock_arrays_cuda();
/// variable based on the Host global variable prior to running any kernels
/// that will use it.
/// That is the purpose of the ensure_cuda_lock_arrays_on_device function.
#ifdef __CUDACC_RDC__
#ifdef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION
extern
#endif
__device__ __constant__ int32_t* CUDA_SPACE_ATOMIC_LOCKS_DEVICE;

#ifdef __CUDACC_RDC__
#ifdef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION
extern
#endif
__device__ __constant__ int32_t* CUDA_SPACE_ATOMIC_LOCKS_NODE;
Expand Down Expand Up @@ -108,7 +108,7 @@ __device__ inline void unlock_address_cuda(void* ptr, desul::MemoryScopeNode) {
atomicExch(&desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_NODE[offset], 0);
}

#ifdef __CUDACC_RDC__
#ifdef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION
inline
#else
inline static
Expand All @@ -132,7 +132,7 @@ inline static

namespace desul {

#if defined(__CUDACC_RDC__)
#ifdef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION
inline void ensure_cuda_lock_arrays_on_device() {}
#else
static inline void ensure_cuda_lock_arrays_on_device() {
Expand Down
8 changes: 4 additions & 4 deletions tpls/desul/include/desul/atomics/Lock_Array_HIP.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ void finalize_lock_arrays_hip();
* will use it. That is the purpose of the
* ensure_hip_lock_arrays_on_device function.
*/
#ifdef __CLANG_RDC__
#ifdef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION
extern
#endif
__device__ __constant__ int32_t* HIP_SPACE_ATOMIC_LOCKS_DEVICE;

#ifdef __CLANG_RDC__
#ifdef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION
extern
#endif
__device__ __constant__ int32_t* HIP_SPACE_ATOMIC_LOCKS_NODE;
Expand Down Expand Up @@ -115,7 +115,7 @@ __device__ inline void unlock_address_hip(void* ptr, desul::MemoryScopeNode) {
atomicExch(&desul::Impl::HIP_SPACE_ATOMIC_LOCKS_NODE[offset], 0);
}

#ifdef __CLANG_RDC__
#ifdef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION
inline
#else
inline static
Expand All @@ -135,7 +135,7 @@ inline static
}
} // namespace Impl

#if defined(__CLANG_RDC__)
#ifdef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION
inline void ensure_hip_lock_arrays_on_device() {}
#else
static inline void ensure_hip_lock_arrays_on_device() {
Expand Down

This file was deleted.

28 changes: 28 additions & 0 deletions tpls/desul/include/desul/atomics/Macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,34 @@ SPDX-License-Identifier: (BSD-3-Clause)

#include <desul/atomics/Config.hpp>

// Intercept incompatible relocatable device code mode which leads to ODR violations
#ifdef DESUL_ATOMICS_ENABLE_CUDA
#if (defined(__clang__) && defined(__CUDA__) && defined(__CLANG_RDC__)) || \
defined(__CUDACC_RDC__)
#define DESUL_IMPL_CUDA_RDC
#endif

#if (defined(DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION) && \
!defined(DESUL_IMPL_CUDA_RDC)) || \
(!defined(DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION) && \
defined(DESUL_IMPL_CUDA_RDC))
#error Relocatable device code mode incompatible with desul atomics configuration
#endif

#ifdef DESUL_IMPL_CUDA_RDC
#undef DESUL_IMPL_CUDA_RDC
#endif
#endif

#ifdef DESUL_ATOMICS_ENABLE_HIP
#if (defined(DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION) && \
!defined(__CLANG_RDC__)) || \
(!defined(DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION) && \
defined(__CLANG_RDC__))
#error Relocatable device code mode incompatible with desul atomics configuration
#endif
#endif

// Macros

#if defined(DESUL_ATOMICS_ENABLE_CUDA) && defined(__CUDACC__)
Expand Down
4 changes: 2 additions & 2 deletions tpls/desul/src/Lock_Array_CUDA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ SPDX-License-Identifier: (BSD-3-Clause)
#include <sstream>
#include <string>

#ifdef __CUDACC_RDC__
#ifdef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION
namespace desul {
namespace Impl {
__device__ __constant__ int32_t* CUDA_SPACE_ATOMIC_LOCKS_DEVICE = nullptr;
Expand Down Expand Up @@ -83,7 +83,7 @@ void finalize_lock_arrays_cuda() {
cudaFreeHost(CUDA_SPACE_ATOMIC_LOCKS_NODE_h);
CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h = nullptr;
CUDA_SPACE_ATOMIC_LOCKS_NODE_h = nullptr;
#ifdef __CUDACC_RDC__
#ifdef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION
copy_cuda_lock_arrays_to_device();
#endif
}
Expand Down
4 changes: 2 additions & 2 deletions tpls/desul/src/Lock_Array_HIP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ SPDX-License-Identifier: (BSD-3-Clause)
#include <sstream>
#include <string>

#ifdef __CLANG_RDC__
#ifdef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION
namespace desul {
namespace Impl {
__device__ __constant__ int32_t* HIP_SPACE_ATOMIC_LOCKS_DEVICE = nullptr;
Expand Down Expand Up @@ -87,7 +87,7 @@ void finalize_lock_arrays_hip() {
check_error_and_throw_hip(error_free2, "finalize_lock_arrays_hip: free host locks");
HIP_SPACE_ATOMIC_LOCKS_DEVICE_h = nullptr;
HIP_SPACE_ATOMIC_LOCKS_NODE_h = nullptr;
#ifdef __CLANG_RDC__
#ifdef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION
copy_hip_lock_arrays_to_device();
#endif
}
Expand Down

0 comments on commit 48b34de

Please sign in to comment.