Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for CUDA unified memory architectures i.e. Grace Hopper #6823

Open
wants to merge 16 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 11 additions & 2 deletions Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ KOKKOS_DEVICES ?= "Threads"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
Expand Down Expand Up @@ -415,7 +415,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc))

# IBM based.
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
Expand Down Expand Up @@ -778,6 +779,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1)
endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")

KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
Comment on lines +786 to +787
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

neoverse-v2 is not a valid -mcpu value for GCC 12.2; do we need to guard this flag with a compiler check?

endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
Expand Down
3 changes: 3 additions & 0 deletions cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY
dalg24 marked this conversation as resolved.
Show resolved Hide resolved
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
Expand Down Expand Up @@ -65,6 +67,7 @@
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
#cmakedefine KOKKOS_ARCH_ARMV81
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2
#cmakedefine KOKKOS_ARCH_ARMV9_GRACE
#cmakedefine KOKKOS_ARCH_A64FX
#cmakedefine KOKKOS_ARCH_AVX
#cmakedefine KOKKOS_ARCH_AVX2
Expand Down
16 changes: 16 additions & 0 deletions cmake/kokkos_arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ KOKKOS_CHECK_DEPRECATED_OPTIONS(
#-------------------------------------------------------------------------------
SET(KOKKOS_ARCH_LIST)

include(CheckCXXCompilerFlag)

KOKKOS_DEPRECATED_LIST(ARCH ARCH)

Expand All @@ -49,6 +50,7 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU")
DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support")
DECLARE_AND_CHECK_HOST_ARCH(ARMV9_GRACE "ARMv9 NVIDIA Grace CPU")
DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs")
DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs")
DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs")
Expand Down Expand Up @@ -301,6 +303,20 @@ IF (KOKKOS_ARCH_A64FX)
)
ENDIF()

IF (KOKKOS_ARCH_ARMV9_GRACE)
  SET(KOKKOS_ARCH_ARM_NEON ON)
  # NVIDIA Grace is built from Arm Neoverse V2 cores, and Makefile.kokkos
  # targets -mcpu=neoverse-v2 for this architecture. Prefer the V2 target so
  # both build systems agree, and fall back to neoverse-n2 only for compilers
  # that do not know the V2 target yet.
  check_cxx_compiler_flag("-mcpu=neoverse-v2" COMPILER_SUPPORTS_NEOVERSE_V2)
  check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2)
  check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS)
  IF (COMPILER_SUPPORTS_NEOVERSE_V2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS)
    COMPILER_SPECIFIC_FLAGS(
      COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
      DEFAULT -mcpu=neoverse-v2 -msve-vector-bits=128
    )
  ELSEIF (COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS)
    COMPILER_SPECIFIC_FLAGS(
      COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
      DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128
    )
  ELSE()
    # No architecture flags are added in this case; the build proceeds with
    # the compiler defaults.
    MESSAGE(WARNING "Compiler does not support ARMv9 Grace architecture")
  ENDIF()
ENDIF()

IF (KOKKOS_ARCH_ZEN)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Expand Down
1 change: 0 additions & 1 deletion cmake/kokkos_enable_devices.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ ELSE()
ENDIF()
KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend")


# We want this to default to OFF for cache reasons, but if no
# host space is given, then activate serial
IF (KOKKOS_HAS_TRILINOS)
Expand Down
3 changes: 3 additions & 0 deletions cmake/kokkos_enable_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
# resolved but we keep the option around a bit longer to be safe.
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA")
KOKKOS_ENABLE_OPTION(IMPL_CUDA_EMULATE_UNIFIED_MEMORY OFF "Whether to emulate unified memory architectures for CUDA")

KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
Expand Down
13 changes: 13 additions & 0 deletions containers/unit_tests/TestWithoutInitializing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@
#endif
///@}

/// Skip guard for tests that cannot run against a unified-memory CudaSpace.
/// Without either unified-memory option the macro expands to nothing.
/// Otherwise it skips the enclosing test when TEST_EXECSPACE's memory space
/// is Kokkos::CudaSpace.
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) && \
    !defined(KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY)
#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE
#else
#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE                               \
  if constexpr (std::is_same_v<typename TEST_EXECSPACE::memory_space,    \
                               Kokkos::CudaSpace>)                       \
    GTEST_SKIP() << "skipping since unified memory requires additional " \
                    "fences";
#endif

TEST(TEST_CATEGORY, resize_realloc_no_init_dualview) {
using namespace Kokkos::Test::Tools;
listen_tool_events(Config::DisableAll(), Config::EnableKernels());
Expand Down Expand Up @@ -657,6 +669,7 @@ TEST(TEST_CATEGORY, create_mirror_no_init_dynamicview) {

TEST(TEST_CATEGORY, create_mirror_view_and_copy_dynamicview) {
GTEST_SKIP_IF_CUDAUVM_MEMORY_SPACE
GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE

using namespace Kokkos::Test::Tools;
listen_tool_events(Config::DisableAll(), Config::EnableKernels(),
Expand Down
50 changes: 47 additions & 3 deletions core/src/Cuda/Kokkos_CudaSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
#include <algorithm>
#include <atomic>

//#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp>
#include <impl/Kokkos_Error.hpp>

#include <impl/Kokkos_Tools.hpp>
Expand Down Expand Up @@ -184,6 +183,36 @@ void *impl_allocate_common(const int device_id,
cudaError_t error_code = cudaSuccess;
#ifndef CUDART_VERSION
#error CUDART_VERSION undefined!
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY)
// This is intended to simulate Grace-Hopper like behavior
cedricchevalier19 marked this conversation as resolved.
Show resolved Hide resolved
error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal);
if (error_code == cudaSuccess) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaDeviceSynchronize());
}
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
// This is intended for Grace-Hopper (and future unified memory architectures)
// The idea is to use host allocator and then advise to keep it in HBM on
// device, but that requires CUDA 12.2
static_assert(CUDART_VERSION >= 12020,
"CUDA runtime version >=12.2 required when "
"Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY is set");
if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr
ptr = malloc(arg_alloc_size);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cudaMalloc guarantees allocations aligned to at least 256 bytes; malloc provides no such alignment guarantee.

Do we care?

if (ptr != nullptr) {
// One would think cudaMemLocation{device_id,
// cudaMemLocationTypeDevice} would work but it doesn't. I.e. the order of
// members doesn't seem to be defined.
cudaMemLocation loc;
loc.id = device_id;
loc.type = cudaMemLocationTypeDevice;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
} else {
// I think this is the most logical error to return unless we
// want a different mechanism for this code path
error_code = cudaErrorMemoryAllocation;
}
}
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
if (arg_alloc_size >= memory_threshold_g) {
error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream);
Expand All @@ -196,9 +225,13 @@ void *impl_allocate_common(const int device_id,
"Kokkos::Cuda: backend fence after async malloc");
}
}
} else
} else {
error_code = cudaMalloc(&ptr, arg_alloc_size);
}
#else
error_code = cudaMalloc(&ptr, arg_alloc_size);
#endif
{ error_code = cudaMalloc(&ptr, arg_alloc_size); }

if (error_code != cudaSuccess) { // TODO tag as unlikely branch
// This is the only way to clear the last error, which
// we should do here since we're turning it into an
Expand Down Expand Up @@ -344,6 +377,13 @@ void CudaSpace::impl_deallocate(
try {
#ifndef CUDART_VERSION
#error CUDART_VERSION undefined!
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY)
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
Impl::cuda_device_synchronize(
"Kokkos::Cuda: backend fence before unified memory free");
free(arg_alloc_ptr);
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
if (arg_alloc_size >= memory_threshold_g) {
Impl::cuda_device_synchronize(
Expand Down Expand Up @@ -463,8 +503,12 @@ void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes,

#include <impl/Kokkos_SharedAlloc_timpl.hpp>

#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::CudaSpace);
#else
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::CudaSpace);
#endif
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::CudaUVMSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Expand Down
23 changes: 22 additions & 1 deletion core/src/Cuda/Kokkos_CudaSpace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ class CudaSpace {
void* allocate(const char* arg_label, const size_t arg_alloc_size,
const size_t arg_logical_size = 0) const;

#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
template <typename ExecutionSpace>
void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const {
crtrott marked this conversation as resolved.
Show resolved Hide resolved
return allocate(arg_alloc_size);
}
/// Execution-space overload of the labeled allocate(). The execution space
/// argument is not used; the request is forwarded unchanged to
/// allocate(arg_label, arg_alloc_size, arg_logical_size).
template <typename ExecutionSpace>
void* allocate(const ExecutionSpace& /*exec_space*/, const char* arg_label,
               const size_t arg_alloc_size,
               const size_t arg_logical_size = 0) const {
  void* const allocation =
      allocate(arg_label, arg_alloc_size, arg_logical_size);
  return allocation;
}
#endif

/**\brief Deallocate untracked memory in the cuda space */
void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
void deallocate(const char* arg_label, void* const arg_alloc_ptr,
Expand Down Expand Up @@ -337,7 +350,11 @@ static_assert(
template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
enum : bool { assignable = false };
enum : bool { accessible = false };
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
enum : bool{accessible = false};
#else
enum : bool { accessible = true };
#endif
enum : bool { deepcopy = true };
};

Expand Down Expand Up @@ -558,8 +575,12 @@ struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION(
Kokkos::CudaSpace);
#else
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaSpace);
#endif
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaUVMSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaHostPinnedSpace);

Expand Down
24 changes: 24 additions & 0 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,22 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default

//----------------------------------

#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
  // Check if unified memory is available on the selected device.
  // The attribute query is wrapped in the safe-call macro and the result is
  // zero-initialized, so a failed query is reported as a CUDA error instead
  // of the branch below reading an indeterminate value.
  int cuda_result = 0;
  KOKKOS_IMPL_CUDA_SAFE_CALL(cudaDeviceGetAttribute(
      &cuda_result, cudaDevAttrConcurrentManagedAccess, cuda_device_id));
  if (cuda_result == 0) {
    Kokkos::abort(
        "Kokkos::Cuda::initialize ERROR: Unified memory is not available on "
        "this device\n"
        "Please recompile Kokkos with "
        "-DKokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF\n");
  }
#endif

//----------------------------------

cudaStream_t singleton_stream;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(cuda_device_id));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamCreate(&singleton_stream));
Expand Down Expand Up @@ -705,6 +721,14 @@ void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const {
#else
os << "no\n";
#endif
// The unified-memory options are reported only when they are compiled in;
// nothing is printed for an option that is not defined.
#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
os << " KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY: ";
os << "yes\n";
#endif
#ifdef KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY
os << " KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY: ";
os << "yes\n";
#endif

os << "\nCuda Runtime Configuration:\n";

Expand Down
6 changes: 6 additions & 0 deletions core/src/Cuda/Kokkos_Cuda_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,13 @@ class CudaInternal {
/// Allocates `size` bytes and stores the resulting pointer in `*devPtr`.
///
/// When `setCudaDevice` is true, set_cuda_device() is called before the
/// allocation. Returns the status code of the allocation call; callers are
/// responsible for checking it.
template <bool setCudaDevice = true>
cudaError_t cuda_malloc_wrapper(void** devPtr, size_t size) const {
  if constexpr (setCudaDevice) set_cuda_device();
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY)
  return cudaMalloc(devPtr, size);
#else
  // Emulation mode: allocate managed memory instead of device memory.
  const cudaError_t error_code =
      cudaMallocManaged(devPtr, size, cudaMemAttachGlobal);
  // Synchronize only after a successful allocation, matching the emulation
  // path in impl_allocate_common; a failed allocation is simply reported
  // through the returned status.
  if (error_code == cudaSuccess) {
    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaDeviceSynchronize());
  }
  return error_code;
#endif
}

template <bool setCudaDevice = true>
Expand Down
9 changes: 9 additions & 0 deletions core/src/Kokkos_Macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,15 @@ static constexpr bool kokkos_omp_on_host() { return false; }
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
#endif

// Emulating unified memory implies the unified-memory code paths: defining
// the emulation macro also defines KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY.
#ifdef KOKKOS_ENABLE_IMPL_CUDA_EMULATE_UNIFIED_MEMORY
#define KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
#endif

// TODO: enable the following when we are sure it is the right thing to do
//#if defined(KOKKOS_ARCH_ARMV9_GRACE) && defined(KOKKOS_ARCH_HOPPER90)
//#define KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
//#endif
Comment on lines +544 to +547
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we only care about emulating for now? Or do we want to enable this before merging after testing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a cmake option for this so you can enable it explicitly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I still think we should test this in at least one CI build before merging.


#define KOKKOS_INVALID_INDEX (~std::size_t(0))

#define KOKKOS_IMPL_CTOR_DEFAULT_ARG KOKKOS_INVALID_INDEX
Expand Down
16 changes: 16 additions & 0 deletions core/unit_test/cuda/TestCuda_Spaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,14 @@ TEST(cuda, space_access) {
!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
Kokkos::CudaSpace>::assignable);

#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
crtrott marked this conversation as resolved.
Show resolved Hide resolved
static_assert(
!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
Kokkos::CudaSpace>::accessible);
#else
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
Kokkos::CudaSpace>::accessible);
#endif

static_assert(
!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
Expand Down Expand Up @@ -147,8 +152,13 @@ TEST(cuda, space_access) {
Kokkos::SpaceAccessibility<Kokkos::Cuda,
Kokkos::CudaHostPinnedSpace>::accessible);

#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
static_assert(!Kokkos::SpaceAccessibility<Kokkos::HostSpace,
Kokkos::CudaSpace>::accessible);
#else
static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace,
Kokkos::CudaSpace>::accessible);
#endif

static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace,
Kokkos::CudaUVMSpace>::accessible);
Expand All @@ -157,8 +167,14 @@ TEST(cuda, space_access) {
Kokkos::SpaceAccessibility<Kokkos::HostSpace,
Kokkos::CudaHostPinnedSpace>::accessible);

#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
static_assert(std::is_same<Kokkos::Impl::HostMirror<Kokkos::CudaSpace>::Space,
Kokkos::HostSpace>::value);
#else
static_assert(std::is_same<Kokkos::Impl::HostMirror<Kokkos::CudaSpace>::Space,
Kokkos::Device<Kokkos::HostSpace::execution_space,
Kokkos::CudaSpace>>::value);
#endif

static_assert(
std::is_same<Kokkos::Impl::HostMirror<Kokkos::CudaUVMSpace>::Space,
Expand Down