Skip to content

Commit

Permalink
Fix cudaAPI wrapper errors for CUDA_MALLOC_ASYNC (kokkos#6346)
Browse files Browse the repository at this point in the history
* Fix errors relating to KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC

- fix the default argument of the CUDA API wrappers
- fix typo in the input stream name used by the wrappers

* add to CI build

* cast int -> size_t to avoid comparison warning

* Change definition of memory_threshold_g to size_t
  • Loading branch information
tcclevenger committed Aug 11, 2023
1 parent da49ee2 commit e454cc6
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 5 deletions.
1 change: 1 addition & 0 deletions .jenkins
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ pipeline {
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_LIBDL=OFF \
-DKokkos_ENABLE_IMPL_MDSPAN=ON \
-DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=ON \
.. && \
make -j8 && ctest --verbose && \
cd ../example/build_cmake_in_tree && \
Expand Down
2 changes: 1 addition & 1 deletion core/src/Cuda/Kokkos_CudaSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ CudaUVMSpace::CudaUVMSpace() : m_device(Kokkos::Cuda().cuda_device()) {}

CudaHostPinnedSpace::CudaHostPinnedSpace() {}

int memory_threshold_g = 40000; // 40 kB
size_t memory_threshold_g = 40000; // 40 kB

//==============================================================================
// <editor-fold desc="allocate()"> {{{1
Expand Down
8 changes: 4 additions & 4 deletions core/src/Cuda/Kokkos_Cuda_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,16 +487,16 @@ class CudaInternal {
#if (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
template <bool setCudaDevice = true>
cudaError_t cuda_malloc_async_wrapper(void** devPtr, size_t size,
cudaStream_t hStream == nullptr) const {
cudaStream_t hStream = nullptr) const {
if constexpr (setCudaDevice) set_cuda_device();
return cudaMallocAsync(devPtr, size, get_input_stream(stream));
return cudaMallocAsync(devPtr, size, get_input_stream(hStream));
}

template <bool setCudaDevice = true>
cudaError_t cuda_free_async_wrapper(void* devPtr,
cudaStream_t hStream == nullptr) const {
cudaStream_t hStream = nullptr) const {
if constexpr (setCudaDevice) set_cuda_device();
return cudaFreeAsync(devPtr, get_input_stream(stream));
return cudaFreeAsync(devPtr, get_input_stream(hStream));
}
#endif

Expand Down

0 comments on commit e454cc6

Please sign in to comment.