Skip to content

Commit

Permalink
Address reviewer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
masterleinad committed Feb 1, 2024
1 parent 191003f commit 1b9118c
Showing 1 changed file with 12 additions and 12 deletions.
24 changes: 12 additions & 12 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,17 @@ void CudaInternal::initialize(cudaStream_t stream) {
m_stream = stream;
CudaInternal::cuda_devices.insert(m_cudaDev);

// Allocate a staging buffer for constant memory in pinned host memory
// and an event to avoid overwriting the driver of previous kernel launches
if (!constantMemHostStagingPerDevice[m_cudaDev])
KOKKOS_IMPL_CUDA_SAFE_CALL((cuda_malloc_host_wrapper(
reinterpret_cast<void **>(&constantMemHostStagingPerDevice[m_cudaDev]),
CudaTraits::ConstantMemoryUsage)));

if (!constantMemReusablePerDevice[m_cudaDev])
KOKKOS_IMPL_CUDA_SAFE_CALL(
(cuda_event_create_wrapper(&constantMemReusablePerDevice[m_cudaDev])));

//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
Expand All @@ -313,17 +324,6 @@ void CudaInternal::initialize(cudaStream_t stream) {
(void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
}

// Allocate a staging buffer for constant memory in pinned host memory
// and an event to avoid overwriting the driver of previous kernel launches
if (!constantMemHostStagingPerDevice[m_cudaDev])
KOKKOS_IMPL_CUDA_SAFE_CALL((cuda_malloc_host_wrapper(
reinterpret_cast<void **>(&constantMemHostStagingPerDevice[m_cudaDev]),
CudaTraits::ConstantMemoryUsage)));

if (!constantMemReusablePerDevice[m_cudaDev])
KOKKOS_IMPL_CUDA_SAFE_CALL(
(cuda_event_create_wrapper(&constantMemReusablePerDevice[m_cudaDev])));

for (int i = 0; i < m_n_team_scratch; ++i) {
m_team_scratch_current_size[i] = 0;
m_team_scratch_ptr[i] = nullptr;
Expand Down Expand Up @@ -633,7 +633,7 @@ void Cuda::impl_finalize() {
(void)Impl::cuda_global_unique_token_locks(true);
desul::Impl::finalize_lock_arrays(); // FIXME

for (auto &cuda_device : Kokkos::Impl::CudaInternal::cuda_devices) {
for (const auto cuda_device : Kokkos::Impl::CudaInternal::cuda_devices) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(cuda_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(
cudaFreeHost(Kokkos::Impl::CudaInternal::constantMemHostStagingPerDevice
Expand Down

0 comments on commit 1b9118c

Please sign in to comment.