Skip to content

Commit

Permalink
Address reviewer comments
Browse files: browse the repository at this point in the history
  • Loading branch information
masterleinad committed Nov 1, 2023
1 parent 8c4fe6b commit a07c7a2
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Expand Up @@ -97,11 +97,11 @@ __global__ void query_cuda_kernel_arch(int *d_arch) {
}

/** Query the compute capability that is actually used when a kernel is launched on the device: */
int cuda_kernel_arch(int cuda_device) {
int cuda_kernel_arch(int device_id) {
int arch = 0;
int *d_arch = nullptr;

KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(cuda_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(device_id));
KOKKOS_IMPL_CUDA_SAFE_CALL(
cudaMalloc(reinterpret_cast<void **>(&d_arch), sizeof(int)));
KOKKOS_IMPL_CUDA_SAFE_CALL(
Expand Down Expand Up @@ -390,8 +390,6 @@ void CudaInternal::initialize(cudaStream_t stream, bool manage_stream) {
"Currently, the device id must match the device id used when Kokkos "
"was initialized!");

was_initialized = true;

//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
Expand Down Expand Up @@ -688,8 +686,12 @@ void Cuda::impl_initialize(InitializationSettings const &settings) {

const struct cudaDeviceProp &cudaProp = dev_info.m_cudaProp[cuda_device_id];

Impl::CudaInternal::m_cudaDev = cuda_device_id;
Impl::CudaInternal::m_deviceProp = cudaProp;

Kokkos::Impl::cuda_device_synchronize(
"Kokkos::CudaInternal::initialize: Fence on space initialization");

// Query what compute capability architecture a kernel executes:
Impl::CudaInternal::m_cudaArch = Impl::cuda_kernel_arch(cuda_device_id);

Expand Down

0 comments on commit a07c7a2

Please sign in to comment.