!initialized() should be a precondition for calling {Cuda,HIP,SYCL}In…

…ternal::initialize
nliber · Sep 6, 2023 · c692a81 · c692a81
1 parent d8846bf
commit c692a81
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 74 deletions.
diff --git a/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@@ -371,32 +371,24 @@ void CudaInternal::fence() const {
 }
 
 void CudaInternal::initialize(cudaStream_t stream, bool manage_stream) {
+  KOKKOS_EXPECTS(!is_initialized());
+
   if (was_finalized)
     Kokkos::abort("Calling Cuda::initialize after Cuda::finalize is illegal\n");
   was_initialized = true;
-  if (is_initialized()) return;
-
-  const bool ok_init = nullptr == m_scratchSpace || nullptr == m_scratchFlags;
 
-  if (ok_init) {
-    //----------------------------------
-    // Multiblock reduction uses scratch flags for counters
-    // and scratch space for partial reduction values.
-    // Allocate some initial space.  This will grow as needed.
+  //----------------------------------
+  // Multiblock reduction uses scratch flags for counters
+  // and scratch space for partial reduction values.
+  // Allocate some initial space.  This will grow as needed.
 
-    {
-      const unsigned reduce_block_count =
-          m_maxWarpCount * Impl::CudaTraits::WarpSize;
+  {
+    const unsigned reduce_block_count =
+        m_maxWarpCount * Impl::CudaTraits::WarpSize;
 
-      (void)scratch_unified(16 * sizeof(size_type));
-      (void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
-      (void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
-    }
-  } else {
-    std::ostringstream msg;
-    msg << "Kokkos::Cuda::initialize(" << m_cudaDev
-        << ") FAILED : Already initialized";
-    Kokkos::Impl::throw_runtime_exception(msg.str());
+    (void)scratch_unified(16 * sizeof(size_type));
+    (void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
+    (void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
   }
 
 #ifdef KOKKOS_ENABLE_CUDA_UVM

diff --git a/core/src/HIP/Kokkos_HIP_Instance.cpp b/core/src/HIP/Kokkos_HIP_Instance.cpp
@@ -160,33 +160,24 @@ void HIPInternal::fence(const std::string &name) const {
 }
 
 void HIPInternal::initialize(hipStream_t stream, bool manage_stream) {
+  KOKKOS_EXPECTS(!is_initialized());
+
   if (was_finalized)
     Kokkos::abort("Calling HIP::initialize after HIP::finalize is illegal\n");
 
-  if (is_initialized()) return;
-
-  const bool ok_init = nullptr == m_scratchSpace || nullptr == m_scratchFlags;
+  m_stream        = stream;
+  m_manage_stream = manage_stream;
 
-  if (ok_init) {
-    m_stream        = stream;
-    m_manage_stream = manage_stream;
+  //----------------------------------
+  // Multiblock reduction uses scratch flags for counters
+  // and scratch space for partial reduction values.
+  // Allocate some initial space.  This will grow as needed.
+  {
+    const unsigned reduce_block_count =
+        m_maxWarpCount * Impl::HIPTraits::WarpSize;
 
-    //----------------------------------
-    // Multiblock reduction uses scratch flags for counters
-    // and scratch space for partial reduction values.
-    // Allocate some initial space.  This will grow as needed.
-    {
-      const unsigned reduce_block_count =
-          m_maxWarpCount * Impl::HIPTraits::WarpSize;
-
-      (void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
-      (void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
-    }
-  } else {
-    std::ostringstream msg;
-    msg << "Kokkos::HIP::initialize(" << m_hipDev
-        << ") FAILED : Already initialized";
-    Kokkos::Impl::throw_runtime_exception(msg.str());
+    (void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
+    (void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
   }
 
   m_num_scratch_locks = concurrency();

diff --git a/core/src/SYCL/Kokkos_SYCL_Instance.cpp b/core/src/SYCL/Kokkos_SYCL_Instance.cpp
@@ -21,6 +21,7 @@
 #include <Kokkos_Core.hpp>  //kokkos_malloc
 
 #include <impl/Kokkos_CheckedIntegerOps.hpp>
+#include <impl/Kokkos_Error.hpp>
 
 namespace Kokkos {
 namespace Experimental {
@@ -99,44 +100,31 @@ void SYCLInternal::initialize(const sycl::device& d) {
 
 // FIXME_SYCL
 void SYCLInternal::initialize(const sycl::queue& q) {
+  KOKKOS_EXPECTS(!is_initialized());
+
   if (was_finalized)
     Kokkos::abort("Calling SYCL::initialize after SYCL::finalize is illegal\n");
 
-  if (is_initialized()) return;
-
-  const bool ok_init = nullptr == m_scratchSpace || nullptr == m_scratchFlags;
-  const bool ok_dev  = true;
-  if (ok_init && ok_dev) {
-    m_queue = q;
-    // guard pushing to all_queues
-    {
-      std::scoped_lock lock(mutex);
-      all_queues.push_back(&m_queue);
-    }
-    const sycl::device& d = m_queue->get_device();
-
-    m_maxWorkgroupSize =
-        d.template get_info<sycl::info::device::max_work_group_size>();
-    // FIXME_SYCL this should give the correct value for NVIDIA GPUs
-    m_maxConcurrency =
-        m_maxWorkgroupSize * 2 *
-        d.template get_info<sycl::info::device::max_compute_units>();
-
-    m_maxShmemPerBlock =
-        d.template get_info<sycl::info::device::local_mem_size>();
+  m_queue = q;
+  // guard pushing to all_queues
+  {
+    std::scoped_lock lock(mutex);
+    all_queues.push_back(&m_queue);
+  }
+  const sycl::device& d = m_queue->get_device();
 
-    for (auto& usm_mem : m_indirectKernelMem) {
-      usm_mem.reset(*m_queue, m_instance_id);
-    }
+  m_maxWorkgroupSize =
+      d.template get_info<sycl::info::device::max_work_group_size>();
+  // FIXME_SYCL this should give the correct value for NVIDIA GPUs
+  m_maxConcurrency =
+      m_maxWorkgroupSize * 2 *
+      d.template get_info<sycl::info::device::max_compute_units>();
 
-  } else {
-    std::ostringstream msg;
-    msg << "Kokkos::Experimental::SYCL::initialize(...) FAILED";
+  m_maxShmemPerBlock =
+      d.template get_info<sycl::info::device::local_mem_size>();
 
-    if (!ok_init) {
-      msg << " : Already initialized";
-    }
-    Kokkos::Impl::throw_runtime_exception(msg.str());
+  for (auto& usm_mem : m_indirectKernelMem) {
+    usm_mem.reset(*m_queue, m_instance_id);
   }
 
 #ifdef KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED