Skip to content

Commit

Permalink
!initialized() should be a precondition for calling {Cuda,HIP,SYCL}In…
Browse files Browse the repository at this point in the history
…ternal::initialize
  • Loading branch information
dalg24 committed Sep 6, 2023
1 parent d8846bf commit c692a81
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 74 deletions.
32 changes: 12 additions & 20 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,32 +371,24 @@ void CudaInternal::fence() const {
}

void CudaInternal::initialize(cudaStream_t stream, bool manage_stream) {
KOKKOS_EXPECTS(!is_initialized());

if (was_finalized)
Kokkos::abort("Calling Cuda::initialize after Cuda::finalize is illegal\n");
was_initialized = true;
if (is_initialized()) return;

const bool ok_init = nullptr == m_scratchSpace || nullptr == m_scratchFlags;

if (ok_init) {
//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
// Allocate some initial space. This will grow as needed.
//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
// Allocate some initial space. This will grow as needed.

{
const unsigned reduce_block_count =
m_maxWarpCount * Impl::CudaTraits::WarpSize;
{
const unsigned reduce_block_count =
m_maxWarpCount * Impl::CudaTraits::WarpSize;

(void)scratch_unified(16 * sizeof(size_type));
(void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
(void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
}
} else {
std::ostringstream msg;
msg << "Kokkos::Cuda::initialize(" << m_cudaDev
<< ") FAILED : Already initialized";
Kokkos::Impl::throw_runtime_exception(msg.str());
(void)scratch_unified(16 * sizeof(size_type));
(void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
(void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
}

#ifdef KOKKOS_ENABLE_CUDA_UVM
Expand Down
35 changes: 13 additions & 22 deletions core/src/HIP/Kokkos_HIP_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,33 +160,24 @@ void HIPInternal::fence(const std::string &name) const {
}

void HIPInternal::initialize(hipStream_t stream, bool manage_stream) {
KOKKOS_EXPECTS(!is_initialized());

if (was_finalized)
Kokkos::abort("Calling HIP::initialize after HIP::finalize is illegal\n");

if (is_initialized()) return;

const bool ok_init = nullptr == m_scratchSpace || nullptr == m_scratchFlags;
m_stream = stream;
m_manage_stream = manage_stream;

if (ok_init) {
m_stream = stream;
m_manage_stream = manage_stream;
//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
// Allocate some initial space. This will grow as needed.
{
const unsigned reduce_block_count =
m_maxWarpCount * Impl::HIPTraits::WarpSize;

//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
// Allocate some initial space. This will grow as needed.
{
const unsigned reduce_block_count =
m_maxWarpCount * Impl::HIPTraits::WarpSize;

(void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
(void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
}
} else {
std::ostringstream msg;
msg << "Kokkos::HIP::initialize(" << m_hipDev
<< ") FAILED : Already initialized";
Kokkos::Impl::throw_runtime_exception(msg.str());
(void)scratch_flags(reduce_block_count * 2 * sizeof(size_type));
(void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
}

m_num_scratch_locks = concurrency();
Expand Down
52 changes: 20 additions & 32 deletions core/src/SYCL/Kokkos_SYCL_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <Kokkos_Core.hpp> //kokkos_malloc

#include <impl/Kokkos_CheckedIntegerOps.hpp>
#include <impl/Kokkos_Error.hpp>

namespace Kokkos {
namespace Experimental {
Expand Down Expand Up @@ -99,44 +100,31 @@ void SYCLInternal::initialize(const sycl::device& d) {

// FIXME_SYCL
void SYCLInternal::initialize(const sycl::queue& q) {
KOKKOS_EXPECTS(!is_initialized());

if (was_finalized)
Kokkos::abort("Calling SYCL::initialize after SYCL::finalize is illegal\n");

if (is_initialized()) return;

const bool ok_init = nullptr == m_scratchSpace || nullptr == m_scratchFlags;
const bool ok_dev = true;
if (ok_init && ok_dev) {
m_queue = q;
// guard pushing to all_queues
{
std::scoped_lock lock(mutex);
all_queues.push_back(&m_queue);
}
const sycl::device& d = m_queue->get_device();

m_maxWorkgroupSize =
d.template get_info<sycl::info::device::max_work_group_size>();
// FIXME_SYCL this should give the correct value for NVIDIA GPUs
m_maxConcurrency =
m_maxWorkgroupSize * 2 *
d.template get_info<sycl::info::device::max_compute_units>();

m_maxShmemPerBlock =
d.template get_info<sycl::info::device::local_mem_size>();
m_queue = q;
// guard pushing to all_queues
{
std::scoped_lock lock(mutex);
all_queues.push_back(&m_queue);
}
const sycl::device& d = m_queue->get_device();

for (auto& usm_mem : m_indirectKernelMem) {
usm_mem.reset(*m_queue, m_instance_id);
}
m_maxWorkgroupSize =
d.template get_info<sycl::info::device::max_work_group_size>();
// FIXME_SYCL this should give the correct value for NVIDIA GPUs
m_maxConcurrency =
m_maxWorkgroupSize * 2 *
d.template get_info<sycl::info::device::max_compute_units>();

} else {
std::ostringstream msg;
msg << "Kokkos::Experimental::SYCL::initialize(...) FAILED";
m_maxShmemPerBlock =
d.template get_info<sycl::info::device::local_mem_size>();

if (!ok_init) {
msg << " : Already initialized";
}
Kokkos::Impl::throw_runtime_exception(msg.str());
for (auto& usm_mem : m_indirectKernelMem) {
usm_mem.reset(*m_queue, m_instance_id);
}

#ifdef KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED
Expand Down

0 comments on commit c692a81

Please sign in to comment.