Skip to content

Commit

Permalink
Untangle SharedAllocationRecord spaghetti code
Browse files Browse the repository at this point in the history
  • Loading branch information
dalg24 committed Jan 23, 2024
1 parent 34973c7 commit d18ad8f
Show file tree
Hide file tree
Showing 26 changed files with 373 additions and 1,768 deletions.
2 changes: 0 additions & 2 deletions Makefile.targets
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
Expand Down
174 changes: 6 additions & 168 deletions core/src/Cuda/Kokkos_CudaSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

//#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_MemorySpace.hpp>

#include <impl/Kokkos_Tools.hpp>

Expand Down Expand Up @@ -437,160 +436,6 @@ void CudaHostPinnedSpace::impl_deallocate(
namespace Kokkos {
namespace Impl {

#ifdef KOKKOS_ENABLE_DEBUG
// Out-of-class definitions of the static `s_root_record` member of each
// CUDA memory-space specialization of SharedAllocationRecord. They are only
// compiled when KOKKOS_ENABLE_DEBUG is defined; the constructors in this file
// pass their address to the base class under the same guard.
SharedAllocationRecord<void, void>
SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record;

SharedAllocationRecord<void, void>
SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record;

SharedAllocationRecord<void, void>
SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::s_root_record;
#endif

//==============================================================================
// <editor-fold desc="SharedAllocationRecord destructors"> {{{1

// Hands the allocation tracked by this record back to the stored CudaSpace.
// deallocate() receives both the full allocation size and the size without
// the leading SharedAllocationHeader.
SharedAllocationRecord<Kokkos::CudaSpace, void>::~SharedAllocationRecord() {
  const auto total_bytes = SharedAllocationRecord<void, void>::m_alloc_size;
  const auto bytes_without_header =
      total_bytes - sizeof(SharedAllocationHeader);
  m_space.deallocate(m_label.c_str(),
                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                     total_bytes, bytes_without_header);
}

// Copies sizeof(SharedAllocationHeader) bytes from `header` to `ptr` with a
// CudaSpace <- HostSpace DeepCopy on a default-constructed Kokkos::Cuda
// instance, then fences that instance before returning.
void SharedAllocationRecord<Kokkos::CudaSpace, void>::deep_copy_header_no_exec(
    void *ptr, const void *header) {
  Kokkos::Cuda exec;
  constexpr auto header_bytes = sizeof(SharedAllocationHeader);
  Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(exec, ptr, header,
                                               header_bytes);
  // Wait for the copy so the caller may release `header` right away.
  exec.fence(
      "SharedAllocationRecord<Kokkos::CudaSpace, "
      "void>::SharedAllocationRecord(): fence after copying header from "
      "HostSpace");
}

// Hands the allocation tracked by this record back to the stored
// CudaUVMSpace. deallocate() receives both the full allocation size and the
// size without the leading SharedAllocationHeader.
SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::~SharedAllocationRecord() {
  const auto total_bytes = SharedAllocationRecord<void, void>::m_alloc_size;
  const auto bytes_without_header =
      total_bytes - sizeof(SharedAllocationHeader);
  m_space.deallocate(m_label.c_str(),
                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                     total_bytes, bytes_without_header);
}

// Hands the allocation tracked by this record back to the stored
// CudaHostPinnedSpace. deallocate() receives both the full allocation size
// and the size without the leading SharedAllocationHeader.
SharedAllocationRecord<Kokkos::CudaHostPinnedSpace,
                       void>::~SharedAllocationRecord() {
  const auto total_bytes = SharedAllocationRecord<void, void>::m_alloc_size;
  const auto bytes_without_header =
      total_bytes - sizeof(SharedAllocationHeader);
  m_space.deallocate(m_label.c_str(),
                     SharedAllocationRecord<void, void>::m_alloc_ptr,
                     total_bytes, bytes_without_header);
}

// </editor-fold> end SharedAllocationRecord destructors }}}1
//==============================================================================

//==============================================================================
// <editor-fold desc="SharedAllocationRecord constructors"> {{{1

// Allocating constructor for CudaSpace records (no execution space argument).
//
// The base class receives the freshly allocated
// [ SharedAllocationHeader , user_memory ] block, its total size, the
// deallocation function and the label (plus the root record in debug builds).
// The header is then filled in a host-side temporary and copied to the start
// of the allocation; the copy is fenced before the constructor returns.
SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord(
    const Kokkos::CudaSpace &arg_space, const std::string &arg_label,
    const size_t arg_alloc_size,
    const SharedAllocationRecord<void, void>::function_type arg_dealloc)
    : base_t(
#ifdef KOKKOS_ENABLE_DEBUG
          &s_root_record,
#endif
          Impl::checked_allocation_with_header(arg_space, arg_label,
                                               arg_alloc_size),
          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
          arg_label),
      m_space(arg_space) {
  // Build the header on the host first.
  SharedAllocationHeader host_header;
  base_t::_fill_host_accessible_header_info(host_header, arg_label);

  // Transfer it to the allocation and wait for completion, since
  // `host_header` goes out of scope when this constructor returns.
  Kokkos::Cuda exec;
  Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(exec, RecordBase::m_alloc_ptr,
                                               &host_header,
                                               sizeof(SharedAllocationHeader));
  exec.fence(
      "SharedAllocationRecord<Kokkos::CudaSpace, "
      "void>::SharedAllocationRecord(): fence after copying header from "
      "HostSpace");
}

// Allocating constructor for CudaSpace records that takes a Kokkos::Cuda
// execution space instance.
//
// Both the allocation (checked_allocation_with_header) and the header copy
// are issued with `arg_exec_space`. The base class receives the allocated
// [ SharedAllocationHeader , user_memory ] block, its total size, the
// deallocation function and the label (plus the root record in debug builds).
// No fence is issued in this constructor.
SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord(
    const Kokkos::Cuda &arg_exec_space, const Kokkos::CudaSpace &arg_space,
    const std::string &arg_label, const size_t arg_alloc_size,
    const SharedAllocationRecord<void, void>::function_type arg_dealloc)
    : base_t(
#ifdef KOKKOS_ENABLE_DEBUG
          &s_root_record,
#endif
          Impl::checked_allocation_with_header(arg_exec_space, arg_space,
                                               arg_label, arg_alloc_size),
          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
          arg_label),
      m_space(arg_space) {
  // Build the header on the host first.
  SharedAllocationHeader host_header;
  base_t::_fill_host_accessible_header_info(host_header, arg_label);

  // Transfer it to the start of the allocation using the caller's instance.
  Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(
      arg_exec_space, RecordBase::m_alloc_ptr, &host_header,
      sizeof(SharedAllocationHeader));
}

// Allocating constructor for CudaUVMSpace records.
//
// The base class receives the freshly allocated
// [ SharedAllocationHeader , user_memory ] block, its total size, the
// deallocation function and the label (plus the root record in debug builds).
// The header is written in place through m_alloc_ptr; no staging copy is made.
SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::SharedAllocationRecord(
    const Kokkos::CudaUVMSpace &arg_space, const std::string &arg_label,
    const size_t arg_alloc_size,
    const SharedAllocationRecord<void, void>::function_type arg_dealloc)
    : base_t(
#ifdef KOKKOS_ENABLE_DEBUG
          &s_root_record,
#endif
          Impl::checked_allocation_with_header(arg_space, arg_label,
                                               arg_alloc_size),
          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
          arg_label),
      m_space(arg_space) {
  auto &header_in_place = *base_t::m_alloc_ptr;
  base_t::_fill_host_accessible_header_info(header_in_place, arg_label);
}

// Allocating constructor for CudaHostPinnedSpace records.
//
// The base class receives the freshly allocated
// [ SharedAllocationHeader , user_memory ] block, its total size, the
// deallocation function and the label (plus the root record in debug builds).
// The header is written in place through m_alloc_ptr; no staging copy is made.
SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::
    SharedAllocationRecord(
        const Kokkos::CudaHostPinnedSpace &arg_space,
        const std::string &arg_label, const size_t arg_alloc_size,
        const SharedAllocationRecord<void, void>::function_type arg_dealloc)
    : base_t(
#ifdef KOKKOS_ENABLE_DEBUG
          &s_root_record,
#endif
          Impl::checked_allocation_with_header(arg_space, arg_label,
                                               arg_alloc_size),
          sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
          arg_label),
      m_space(arg_space) {
  auto &header_in_place = *base_t::m_alloc_ptr;
  base_t::_fill_host_accessible_header_info(header_in_place, arg_label);
}

// </editor-fold> end SharedAllocationRecord constructors }}}1
//==============================================================================

void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes,
bool to_device) {
if ((ptr == nullptr) || (bytes == 0)) return;
Expand Down Expand Up @@ -619,19 +464,12 @@ void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes,

#include <impl/Kokkos_SharedAlloc_timpl.hpp>

namespace Kokkos {
namespace Impl {

// To avoid additional compilation cost for something that's (mostly?) not
// performance sensitive, we explicitly instantiate these CRTP base classes
// here, where we have access to the associated *_timpl.hpp header files.
// CudaSpace additionally instantiates the host-inaccessible variant of the
// common base; the other two spaces only instantiate the plain common base.
template class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
template class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
template class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
template class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

} // end namespace Impl
} // end namespace Kokkos
// One explicit-instantiation macro per CUDA memory space. CudaSpace uses the
// HOST_INACCESSIBLE flavor; CudaUVMSpace and CudaHostPinnedSpace use the
// plain one.
// NOTE(review): the macros are defined elsewhere (presumably reachable
// through impl/Kokkos_SharedAlloc_timpl.hpp) -- confirm what exactly they
// instantiate before relying on it.
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::CudaSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::CudaUVMSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::CudaHostPinnedSpace);

// </editor-fold> end Explicit instantiations of CRTP Base classes }}}1
//==============================================================================
Expand Down
178 changes: 4 additions & 174 deletions core/src/Cuda/Kokkos_CudaSpace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ class CudaSpace {
cudaStream_t m_stream;

static constexpr const char* m_name = "Cuda";
friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
};

template <>
Expand Down Expand Up @@ -539,179 +538,10 @@ struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

// Specialization of SharedAllocationRecord for Kokkos::CudaSpace.
//
// Derives from HostInaccessibleSharedAllocationRecordCommon and keeps a copy
// of the memory space used for the allocation. Records are non-copyable and
// can only be created or destroyed by the class itself, derived classes and
// the befriended classes listed below.
template <>
class SharedAllocationRecord<Kokkos::CudaSpace, void>
    : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
 private:
  using base_t =
      HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
  using RecordBase = SharedAllocationRecord<void, void>;

  friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
  friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
  friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;

  // Records are neither copy-constructible nor copy-assignable.
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

#ifdef KOKKOS_ENABLE_DEBUG
  // Root record handed to the base class constructor in debug builds.
  static RecordBase s_root_record;
#endif

  // Memory space instance used to allocate (and later deallocate) the block.
  const Kokkos::CudaSpace m_space;

 protected:
  SharedAllocationRecord() = default;
  ~SharedAllocationRecord();

  // Helper that performs the header copy without a caller-supplied execution
  // space; it exists to work around the MSVC+NVCC issue
  // https://github.com/kokkos/kokkos/issues/5258
  static void deep_copy_header_no_exec(void*, const void*);

  // Allocating constructor; defined out of line.
  SharedAllocationRecord(
      const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);

  // Allocating constructor taking a Kokkos::Cuda instance; defined out of
  // line.
  SharedAllocationRecord(
      const Kokkos::Cuda& exec_space, const Kokkos::CudaSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);

  // Allocating constructor for any other execution space type. The execution
  // space argument is ignored. This overload deliberately does not forward to
  // the constructor without an exec_space argument
  // (https://github.com/kokkos/kokkos/issues/5258), and because it is a
  // template it is defined here rather than in the cpp file.
  template <typename ExecutionSpace>
  SharedAllocationRecord(
      const ExecutionSpace& /*exec_space*/, const Kokkos::CudaSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate)
      : base_t(
#ifdef KOKKOS_ENABLE_DEBUG
            &s_root_record,
#endif
            Impl::checked_allocation_with_header(arg_space, arg_label,
                                                 arg_alloc_size),
            sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
            arg_label),
        m_space(arg_space) {
    // Build the header in a host-side temporary ...
    SharedAllocationHeader host_header;
    base_t::_fill_host_accessible_header_info(host_header, arg_label);

    // ... and copy it to the start of the allocation through the helper
    // (workaround for https://github.com/kokkos/kokkos/issues/5258).
    deep_copy_header_no_exec(RecordBase::m_alloc_ptr, &host_header);
  }
};

// Specialization of SharedAllocationRecord for Kokkos::CudaUVMSpace.
//
// Derives from SharedAllocationRecordCommon and keeps a copy of the memory
// space used for the allocation. Records are non-copyable and can only be
// created or destroyed by the class itself, derived classes and the
// befriended common base.
template <>
class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;

  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
  using RecordBase = SharedAllocationRecord<void, void>;

  // Records are neither copy-constructible nor copy-assignable.
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

#ifdef KOKKOS_ENABLE_DEBUG
  // Root record handed to the base class constructor. It is only declared in
  // debug builds because every use of it is guarded by KOKKOS_ENABLE_DEBUG;
  // an unguarded declaration would name a member that is never needed (and
  // has no guarded-out definition to link against) in non-debug builds.
  static RecordBase s_root_record;
#endif

  // Memory space instance used to allocate (and later deallocate) the block.
  const Kokkos::CudaUVMSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  // Allocating constructor for callers that supply an execution space; the
  // execution space argument is ignored.
  // This constructor does not forward to the one without exec_space arg
  // in order to work around https://github.com/kokkos/kokkos/issues/5258
  // This constructor is templated so it cannot simply be put into the cpp
  // file like the other constructor.
  template <typename ExecutionSpace>
  SharedAllocationRecord(
      const ExecutionSpace& /*exec_space*/,
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate)
      : base_t(
#ifdef KOKKOS_ENABLE_DEBUG
            &SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record,
#endif
            Impl::checked_allocation_with_header(arg_space, arg_label,
                                                 arg_alloc_size),
            sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
            arg_label),
        m_space(arg_space) {
    // The header is filled in place through m_alloc_ptr.
    this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
                                                    arg_label);
  }

  // Allocating constructor; defined out of line.
  SharedAllocationRecord(
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);
};

// Specialization of SharedAllocationRecord for Kokkos::CudaHostPinnedSpace.
//
// Derives from SharedAllocationRecordCommon and keeps a copy of the memory
// space used for the allocation. Records are non-copyable and can only be
// created or destroyed by the class itself, derived classes and the
// befriended common base.
template <>
class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

  using RecordBase = SharedAllocationRecord<void, void>;
  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

  // Records are neither copy-constructible nor copy-assignable.
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

#ifdef KOKKOS_ENABLE_DEBUG
  // Root record handed to the base class constructor. It is only declared in
  // debug builds because every use of it is guarded by KOKKOS_ENABLE_DEBUG;
  // an unguarded declaration would name a member that is never needed (and
  // has no guarded-out definition to link against) in non-debug builds.
  static RecordBase s_root_record;
#endif

  // Memory space instance used to allocate (and later deallocate) the block.
  const Kokkos::CudaHostPinnedSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  // Allocating constructor for callers that supply an execution space; the
  // execution space argument is ignored.
  // This constructor does not forward to the one without exec_space arg
  // in order to work around https://github.com/kokkos/kokkos/issues/5258
  // This constructor is templated so it cannot simply be put into the cpp
  // file like the other constructor.
  template <typename ExecutionSpace>
  SharedAllocationRecord(
      const ExecutionSpace& /*exec_space*/,
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate)
      : base_t(
#ifdef KOKKOS_ENABLE_DEBUG
            &SharedAllocationRecord<Kokkos::CudaHostPinnedSpace,
                                    void>::s_root_record,
#endif
            Impl::checked_allocation_with_header(arg_space, arg_label,
                                                 arg_alloc_size),
            sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc,
            arg_label),
        m_space(arg_space) {
    // The header is filled in place through m_alloc_ptr.
    this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr,
                                                    arg_label);
  }

  // Allocating constructor; defined out of line.
  SharedAllocationRecord(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);
};

} // namespace Impl
} // namespace Kokkos
// One SharedAllocationRecord specialization macro per CUDA memory space.
// CudaSpace uses the HOST_INACCESSIBLE flavor; CudaUVMSpace and
// CudaHostPinnedSpace use the plain one.
// NOTE(review): the macro definitions are not visible here -- presumably they
// declare the SharedAllocationRecord<Space, void> specializations that used
// to be written out by hand; confirm against the header that defines them.
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION(
Kokkos::CudaSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaUVMSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaHostPinnedSpace);

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
Expand Down

0 comments on commit d18ad8f

Please sign in to comment.