Skip to content

Commit

Permalink
Check for overflow during backend initialization (Cuda, HIP, SYCL) (k…
Browse files Browse the repository at this point in the history
…okkos#6159)

* Fix potential overflow

* Rewrite update condition to avoid overflow

* Avoid overflow when setting scratch flags

* Restrict overflow check to unsigned integral types

* Rework `multiply_overflow`

* Use `||` instead of `or` (MSVC fix)

* Fix SYCL build

- use correct namespace
- only used overflow detection when necessary

* Clean up unit tests

* Use correct variables and consistent formatting

* Add a FIXME for SYCL sizeScratchGrain

Co-authored-by: Daniel Arndt <arndtd@ornl.gov>

* Add `multiply_overflow_abort` helper

---------

Co-authored-by: Daniel Arndt <arndtd@ornl.gov>
  • Loading branch information
cz4rs and masterleinad committed Jul 26, 2023
1 parent 4d1c6c3 commit ced2451
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 52 deletions.
48 changes: 28 additions & 20 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <Cuda/Kokkos_Cuda_UniqueToken.hpp>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Tools.hpp>
#include <impl/Kokkos_CheckedIntegerOps.hpp>
#include <impl/Kokkos_DeviceManagement.hpp>
#include <impl/Kokkos_ExecSpaceManager.hpp>

Expand Down Expand Up @@ -110,6 +111,13 @@ int cuda_kernel_arch() {
return arch;
}

constexpr auto sizeScratchGrain =
sizeof(Cuda::size_type[Impl::CudaTraits::WarpSize]);

std::size_t scratch_count(const std::size_t size) {
return (size + sizeScratchGrain - 1) / sizeScratchGrain;
}

} // namespace

Kokkos::View<uint32_t *, Kokkos::CudaSpace> cuda_global_unique_token_locks(
Expand Down Expand Up @@ -438,47 +446,45 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default

//----------------------------------------------------------------------------

using ScratchGrain = Cuda::size_type[Impl::CudaTraits::WarpSize];
enum { sizeScratchGrain = sizeof(ScratchGrain) };

Cuda::size_type *CudaInternal::scratch_flags(const std::size_t size) const {
if (verify_is_initialized("scratch_flags") &&
m_scratchFlagsCount * sizeScratchGrain < size) {
m_scratchFlagsCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
m_scratchFlagsCount < scratch_count(size)) {
m_scratchFlagsCount = scratch_count(size);

using Record =
Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;

if (m_scratchFlags) Record::decrement(Record::get_record(m_scratchFlags));

Record *const r =
Record::allocate(Kokkos::CudaSpace(), "Kokkos::InternalScratchFlags",
(sizeof(ScratchGrain) * m_scratchFlagsCount));
std::size_t alloc_size =
multiply_overflow_abort(m_scratchFlagsCount, sizeScratchGrain);
Record *const r = Record::allocate(
Kokkos::CudaSpace(), "Kokkos::InternalScratchFlags", alloc_size);

Record::increment(r);

m_scratchFlags = reinterpret_cast<size_type *>(r->data());

KOKKOS_IMPL_CUDA_SAFE_CALL(
cudaMemset(m_scratchFlags, 0, m_scratchFlagsCount * sizeScratchGrain));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemset(m_scratchFlags, 0, alloc_size));
}

return m_scratchFlags;
}

Cuda::size_type *CudaInternal::scratch_space(const std::size_t size) const {
if (verify_is_initialized("scratch_space") &&
m_scratchSpaceCount * sizeScratchGrain < size) {
m_scratchSpaceCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
m_scratchSpaceCount < scratch_count(size)) {
m_scratchSpaceCount = scratch_count(size);

using Record =
Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;

if (m_scratchSpace) Record::decrement(Record::get_record(m_scratchSpace));

Record *const r =
Record::allocate(Kokkos::CudaSpace(), "Kokkos::InternalScratchSpace",
(sizeof(ScratchGrain) * m_scratchSpaceCount));
std::size_t alloc_size =
multiply_overflow_abort(m_scratchSpaceCount, sizeScratchGrain);
Record *const r = Record::allocate(
Kokkos::CudaSpace(), "Kokkos::InternalScratchSpace", alloc_size);

Record::increment(r);

Expand All @@ -490,18 +496,20 @@ Cuda::size_type *CudaInternal::scratch_space(const std::size_t size) const {

Cuda::size_type *CudaInternal::scratch_unified(const std::size_t size) const {
if (verify_is_initialized("scratch_unified") && m_scratchUnifiedSupported &&
m_scratchUnifiedCount * sizeScratchGrain < size) {
m_scratchUnifiedCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
m_scratchUnifiedCount < scratch_count(size)) {
m_scratchUnifiedCount = scratch_count(size);

using Record =
Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>;

if (m_scratchUnified)
Record::decrement(Record::get_record(m_scratchUnified));

Record *const r = Record::allocate(
Kokkos::CudaHostPinnedSpace(), "Kokkos::InternalScratchUnified",
(sizeof(ScratchGrain) * m_scratchUnifiedCount));
std::size_t alloc_size =
multiply_overflow_abort(m_scratchUnifiedCount, sizeScratchGrain);
Record *const r =
Record::allocate(Kokkos::CudaHostPinnedSpace(),
"Kokkos::InternalScratchUnified", alloc_size);

Record::increment(r);

Expand Down
41 changes: 25 additions & 16 deletions core/src/HIP/Kokkos_HIP_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <HIP/Kokkos_HIP_Instance.hpp>
#include <HIP/Kokkos_HIP.hpp>
#include <HIP/Kokkos_HIP_Space.hpp>
#include <impl/Kokkos_CheckedIntegerOps.hpp>
#include <impl/Kokkos_Error.hpp>

/*--------------------------------------------------------------------------*/
Expand Down Expand Up @@ -59,9 +60,19 @@ Kokkos::View<uint32_t *, HIPSpace> hip_global_unique_token_locks(
} // namespace Kokkos

namespace Kokkos {

namespace Impl {

namespace {

using ScratchGrain = Kokkos::HIP::size_type[Impl::HIPTraits::WarpSize];
constexpr auto sizeScratchGrain = sizeof(ScratchGrain);

std::size_t scratch_count(const std::size_t size) {
return (size + sizeScratchGrain - 1) / sizeScratchGrain;
}

} // namespace

//----------------------------------------------------------------------------

int HIPInternal::concurrency() {
Expand Down Expand Up @@ -189,21 +200,19 @@ void HIPInternal::initialize(hipStream_t stream, bool manage_stream) {

//----------------------------------------------------------------------------

using ScratchGrain = Kokkos::HIP::size_type[Impl::HIPTraits::WarpSize];
enum { sizeScratchGrain = sizeof(ScratchGrain) };

Kokkos::HIP::size_type *HIPInternal::scratch_space(const std::size_t size) {
if (verify_is_initialized("scratch_space") &&
m_scratchSpaceCount * sizeScratchGrain < size) {
m_scratchSpaceCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
m_scratchSpaceCount < scratch_count(size)) {
m_scratchSpaceCount = scratch_count(size);

using Record = Kokkos::Impl::SharedAllocationRecord<Kokkos::HIPSpace, void>;

if (m_scratchSpace) Record::decrement(Record::get_record(m_scratchSpace));

Record *const r =
Record::allocate(Kokkos::HIPSpace(), "Kokkos::InternalScratchSpace",
(sizeScratchGrain * m_scratchSpaceCount));
std::size_t alloc_size =
multiply_overflow_abort(m_scratchSpaceCount, sizeScratchGrain);
Record *const r = Record::allocate(
Kokkos::HIPSpace(), "Kokkos::InternalScratchSpace", alloc_size);

Record::increment(r);

Expand All @@ -215,23 +224,23 @@ Kokkos::HIP::size_type *HIPInternal::scratch_space(const std::size_t size) {

Kokkos::HIP::size_type *HIPInternal::scratch_flags(const std::size_t size) {
if (verify_is_initialized("scratch_flags") &&
m_scratchFlagsCount * sizeScratchGrain < size) {
m_scratchFlagsCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
m_scratchFlagsCount < scratch_count(size)) {
m_scratchFlagsCount = scratch_count(size);

using Record = Kokkos::Impl::SharedAllocationRecord<Kokkos::HIPSpace, void>;

if (m_scratchFlags) Record::decrement(Record::get_record(m_scratchFlags));

Record *const r =
Record::allocate(Kokkos::HIPSpace(), "Kokkos::InternalScratchFlags",
(sizeScratchGrain * m_scratchFlagsCount));
std::size_t alloc_size =
multiply_overflow_abort(m_scratchFlagsCount, sizeScratchGrain);
Record *const r = Record::allocate(
Kokkos::HIPSpace(), "Kokkos::InternalScratchFlags", alloc_size);

Record::increment(r);

m_scratchFlags = reinterpret_cast<size_type *>(r->data());

KOKKOS_IMPL_HIP_SAFE_CALL(
hipMemset(m_scratchFlags, 0, m_scratchFlagsCount * sizeScratchGrain));
KOKKOS_IMPL_HIP_SAFE_CALL(hipMemset(m_scratchFlags, 0, alloc_size));
}

return m_scratchFlags;
Expand Down
44 changes: 28 additions & 16 deletions core/src/SYCL/Kokkos_SYCL_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,24 @@

#include <Kokkos_Core.hpp> //kokkos_malloc

#include <impl/Kokkos_CheckedIntegerOps.hpp>

namespace Kokkos {
namespace Experimental {
namespace Impl {

namespace {

// FIXME_SYCL Should be a multiple of the maximum subgroup size.
static constexpr auto sizeScratchGrain =
sizeof(Kokkos::Experimental::SYCL::size_type[32]);

std::size_t scratch_count(const std::size_t size) {
return (size + sizeScratchGrain - 1) / sizeScratchGrain;
}

} // namespace

std::vector<std::optional<sycl::queue>*> SYCLInternal::all_queues;
std::mutex SYCLInternal::mutex;

Expand Down Expand Up @@ -231,22 +245,21 @@ void SYCLInternal::finalize() {
}

sycl::device_ptr<void> SYCLInternal::scratch_space(const std::size_t size) {
const size_type sizeScratchGrain =
sizeof(Kokkos::Experimental::SYCL::size_type);
if (verify_is_initialized("scratch_space") &&
m_scratchSpaceCount * sizeScratchGrain < size) {
m_scratchSpaceCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
m_scratchSpaceCount < scratch_count(size)) {
m_scratchSpaceCount = scratch_count(size);

using Record = Kokkos::Impl::SharedAllocationRecord<
Kokkos::Experimental::SYCLDeviceUSMSpace, void>;

if (nullptr != m_scratchSpace)
Record::decrement(Record::get_record(m_scratchSpace));

Record* const r =
Record::allocate(Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue),
"Kokkos::Experimental::SYCL::InternalScratchSpace",
(sizeScratchGrain * m_scratchSpaceCount));
std::size_t alloc_size = Kokkos::Impl::multiply_overflow_abort(
m_scratchSpaceCount, sizeScratchGrain);
Record* const r = Record::allocate(
Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue),
"Kokkos::Experimental::SYCL::InternalScratchSpace", alloc_size);

Record::increment(r);

Expand All @@ -257,22 +270,21 @@ sycl::device_ptr<void> SYCLInternal::scratch_space(const std::size_t size) {
}

sycl::device_ptr<void> SYCLInternal::scratch_flags(const std::size_t size) {
const size_type sizeScratchGrain =
sizeof(Kokkos::Experimental::SYCL::size_type);
if (verify_is_initialized("scratch_flags") &&
m_scratchFlagsCount * sizeScratchGrain < size) {
m_scratchFlagsCount = (size + sizeScratchGrain - 1) / sizeScratchGrain;
m_scratchFlagsCount < scratch_count(size)) {
m_scratchFlagsCount = scratch_count(size);

using Record = Kokkos::Impl::SharedAllocationRecord<
Kokkos::Experimental::SYCLDeviceUSMSpace, void>;

if (nullptr != m_scratchFlags)
Record::decrement(Record::get_record(m_scratchFlags));

Record* const r =
Record::allocate(Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue),
"Kokkos::Experimental::SYCL::InternalScratchFlags",
(sizeScratchGrain * m_scratchFlagsCount));
std::size_t alloc_size = Kokkos::Impl::multiply_overflow_abort(
m_scratchFlagsCount, sizeScratchGrain);
Record* const r = Record::allocate(
Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue),
"Kokkos::Experimental::SYCL::InternalScratchFlags", alloc_size);

Record::increment(r);

Expand Down
53 changes: 53 additions & 0 deletions core/src/impl/Kokkos_CheckedIntegerOps.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_CHECKED_INTEGER_OPS_HPP
#define KOKKOS_CHECKED_INTEGER_OPS_HPP

#include <type_traits>

#include <impl/Kokkos_Error.hpp>

namespace Kokkos {
namespace Impl {

template <typename T>
std::enable_if_t<std::is_integral_v<T>, bool> constexpr multiply_overflow(
T a, T b, T& res) {
static_assert(std::is_unsigned_v<T>,
"Operation not implemented for signed integers.");
auto product = a * b;
if ((a == 0) || (b == 0) || (a == product / b)) {
res = product;
return false;
} else {
return true;
}
}

template <typename T>
T multiply_overflow_abort(T a, T b) {
T result;
if (multiply_overflow(a, b, result))
Kokkos::abort("Arithmetic overflow detected.");

return result;
}

} // namespace Impl
} // namespace Kokkos

#endif // KOKKOS_CHECKED_INTEGER_OPS_HPP
1 change: 1 addition & 0 deletions core/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL)
AtomicViews
BitManipulationBuiltins
BlockSizeDeduction
CheckedIntegerOps
CommonPolicyConstructors
CommonPolicyInterface
Complex
Expand Down
50 changes: 50 additions & 0 deletions core/unit_test/TestCheckedIntegerOps.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <gtest/gtest.h>
#include <impl/Kokkos_CheckedIntegerOps.hpp>
#include <limits>

namespace {

TEST(TEST_CATEGORY, checked_integer_operations_multiply_overflow) {
{
auto result = 1u;
auto is_overflow = Kokkos::Impl::multiply_overflow(1u, 2u, result);
EXPECT_EQ(result, 2u);
EXPECT_FALSE(is_overflow);
}
{
auto result = 1u;
auto is_overflow = Kokkos::Impl::multiply_overflow(
std::numeric_limits<unsigned>::max(), 2u, result);
EXPECT_TRUE(is_overflow);
}
}

TEST(TEST_CATEGORY, checked_integer_operations_multiply_overflow_abort) {
{
auto result = Kokkos::Impl::multiply_overflow_abort(1u, 2u);
EXPECT_EQ(result, 2u);
}
{
ASSERT_DEATH(Kokkos::Impl::multiply_overflow_abort(
std::numeric_limits<unsigned>::max(), 2u),
"Arithmetic overflow detected.");
}
}

} // namespace

0 comments on commit ced2451

Please sign in to comment.