Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 4 additions & 116 deletions examples/aligned_accessor/aligned_accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
// https://github.com/rapidsai/raft/pull/725#discussion_r937991701

namespace {
using Kokkos::aligned_accessor;
using Kokkos::detail::aligned_pointer_t;
using Kokkos::detail::assume_aligned_method;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I like that we are adding this. These guys are used only in this example, but we are putting them into the main library (assume_aligned_method and align_attribute_method). I think we should remove them altogether.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, we do not need aligned_pointer_t or assume_aligned_method in Standard C++, so we should not expose them to users.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was mostly to merge Mark's example with the new code. It doesn't expose them in the API, as they are under detail::

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense that the aligned_accessor example needs to pull in aligned_accessor. However, why does the example need aligned_pointer_t or assume_aligned_method? Wouldn't those just be implementation details of aligned_accessor to help with back-porting it to C++ versions earlier than 20?

using Kokkos::detail::align_attribute_method;

using test_value_type = float;
constexpr std::size_t min_overalignment_factor = 8;
Expand All @@ -43,129 +47,13 @@ constexpr std::size_t min_byte_alignment = min_overalignment_factor * sizeof(flo
// Some compilers have trouble optimizing loops with unsigned or 64-bit index types.
using index_type = int;


// MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) expands to
// an expression that evaluates to POINTER and, where the selected branch has a
// mechanism for it, marks that pointer as aligned to BYTE_ALIGNMENT bytes.
// assume_aligned_method names the mechanism used by the selected branch,
// or is "(none)" when the macro expands to POINTER unchanged.
//
// Prefer std::assume_aligned if available, as it is in the C++ Standard.
// Otherwise, use a compiler-specific equivalent if available.

// NOTE (mfh 2022/08/08) BYTE_ALIGNMENT must be unsigned and a power of 2.
#if defined(__cpp_lib_assume_aligned)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) (std::assume_aligned< BYTE_ALIGNMENT >( POINTER ))
constexpr char assume_aligned_method[] = "std::assume_aligned";
// The __ICL, __ICC and __clang__ branches are tested before __GNUC__ and all
// expand to POINTER unchanged, i.e. they add no alignment annotation.
#elif defined(__ICL)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#elif defined(__ICC)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#elif defined(__clang__)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#elif defined(__GNUC__)
// __builtin_assume_aligned returns void*
// ELEMENT_TYPE is only consumed here, to cast that void* back to ELEMENT_TYPE*.
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) reinterpret_cast< ELEMENT_TYPE* >(__builtin_assume_aligned( POINTER, BYTE_ALIGNMENT ))
constexpr char assume_aligned_method[] = "__builtin_assume_aligned";
#else
// Unknown compiler: fall back to the plain pointer.
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#endif

// MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) expands to a
// compiler-specific align_value attribute taking BYTE_ALIGNMENT, or to nothing
// when the selected branch has no such attribute.
// align_attribute_method holds the spelling of the attribute that was chosen,
// or "(none)" when the macro expands to nothing.
//
// Some compilers other than Clang or GCC like to define __clang__ or __GNUC__.
// Thus, we order the tests from most to least specific.
#if defined(__ICL)
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __declspec(align_value( BYTE_ALIGNMENT ))
constexpr char align_attribute_method[] = "__declspec(align_value(BYTE_ALIGNMENT))";
#elif defined(__ICC)
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT )))
constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))";
#elif defined(__clang__)
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT )))
constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))";
#else
// No attribute available: the macro expands to nothing.
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT )
constexpr char align_attribute_method[] = "(none)";
#endif

// Returns true exactly when x has a single bit set, i.e. x is 1, 2, 4, 8, ...
// Zero is rejected.
constexpr bool
is_nonzero_power_of_two(const std::size_t x)
{
  // Just checking __cpp_lib_int_pow2 isn't enough for some GCC versions:
  // the <bit> header exists, but std::has_single_bit does not.
  // Hence the additional language-version test.
#if !defined(__cpp_lib_int_pow2) || __cplusplus < 202002L
  // x & (x - 1) clears the lowest set bit; a power of two has no other bit left.
  return (x == 0) ? false : ((x & (x - 1)) == 0);
#else
  return std::has_single_bit(x);
#endif
}

// Returns true when byte_alignment is usable as an alignment for ElementType:
// it must be a nonzero power of two and must not be weaker than
// alignof(ElementType).
template<class ElementType>
constexpr bool
valid_byte_alignment(const std::size_t byte_alignment)
{
  return !(byte_alignment < alignof(ElementType))
      && is_nonzero_power_of_two(byte_alignment);
}

// We define aligned_pointer_t through a struct
// so we can check whether the byte alignment is valid.
// This makes it impossible to use the alias
// with an invalid byte alignment.
//
// aligned_pointer<ElementType, byte_alignment>::type is ElementType* followed by
// whatever MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( byte_alignment ) expands to.
template<class ElementType, std::size_t byte_alignment>
struct aligned_pointer {
// Instantiating the struct with an invalid alignment is a compile-time error.
static_assert(valid_byte_alignment<ElementType>(byte_alignment),
"byte_alignment must be a power of two no less than "
"the minimum required alignment of ElementType.");

#if defined(__ICC)
// x86-64 ICC 2021.5.0 emits warning #3186 ("expected typedef declaration") here.
// No other compiler (including Clang, which has a similar type attribute) has this issue.
// The warning is suppressed only around the alias declaration below.
# pragma warning push
# pragma warning disable 3186
#endif

using type = ElementType* MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( byte_alignment );

#if defined(__ICC)
# pragma warning pop
#endif
};

// Shorthand for aligned_pointer<ElementType, byte_alignment>::type.
// Naming it instantiates aligned_pointer and therefore runs its static_assert.
template<class ElementType, std::size_t byte_alignment>
using aligned_pointer_t = typename aligned_pointer<ElementType, byte_alignment>::type;

template<class ElementType, std::size_t byte_alignment>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Standard C++ way to spell this is std::assume_aligned<byte_alignment>(ptr).

aligned_pointer_t<ElementType, byte_alignment>
bless(ElementType* ptr, std::integral_constant<std::size_t, byte_alignment> /* ba */ )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we consider just calling this assume_aligned? More importantly, why does the example need this? In general, we should favor the example looking as much like Standard C++ as possible. (Yes, I know I wrote the example! but it still could use revision : - ) .)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I tried to make as few changes to the example as possible, but now I'm reconsidering that since, well, the example is supposed to be useful for learning how to use the API. So I might rewrite a lot of it (and possibly just make it require C++20).

Copy link
Contributor

@mhoemmen mhoemmen Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for considering this! There's nothing wrong with back-porting, but I think I would want both the example and the aligned_accessor implementation to look as much like Standard C++ as possible. The latter could be achieved just by changing some names.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, will do then, I should have a new version tomorrow with all the changes

{
return MDSPAN_IMPL_ASSUME_ALIGNED( ElementType, ptr, byte_alignment );
}

// Accessor whose data handle is aligned_pointer_t<ElementType, byte_alignment>.
// Element access goes through MDSPAN_IMPL_ASSUME_ALIGNED; offsetting hands back
// the handle type of Kokkos::default_accessor<ElementType>.
template<class ElementType, std::size_t byte_alignment>
struct aligned_accessor {
  using element_type = ElementType;
  using reference = element_type&;
  using data_handle_type = aligned_pointer_t<ElementType, byte_alignment>;
  using offset_policy = Kokkos::default_accessor<ElementType>;

  constexpr aligned_accessor() noexcept = default;

  // Converting constructor: enabled only for an identical byte alignment and
  // an element type whose array pointer converts to ours.
  MDSPAN_TEMPLATE_REQUIRES(
    class OtherElementType,
    std::size_t other_byte_alignment,
    /* requires */ (std::is_convertible<OtherElementType(*)[], element_type(*)[]>::value && other_byte_alignment == byte_alignment)
  )
  constexpr aligned_accessor(aligned_accessor<OtherElementType, other_byte_alignment>) noexcept {}

  // Returns a reference to element `index` of the sequence starting at `handle`.
  constexpr reference access(data_handle_type handle, std::size_t index) const noexcept {
    // The alignment may end up declared twice here, depending on whether
    // an attribute for marking pointer types is available.
    return MDSPAN_IMPL_ASSUME_ALIGNED( ElementType, handle, byte_alignment )[index];
  }

  // Returns `handle` advanced by `index` elements, as offset_policy's handle type.
  constexpr typename offset_policy::data_handle_type
  offset(data_handle_type handle, std::size_t index) const noexcept {
    return handle + index;
  }
};

template<class ElementType>
struct delete_raw {
void operator()(ElementType* p) const {
Expand Down
209 changes: 209 additions & 0 deletions include/experimental/__p0009_bits/aligned_accessor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2019) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/


// NOTE: This code is prematurely taken from an example based on
// https://github.com/kokkos/mdspan/pull/176

#pragma once

#include "macros.hpp"
#include "trait_backports.hpp"
#include "default_accessor.hpp"
#include "extents.hpp"
#include <bit>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <type_traits>

// If we don't have bitcast, we should use memcpy
#ifndef __cpp_lib_bit_cast
#include <cstring>
#endif

namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
namespace detail {

// MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) expands to
// an expression that evaluates to POINTER and, where the selected branch has a
// mechanism for it, marks that pointer as aligned to BYTE_ALIGNMENT bytes.
// assume_aligned_method names the mechanism used by the selected branch,
// or is "(none)" when the macro expands to POINTER unchanged.
//
// Prefer std::assume_aligned if available, as it is in the C++ Standard.
// Otherwise, use a compiler-specific equivalent if available.

// NOTE (mfh 2022/08/08) BYTE_ALIGNMENT must be unsigned and a power of 2.
#if defined(__cpp_lib_assume_aligned)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) (std::assume_aligned< BYTE_ALIGNMENT >( POINTER ))
constexpr char assume_aligned_method[] = "std::assume_aligned";
// The __ICL, __ICC and __clang__ branches are tested before __GNUC__ and all
// expand to POINTER unchanged, i.e. they add no alignment annotation.
#elif defined(__ICL)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#elif defined(__ICC)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#elif defined(__clang__)
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#elif defined(__GNUC__)
// __builtin_assume_aligned returns void*
// ELEMENT_TYPE is only consumed here, to cast that void* back to ELEMENT_TYPE*.
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) reinterpret_cast< ELEMENT_TYPE* >(__builtin_assume_aligned( POINTER, BYTE_ALIGNMENT ))
constexpr char assume_aligned_method[] = "__builtin_assume_aligned";
#else
// Unknown compiler: fall back to the plain pointer.
# define MDSPAN_IMPL_ASSUME_ALIGNED( ELEMENT_TYPE, POINTER, BYTE_ALIGNMENT ) POINTER
constexpr char assume_aligned_method[] = "(none)";
#endif

// MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) expands to a
// compiler-specific align_value attribute taking BYTE_ALIGNMENT, or to nothing
// when the selected branch has no such attribute.
// align_attribute_method holds the spelling of the attribute that was chosen,
// or "(none)" when the macro expands to nothing.
//
// Some compilers other than Clang or GCC like to define __clang__ or __GNUC__.
// Thus, we order the tests from most to least specific.
#if defined(__ICL)
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __declspec(align_value( BYTE_ALIGNMENT ))
constexpr char align_attribute_method[] = "__declspec(align_value(BYTE_ALIGNMENT))";
#elif defined(__ICC)
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT )))
constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))";
#elif defined(__clang__)
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT ) __attribute__((align_value( BYTE_ALIGNMENT )))
constexpr char align_attribute_method[] = "__attribute__((align_value(BYTE_ALIGNMENT)))";
#else
// No attribute available: the macro expands to nothing.
# define MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( BYTE_ALIGNMENT )
constexpr char align_attribute_method[] = "(none)";
#endif

constexpr bool
is_nonzero_power_of_two(const std::size_t x)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we consider just calling this has_single_bit? That would make the implementation look as much like Standard C++ as possible.

{
// Just checking __cpp_lib_int_pow2 isn't enough for some GCC versions.
// The <bit> header exists, but std::has_single_bit does not.
#if defined(__cpp_lib_int_pow2) && __cplusplus >= 202002L
return std::has_single_bit(x);
#else
return x != 0 && (x & (x - 1)) == 0;
#endif
}

// Returns true when byte_alignment is usable as an alignment for ElementType:
// it must be a nonzero power of two and must not be weaker than
// alignof(ElementType).
template<class ElementType>
constexpr bool
valid_byte_alignment(const std::size_t byte_alignment)
{
  return !(byte_alignment < alignof(ElementType))
      && is_nonzero_power_of_two(byte_alignment);
}

// We define aligned_pointer_t through a struct
// so we can check whether the byte alignment is valid.
// This makes it impossible to use the alias
// with an invalid byte alignment.
//
// aligned_pointer<T, Alignment>::type is T* followed by whatever
// MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( Alignment ) expands to.
template<class T, std::size_t Alignment>
struct aligned_pointer {
// Instantiating the struct with an invalid alignment is a compile-time error.
static_assert(valid_byte_alignment<T>(Alignment),
"Alignment must be a power of two no less than "
"the minimum required alignment of T.");
// The attribute macro trails the pointer declarator; it may expand to nothing.
using type = T* MDSPAN_IMPL_ALIGN_VALUE_ATTRIBUTE( Alignment );
};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is necessary, is it? If you have a static_assert inside aligned_accessor, you can't get to aligned_accessor::data_handle_type without this being checked, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original point of this was that some compiler (I think MSVC?) needed a special attribute marker on pointers to declare overalignment. That's not the Standard C++ way of doing things, but it might be reasonable for pre-C++20 back-ports. If you have C++20, the Standard way to do this is to use assume_aligned.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thus, I think this is acceptable, as long as it is not exposed to users.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but we are only using that typedef anyway, so we might as well just declare it exactly this way inside aligned_accessor. I.e. aligned_accessor<T, 4>::data_handle_type is not aligned_pointer<T,4>, it's aligned_pointer<T,4>::type, so we can make that typedef inline in aligned_accessor, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@crtrott Ah, right, that makes total sense then. At least aligned_pointer_t<T, 4> would be more idiomatic.



// Shorthand for aligned_pointer<T, Alignment>::type.
template<class T, std::size_t Alignment>
using aligned_pointer_t = typename aligned_pointer<T, Alignment>::type;
} // namespace detail

/// Reports whether the address stored in `ptr` is a multiple of `Alignment` bytes.
///
/// `Alignment` must be a power of two no less than alignof(T); this is checked
/// at compile time.
///
/// @tparam Alignment  Required byte alignment.
/// @tparam T          Pointee type.
/// @param  ptr        Pointer whose address is tested.
/// @return true if the low bits of the address selected by (Alignment - 1) are all zero.
///
/// NOTE(review): the function is declared constexpr when std::bit_cast is
/// available, but std::bit_cast involving a pointer type is not usable in a
/// constant expression, so compile-time evaluation is presumably still not
/// possible - confirm before relying on it.
template<size_t Alignment, class T>
#ifdef __cpp_lib_bit_cast // Only can be constexpr if we have bit_cast
constexpr
#endif
bool is_sufficiently_aligned(T* ptr) {
  // Note this mandate is not what is currently in the standard
  // See https://cplusplus.github.io/LWG/issue4290
  static_assert(detail::valid_byte_alignment<T>(Alignment),
                "Alignment must be a power of two no less than "
                "the minimum required alignment of T.");
#ifdef __cpp_lib_bit_cast
  // std::bit_cast itself rejects a size mismatch between T* and std::uintptr_t.
  const auto address = std::bit_cast<std::uintptr_t>(ptr);
#else
  // Will work but non-constexpr.
  // The byte copy is only meaningful when both representations have the same
  // size; otherwise it would read past `ptr` or leave part of `address` unset.
  static_assert(sizeof(std::uintptr_t) == sizeof(T*),
                "std::uintptr_t and T* must have the same size.");
  std::uintptr_t address = 0;
  std::memcpy(&address, &ptr, sizeof(ptr));
#endif
  // Alignment is a power of two, so (Alignment - 1) masks exactly the bits
  // that must be zero in an aligned address.
  return (address & (Alignment - 1)) == 0;
}

/// Accessor whose data handle is detail::aligned_pointer_t<ElementType, ByteAlignment>.
///
/// Element access passes the handle through MDSPAN_IMPL_ASSUME_ALIGNED with
/// byte_alignment before indexing. Offsetting yields the handle type of
/// default_accessor<ElementType> (the offset_policy).
///
/// @tparam ElementType    Type of the elements referred to.
/// @tparam ByteAlignment  Byte alignment of the data handle; validated by the
///                        static_assert inside detail::aligned_pointer.
template<class ElementType, std::size_t ByteAlignment>
struct aligned_accessor {
  using offset_policy = default_accessor<ElementType>;
  using element_type = ElementType;
  using reference = ElementType&;
  using data_handle_type = detail::aligned_pointer_t<ElementType, ByteAlignment>;

  /// The ByteAlignment template argument, exposed as a constant.
  static constexpr std::size_t byte_alignment = ByteAlignment;

  constexpr aligned_accessor() noexcept = default;

  /// Implicit conversion from another aligned_accessor. Enabled only when
  /// OtherElementType(*)[] converts to element_type(*)[] and the source
  /// alignment is at least byte_alignment.
  MDSPAN_TEMPLATE_REQUIRES(
    class OtherElementType,
    std::size_t OtherByteAlignment,
    /* requires */ (std::is_convertible<OtherElementType(*)[], element_type(*)[]>::value && OtherByteAlignment >= byte_alignment)
  )
  constexpr aligned_accessor(aligned_accessor<OtherElementType, OtherByteAlignment>) noexcept {}

  /// Explicit conversion from a default_accessor. Enabled only when
  /// OtherElementType(*)[] converts to element_type(*)[].
  MDSPAN_TEMPLATE_REQUIRES(
    class OtherElementType,
    /* requires */ (std::is_convertible<OtherElementType(*)[], element_type(*)[]>::value)
  )
  constexpr explicit aligned_accessor(default_accessor<OtherElementType>) noexcept {}

  /// Implicit conversion to a default_accessor. Enabled only when
  /// element_type(*)[] converts to OtherElementType(*)[].
  MDSPAN_TEMPLATE_REQUIRES(
    class OtherElementType,
    /* requires */ (std::is_convertible<element_type(*)[], OtherElementType(*)[]>::value)
  )
  constexpr operator default_accessor<OtherElementType>() const noexcept {
    return {};
  }

  /// Returns a reference to element `i` of the sequence starting at `p`.
  constexpr reference access(data_handle_type p, std::size_t i) const noexcept {
    // This may declare alignment twice, depending on
    // if we have an attribute for marking pointer types.
    return MDSPAN_IMPL_ASSUME_ALIGNED( ElementType, p, byte_alignment )[i];
  }

  /// Returns `p` advanced by `i` elements, as offset_policy's handle type.
  constexpr typename offset_policy::data_handle_type
  offset(data_handle_type p, std::size_t i) const noexcept {
    return p + i;
  }
};

#if !MDSPAN_HAS_CXX_17
// Before C++17 a static constexpr data member is only declared in-class.
// Any odr-use (for example binding byte_alignment to a const reference)
// needs this namespace-scope definition, or the link fails.
template<class ElementType, std::size_t ByteAlignment>
constexpr std::size_t aligned_accessor<ElementType, ByteAlignment>::byte_alignment;
#endif

} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE
1 change: 1 addition & 0 deletions include/mdspan/mdspan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "../experimental/__p0009_bits/layout_left.hpp"
#include "../experimental/__p0009_bits/layout_right.hpp"
#include "../experimental/__p0009_bits/macros.hpp"
#include "../experimental/__p0009_bits/aligned_accessor.hpp"
#if MDSPAN_HAS_CXX_17
#include "../experimental/__p2642_bits/layout_padded.hpp"
#include "../experimental/__p2630_bits/submdspan.hpp"
Expand Down
7 changes: 4 additions & 3 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ if(MDSPAN_USE_SYSTEM_GTEST)
find_package(GTest CONFIG REQUIRED)
else()
include(FetchContent)

if (MSVC)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
endif()
Expand All @@ -42,14 +42,14 @@ else()
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG v1.17.0
)

# TODO CMake 3.28, we can pass EXCLUDE_FROM_ALL directly to fetchcontent_makeavailable
fetchcontent_getproperties(googletest)
if (NOT googletest_POPULATED)
fetchcontent_populate(googletest)
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()

add_library(GTest::gtest_main ALIAS gtest_main)
endif()

Expand Down Expand Up @@ -94,6 +94,7 @@ if(NOT CMAKE_CXX_STANDARD STREQUAL "14")
mdspan_add_test(test_layout_padded_left ENABLE_PRECONDITIONS)
mdspan_add_test(test_layout_padded_right ENABLE_PRECONDITIONS)
endif()
mdspan_add_test(test_aligned_accessor)
# both of those don't work yet since its using vector
if(NOT MDSPAN_ENABLE_CUDA AND NOT MDSPAN_ENABLE_HIP)
mdspan_add_test(test_mdarray_ctors)
Expand Down
Loading