Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement CUDA Portable Collection for PF Clusters #17

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
dc5200e
Initial attempt to move to a better SOA with proper length. Minimize …
hatakeyamak Feb 25, 2023
eb17251
Further attempt to optimize memory allocation.
hatakeyamak Feb 27, 2023
c386793
Cleanup DeviceToHost Memcpy.
hatakeyamak Feb 27, 2023
021e4c5
Further progress on output data format.
hatakeyamak Mar 2, 2023
1cf8118
Further progress on output data format.
hatakeyamak Mar 2, 2023
7862cea
Further progress on output data format.
hatakeyamak Mar 2, 2023
b9262c7
Further progress on output data format.
hatakeyamak Mar 2, 2023
54bc37d
More progress on output data format. Now it produces the contracted r…
hatakeyamak Mar 5, 2023
34dd1bd
Further progress on output data format. Some renaming.
hatakeyamak Mar 5, 2023
8175c0f
fillDescription update.
hatakeyamak Mar 6, 2023
5ad730e
fix nseeds initialization.
hatakeyamak Mar 6, 2023
6e03f6c
update
hatakeyamak Mar 7, 2023
2eb0454
Optimize threadsPerBlock for clustering.
hatakeyamak Mar 7, 2023
3d0b32a
Merged PFRecHitAndCluster_GPU_13_0_dataformat from repository hatakey…
jsamudio Jun 2, 2023
9bb5632
first pass introduction of PFCluster Portable Collection, still missi…
jsamudio Jun 2, 2023
dbbb1e5
Added cuda portable collection and working on PFClusterCudaHCAL now, …
jsamudio Jun 7, 2023
07720f8
some progress toward filling the Portable Device Collection, fantisti…
jsamudio Jun 14, 2023
7db5f66
working implementation of portable device collection and copied to ho…
jsamudio Jun 16, 2023
ac0b09a
Working implementation of CUDA DeviceMultiCollection
jsamudio Jun 20, 2023
4c17afa
Added host multi collection and implementation in PF clustering
jsamudio Jun 21, 2023
b62f89e
cleaning and addition of device functions and shared memory
jsamudio Jun 22, 2023
b7355ad
Removal of extraneous changes
jsamudio Jun 22, 2023
35bd2b2
Merge branch 'PFRecHitAndCluster_GPU_13_0_0_pre4_stable' into dev_PFC…
jsamudio Jun 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions CUDADataFormats/Common/interface/PortableCollectionCommon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#ifndef CUDADataFormats_Common_interface_PortableCollectionCommon_h
#define CUDADataFormats_Common_interface_PortableCollectionCommon_h

#include <array>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>
#include <utility>

namespace portablecollection {

// Note: if there are other uses for this, it could be moved to a central place
template <std::size_t Start, std::size_t End, std::size_t Inc = 1, typename F>
constexpr void constexpr_for(F&& f) {
if constexpr (Start < End) {
f(std::integral_constant<std::size_t, Start>());
constexpr_for<Start + Inc, End, Inc>(std::forward<F>(f));
}
}

template <std::size_t Idx, typename T>
struct CollectionLeaf {
CollectionLeaf() = default;
CollectionLeaf(std::byte* buffer, int32_t elements) : layout_(buffer, elements), view_(layout_) {}
template <std::size_t N>
CollectionLeaf(std::byte* buffer, std::array<int32_t, N> const& sizes)
: layout_(buffer, sizes[Idx]), view_(layout_) {
static_assert(N >= Idx);
}
using Layout = T;
using View = typename Layout::View;
using ConstView = typename Layout::ConstView;
Layout layout_; //
View view_; //!
// Make sure types are not void.
static_assert(not std::is_same<T, void>::value);
};

template <std::size_t Idx, typename T, typename... Args>
struct CollectionImpl : public CollectionLeaf<Idx, T>, public CollectionImpl<Idx + 1, Args...> {
CollectionImpl() = default;
CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf<Idx, T>(buffer, elements) {}

template <std::size_t N>
CollectionImpl(std::byte* buffer, std::array<int32_t, N> const& sizes)
: CollectionLeaf<Idx, T>(buffer, sizes),
CollectionImpl<Idx + 1, Args...>(CollectionLeaf<Idx, T>::layout_.metadata().nextByte(), sizes) {}
};

template <std::size_t Idx, typename T>
struct CollectionImpl<Idx, T> : public CollectionLeaf<Idx, T> {
CollectionImpl() = default;
CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf<Idx, T>(buffer, elements) {}

template <std::size_t N>
CollectionImpl(std::byte* buffer, std::array<int32_t, N> const& sizes) : CollectionLeaf<Idx, T>(buffer, sizes) {
static_assert(N == Idx + 1);
}
};

template <typename... Args>
struct Collections : public CollectionImpl<0, Args...> {};

// return the type at the Idx position in Args...
template <std::size_t Idx, typename... Args>
using TypeResolver = typename std::tuple_element<Idx, std::tuple<Args...>>::type;

// count how many times the type T occurs in Args...
template <typename T, typename... Args>
inline constexpr std::size_t typeCount = ((std::is_same<T, Args>::value ? 1 : 0) + ... + 0);

// count the non-void elements of Args...
template <typename... Args>
inline constexpr std::size_t membersCount = sizeof...(Args);

// if the type T occurs in Tuple, TupleTypeIndex has a static member value with the corresponding index;
// otherwise there is no such data member.
template <typename T, typename Tuple>
struct TupleTypeIndex {};

template <typename T, typename... Args>
struct TupleTypeIndex<T, std::tuple<T, Args...>> {
static_assert(typeCount<T, Args...> == 0, "the requested type appears more than once among the arguments");
static constexpr std::size_t value = 0;
};

template <typename T, typename U, typename... Args>
struct TupleTypeIndex<T, std::tuple<U, Args...>> {
static_assert(not std::is_same_v<T, U>);
static_assert(typeCount<T, Args...> == 1, "the requested type does not appear among the arguments");
static constexpr std::size_t value = 1 + TupleTypeIndex<T, std::tuple<Args...>>::value;
};

// if the type T occurs in Args..., TypeIndex has a static member value with the corresponding index;
// otherwise there is no such data member.
template <typename T, typename... Args>
using TypeIndex = TupleTypeIndex<T, std::tuple<Args...>>;

// return the index where the type T occurs in Args...
template <typename T, typename... Args>
inline constexpr std::size_t typeIndex = TypeIndex<T, Args...>::value;

} // namespace portablecollection

#endif // CUDADataFormats_Common_interface_PortableCollectionCommon_h
207 changes: 207 additions & 0 deletions CUDADataFormats/Common/interface/PortableDeviceCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

#include <cassert>
#include <cstdlib>
#include <optional>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "CUDADataFormats/Common/interface/PortableCollectionCommon.h"

namespace cms::cuda {

Expand Down Expand Up @@ -62,6 +64,211 @@ namespace cms::cuda {
View view_; //!
};

//generic SoA-Based product in device memory
template <typename T0, typename... Args>
class PortableDeviceMultiCollection {
template <typename T>
static constexpr std::size_t count_t_ = portablecollection::typeCount<T, T0, Args...>;

template <typename T>
static constexpr std::size_t index_t_ = portablecollection::typeIndex<T, T0, Args...>;

static constexpr std::size_t members_ = sizeof...(Args) + 1;

public:
using Buffer = cms::cuda::device::unique_ptr<std::byte[]>;
using Implementation = portablecollection::CollectionImpl<0, T0, Args...>;

using SizesArray = std::array<int32_t, members_>;

template <std::size_t Idx = 0>
using Layout = portablecollection::TypeResolver<Idx, T0, Args...>;

template <std::size_t Idx = 0UL>
using View = typename std::tuple_element<Idx, std::tuple<T0, Args...>>::type::View;

template <std::size_t Idx = 0UL>
using ConstView = typename std::tuple_element<Idx, std::tuple<T0, Args...>>::type::ConstView;

private:
template <std::size_t Idx>
using Leaf = portablecollection::CollectionLeaf<Idx, Layout<Idx>>;

template <std::size_t Idx>
Leaf<Idx>& get() {
return static_cast<Leaf<Idx>&>(impl_);
}

template <std::size_t Idx>
Leaf<Idx> const& get() const {
return static_cast<Leaf<Idx> const&>(impl_);
}

template <typename T>
Leaf<index_t_ <T>>& get() {
return static_cast<Leaf<index_t_<T>>&>(impl_);
}

template <typename T>
Leaf<index_t_<T>> const& get() const {
return static_cast<Leaf<index_t_<T>> const&>(impl_);
}

public:
PortableDeviceMultiCollection() = default;

PortableDeviceMultiCollection(int32_t elements, cudaStream_t stream)
: buffer_{cms::cuda::make_device_unique<std::byte[]>(Layout<>::computeDataSize(elements), stream)},
impl_{buffer_.get(), elements} {
assert(reinterpret_cast<uintptr_t>(buffer_.get()) % Layout<>::alignment == 0);
static_assert(members_ == 1);
}

static int32_t computeDataSize(const SizesArray& sizes) {
int32_t ret = 0;
portablecollection::constexpr_for<0, members_>(
[&sizes, &ret](auto i) { ret += Layout<i>::computeDataSize(sizes[i]); });
return ret;
}

PortableDeviceMultiCollection(const SizesArray& sizes, cudaStream_t stream)
// allocate device memory asynchronously on the given work queue
: buffer_{cms::cuda::make_device_unique<std::byte[]>(computeDataSize(sizes), stream)},
impl_{buffer_.get(), sizes} {
portablecollection::constexpr_for<0, members_>(
[&](auto i) { assert(reinterpret_cast<uintptr_t>(buffer_.get()) % Layout<i>::alignment == 0); });
constexpr auto alignment = Layout<0>::alignment;
portablecollection::constexpr_for<1, members_>(
[&alignment](auto i) { static_assert(alignment == Layout<i>::alignment); });
}

// non-copyable
PortableDeviceMultiCollection(PortableDeviceMultiCollection const&) = delete;
PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection const&) = delete;

// movable
PortableDeviceMultiCollection(PortableDeviceMultiCollection&&) = default;
PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection&&) = default;

// default destructor
~PortableDeviceMultiCollection() = default;

// access the View by index
template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
View<Idx>& view() {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const& view() const {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const& const_view() const {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
View<Idx>& operator*() {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const& operator*() const {
return get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
View<Idx>* operator->() {
return &get<Idx>().view_;
}

template <std::size_t Idx = 0, typename = std::enable_if_t<(members_ > Idx)>>
ConstView<Idx> const* operator->() const {
return &get<Idx>().view_;
}

// access the View by type
template <typename T>
typename T::View& view() {
return get<T>().view_;
}

template <typename T>
typename T::ConstView const& view() const {
return get<T>().view_;
}

template <typename T>
typename T::ConstView const& const_view() const {
return get<T>().view_;
}

template <typename T>
typename T::View& operator*() {
return get<T>().view_;
}

template <typename T>
typename T::ConstView const& operator*() const {
return get<T>().view_;
}

template <typename T>
typename T::View* operator->() {
return &get<T>().view_;
}

template <typename T>
typename T::ConstView const* operator->() const {
return &get<T>().view_;
}

// access the Buffer
Buffer& buffer() { return buffer_; }
Buffer const& buffer() const { return buffer_; }
Buffer const& const_buffer() const { return buffer_; }

// Extract the sizes array
SizesArray sizes() const {
SizesArray ret;
portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get<i>().layout_.metadata().size(); });
return ret;
}

size_t bufferSize() const {
SizesArray layoutSize;
size_t bytes;
bytes = 0;
portablecollection::constexpr_for<0, members_>([&](auto i) {
layoutSize[i] = get<i>().layout_.metadata().byteSize();
bytes += layoutSize[i];
});
return bytes;
}


private:
Buffer buffer_; //!
Implementation impl_; // (serialized: this is where the layouts live)
};

// Singleton case does not need to be aliased. A special template covers it.

// This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail.
template <typename T0, typename T1>
using PortableDeviceCollection2 = PortableDeviceMultiCollection<T0, T1>;

template <typename T0, typename T1, typename T2>
using PortableDeviceCollection3 = PortableDeviceMultiCollection<T0, T1, T2>;

template <typename T0, typename T1, typename T2, typename T3>
using PortableDeviceCollection4 = PortableDeviceMultiCollection<T0, T1, T2, T3>;

template <typename T0, typename T1, typename T2, typename T3, typename T4>
using PortableDeviceCollection5 = PortableDeviceMultiCollection<T0, T1, T2, T3, T4>;

} // namespace cms::cuda

#endif // CUDADataFormats_Common_interface_PortableDeviceCollection_h
Loading