Gather one-hop neighbors (#2117)
Add utilities to enable multi-GPU gathering of adjacency lists, to be used for MNMG (multi-node, multi-GPU) sampling.
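
The key new helper is partially_decompress_matrix_partition_to_fill_edgelist (see the diff below), which expands a gathered set of majors of one matrix partition into a (major, minor) edge list on each GPU. The following is a minimal usage sketch, not part of this commit: the wrapper name gather_one_hop_edgelist, the buffer names, and the assumption that per-major output offsets and the total edge count have already been computed elsewhere are all illustrative.

// Hypothetical usage sketch (not part of this commit). Assumes the caller has already
// gathered the majors of interest to this rank, computed an exclusive scan of their local
// degrees into d_output_offsets, and grouped them by degree via segment_offsets.
#include <cugraph/detail/decompress_matrix_partition.cuh>

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <thrust/optional.h>

#include <vector>

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
void gather_one_hop_edgelist(
  raft::handle_t const& handle,
  cugraph::matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> partition,
  rmm::device_uvector<vertex_t> const& d_input_majors,  // majors gathered to this rank
  rmm::device_uvector<edge_t> const& d_output_offsets,  // exclusive scan of local degrees
  std::vector<vertex_t> const& segment_offsets,         // degree-based segments of d_input_majors
  edge_t total_edge_count,                              // sum of the local degrees
  rmm::device_uvector<vertex_t>& out_majors,
  rmm::device_uvector<vertex_t>& out_minors)
{
  out_majors.resize(total_edge_count, handle.get_stream());
  out_minors.resize(total_edge_count, handle.get_stream());

  // prop_t (int32_t here) is passed explicitly because it cannot be deduced from
  // thrust::nullopt; neither a per-major property nor a global edge index is requested.
  cugraph::detail::partially_decompress_matrix_partition_to_fill_edgelist<vertex_t,
                                                                          edge_t,
                                                                          weight_t,
                                                                          int32_t,
                                                                          multi_gpu>(
    handle,
    partition,
    d_input_majors.data(),
    d_output_offsets.data(),
    segment_offsets,
    out_majors.data(),
    out_minors.data(),
    thrust::nullopt,
    thrust::nullopt);
}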

Authors:
  - Kumar Aatish (https://github.com/kaatish)

Approvers:
  - Andrei Schaffer (https://github.com/aschaffer)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #2117
kaatish committed Mar 21, 2022
1 parent a024581 commit 6c3a469
Showing 9 changed files with 1,156 additions and 151 deletions.
260 changes: 250 additions & 10 deletions cpp/include/cugraph/detail/decompress_matrix_partition.cuh
@@ -38,7 +38,7 @@ namespace detail {
int32_t constexpr decompress_matrix_partition_block_size = 1024;

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
__global__ void for_all_major_for_all_nbr_mid_degree(
__global__ void decompress_to_edgelist_mid_degree(
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t major_first,
vertex_t major_last,
Expand Down Expand Up @@ -67,7 +67,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree(
}

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
__global__ void for_all_major_for_all_nbr_high_degree(
__global__ void decompress_to_edgelist_high_degree(
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t major_first,
vertex_t major_last,
@@ -111,10 +111,10 @@ void decompress_matrix_partition_to_fill_edgelist_majors(
detail::decompress_matrix_partition_block_size,
handle.get_device_properties().maxGridSize[0]);

detail::for_all_major_for_all_nbr_high_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
detail::decompress_to_edgelist_high_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
matrix_partition,
matrix_partition.get_major_first(),
matrix_partition.get_major_first() + (*segment_offsets)[1],
@@ -125,10 +125,10 @@
detail::decompress_matrix_partition_block_size,
handle.get_device_properties().maxGridSize[0]);

detail::for_all_major_for_all_nbr_mid_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
detail::decompress_to_edgelist_mid_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
matrix_partition,
matrix_partition.get_major_first() + (*segment_offsets)[1],
matrix_partition.get_major_first() + (*segment_offsets)[2],
@@ -180,6 +180,246 @@ void decompress_matrix_partition_to_fill_edgelist_majors(
}
}

// Block-per-major kernel for the high-degree segment: all threads of a block cooperatively
// copy one input major's adjacency list into the output edge list, optionally replicating a
// per-major property and writing a global edge index (minor map) for each emitted edge.
template <typename vertex_t, typename edge_t, typename weight_t, typename prop_t, bool multi_gpu>
__global__ void partially_decompress_to_edgelist_high_degree(
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t const* input_majors,
edge_t const* input_major_start_offsets,
vertex_t input_major_count,
vertex_t* output_majors,
vertex_t* output_minors,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property,
thrust::optional<thrust::tuple<edge_t const*, edge_t*>> global_edge_index)
{
size_t idx = static_cast<size_t>(blockIdx.x);
while (idx < static_cast<size_t>(input_major_count)) {
auto major = input_majors[idx];
auto major_partition_offset = static_cast<size_t>(major - matrix_partition.get_major_first());
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(static_cast<vertex_t>(major_partition_offset));
auto major_offset = input_major_start_offsets[idx];
for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];
}
if (property) {
auto input_property = thrust::get<0>(*property)[idx];
prop_t* output_property = thrust::get<1>(*property);
for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
output_property[major_offset + i] = input_property;
}
}
if (global_edge_index) {
auto adjacency_list_offset = thrust::get<0>(*global_edge_index)[major_partition_offset];
auto minor_map = thrust::get<1>(*global_edge_index);
for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
minor_map[major_offset + i] = adjacency_list_offset + i;
}
}
idx += gridDim.x;
}
}

// Warp-per-major kernel for the mid-degree segment: each warp expands one input major, with
// the warp's lanes striding over that major's adjacency list.
template <typename vertex_t, typename edge_t, typename weight_t, typename prop_t, bool multi_gpu>
__global__ void partially_decompress_to_edgelist_mid_degree(
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t const* input_majors,
edge_t const* input_major_start_offsets,
vertex_t input_major_count,
vertex_t* output_majors,
vertex_t* output_minors,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property,
thrust::optional<thrust::tuple<edge_t const*, edge_t*>> global_edge_index)
{
auto const tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(decompress_matrix_partition_block_size % raft::warp_size() == 0);
auto const lane_id = tid % raft::warp_size();
size_t idx = static_cast<size_t>(tid / raft::warp_size());
while (idx < static_cast<size_t>(input_major_count)) {
auto major = input_majors[idx];
auto major_partition_offset = static_cast<size_t>(major - matrix_partition.get_major_first());
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(static_cast<vertex_t>(major_partition_offset));
auto major_offset = input_major_start_offsets[idx];
// Each warp handles one input major; its lanes stride over the neighbor list.
for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];
}
if (property) {
auto input_property = thrust::get<0>(*property)[idx];
prop_t* output_property = thrust::get<1>(*property);
for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
output_property[major_offset + i] = input_property;
}
}
if (global_edge_index) {
auto adjacency_list_offset = thrust::get<0>(*global_edge_index)[major_partition_offset];
auto minor_map = thrust::get<1>(*global_edge_index);
for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
minor_map[major_offset + i] = adjacency_list_offset + i;
}
}
idx += gridDim.x * (blockDim.x / raft::warp_size());
}
}

// Host-side dispatcher: expands the gathered majors of one matrix partition into an edge list,
// launching a block-per-major kernel for the high-degree segment, a warp-per-major kernel for
// the mid-degree segment, and thrust::for_each (one thread per major) for the low-degree and
// hypersparse segments. segment_offsets partitions the input majors by degree class.
template <typename vertex_t, typename edge_t, typename weight_t, typename prop_t, bool multi_gpu>
void partially_decompress_matrix_partition_to_fill_edgelist(
raft::handle_t const& handle,
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t const* input_majors,
edge_t const* input_major_start_offsets,
std::vector<vertex_t> const& segment_offsets,
vertex_t* majors,
vertex_t* minors,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property,
thrust::optional<thrust::tuple<edge_t const*, edge_t*>> global_edge_index)
{
auto execution_policy = handle.get_thrust_policy();
static_assert(detail::num_sparse_segments_per_vertex_partition == 3);
auto& comm = handle.get_comms();
auto const comm_rank = comm.get_rank();
if (segment_offsets[1] - segment_offsets[0] > 0) {
raft::grid_1d_block_t update_grid(segment_offsets[1] - segment_offsets[0],
detail::decompress_matrix_partition_block_size,
handle.get_device_properties().maxGridSize[0]);

detail::partially_decompress_to_edgelist_high_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
matrix_partition,
input_majors + segment_offsets[0],
input_major_start_offsets,
segment_offsets[1],
majors,
minors,
property ? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[0], thrust::get<1>(*property)))
: thrust::nullopt,
global_edge_index);
}
if (segment_offsets[2] - segment_offsets[1] > 0) {
raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1],
detail::decompress_matrix_partition_block_size,
handle.get_device_properties().maxGridSize[0]);

detail::partially_decompress_to_edgelist_mid_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
matrix_partition,
input_majors + segment_offsets[1],
input_major_start_offsets + segment_offsets[1] - segment_offsets[0],
segment_offsets[2] - segment_offsets[1],
majors,
minors,
property ? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[1], thrust::get<1>(*property)))
: thrust::nullopt,
global_edge_index);
}
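// Low-degree segment: one thread per input major expands its (short) adjacency list sequentially.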
if (segment_offsets[3] - segment_offsets[2] > 0) {
thrust::for_each(
execution_policy,
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(segment_offsets[3] - segment_offsets[2]),
[matrix_partition,
input_majors = input_majors + segment_offsets[2],
input_major_start_offsets =
input_major_start_offsets + segment_offsets[2] - segment_offsets[0],
majors,
minors,
property = property
? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[2], thrust::get<1>(*property)))
: thrust::nullopt,
global_edge_index] __device__(auto idx) {
auto major = input_majors[idx];
auto major_offset = input_major_start_offsets[idx];
auto major_partition_offset =
static_cast<size_t>(major - matrix_partition.get_major_first());
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(major_partition_offset);
thrust::fill(
thrust::seq, majors + major_offset, majors + major_offset + local_degree, major);
thrust::copy(thrust::seq, indices, indices + local_degree, minors + major_offset);
if (property) {
auto major_input_property = thrust::get<0>(*property)[idx];
auto minor_output_property = thrust::get<1>(*property);
thrust::fill(thrust::seq,
minor_output_property + major_offset,
minor_output_property + major_offset + local_degree,
major_input_property);
}
if (global_edge_index) {
auto adjacency_list_offset = thrust::get<0>(*global_edge_index)[major_partition_offset];
auto minor_map = thrust::get<1>(*global_edge_index);
thrust::sequence(thrust::seq,
minor_map + major_offset,
minor_map + major_offset + local_degree,
adjacency_list_offset);
}
});
}
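// Hypersparse (DCS) segment: each major is first mapped to its hypersparse index; majors not
// present in this partition are skipped.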
if (matrix_partition.get_dcs_nzd_vertex_count() &&
(*(matrix_partition.get_dcs_nzd_vertex_count()) > 0)) {
thrust::for_each(
execution_policy,
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(segment_offsets[4] - segment_offsets[3]),
[matrix_partition,
input_majors = input_majors + segment_offsets[3],
input_major_start_offsets =
input_major_start_offsets + segment_offsets[3] - segment_offsets[0],
majors,
minors,
property = property
? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[3], thrust::get<1>(*property)))
: thrust::nullopt,
global_edge_index] __device__(auto idx) {
auto major = input_majors[idx];
auto major_offset = input_major_start_offsets[idx];
auto major_idx = matrix_partition.get_major_hypersparse_idx_from_major_nocheck(major);
if (major_idx) {
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(*major_idx);
thrust::fill(
thrust::seq, majors + major_offset, majors + major_offset + local_degree, major);
thrust::copy(thrust::seq, indices, indices + local_degree, minors + major_offset);
if (property) {
auto major_input_property = thrust::get<0>(*property)[idx];
auto minor_output_property = thrust::get<1>(*property);
thrust::fill(thrust::seq,
minor_output_property + major_offset,
minor_output_property + major_offset + local_degree,
major_input_property);
}
if (global_edge_index) {
auto major_partition_offset =
static_cast<size_t>(*major_idx - matrix_partition.get_major_first());
auto adjacency_list_offset = thrust::get<0>(*global_edge_index)[major_partition_offset];
auto minor_map = thrust::get<1>(*global_edge_index);
thrust::sequence(thrust::seq,
minor_map + major_offset,
minor_map + major_offset + local_degree,
adjacency_list_offset);
}
}
});
}
}

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
void decompress_matrix_partition_to_edgelist(
raft::handle_t const& handle,
