
Gather one hop neighbors #2117

Merged · 12 commits · Mar 21, 2022
7 changes: 6 additions & 1 deletion ci/gpu/build.sh
@@ -62,8 +62,13 @@ conda activate rapids
export PATH=$(conda info --base)/envs/rapids/bin:$PATH

gpuci_logger "Install dependencies"
# Assume libcudf will be installed via cudf. This is done to prevent the
# following:
# libcudf = 22.04.00a220315, cudf = 22.04.00a220308
# where cudf 220308 was chosen possibly because it has fewer/different
# dependencies and the corresponding recipes think they're compatible when they
# may not be.
gpuci_mamba_retry install -y \
"libcudf=${MINOR_VERSION}" \
"cudf=${MINOR_VERSION}" \
"librmm=${MINOR_VERSION}" \
"rmm=${MINOR_VERSION}" \
4 changes: 2 additions & 2 deletions conda/recipes/libcugraph/meta.yaml
@@ -44,8 +44,8 @@ requirements:
- boost-cpp>=1.66
- nccl>=2.9.9
- ucx-proc=*=gpu
- gtest
- gmock
- gtest=1.10.0 # FIXME: pinned to version in https://github.com/rapidsai/integration/blob/branch-22.04/conda/recipes/versions.yaml
- gmock=1.10.0 # FIXME: pinned to version in https://github.com/rapidsai/integration/blob/branch-22.04/conda/recipes/versions.yaml
run:
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- libraft-headers {{ minor_version }}
237 changes: 237 additions & 0 deletions cpp/include/cugraph/detail/decompress_matrix_partition.cuh
@@ -180,10 +180,247 @@ void decompress_matrix_partition_to_fill_edgelist_majors(
}
}

template <typename vertex_t, typename edge_t, typename weight_t, typename prop_t, bool multi_gpu>
__global__ void decompress_edgelist_high_degree(
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t const* input_majors,
edge_t const* input_major_start_offsets,
vertex_t input_major_count,
vertex_t* output_majors,
vertex_t* output_minors,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property)
{
size_t idx = static_cast<size_t>(blockIdx.x);
while (idx < static_cast<size_t>(input_major_count)) {
auto major = input_majors[idx];
auto major_partition_offset = static_cast<size_t>(major - matrix_partition.get_major_first());
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(static_cast<vertex_t>(major_partition_offset));
auto major_offset = input_major_start_offsets[idx];
if (property) {
auto input_property = thrust::get<0>(*property)[idx];
prop_t* output_property = thrust::get<1>(*property);
for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];
output_property[major_offset + i] = input_property;
}
} else {
for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];
}
}
idx += gridDim.x;
}
}

template <typename vertex_t, typename edge_t, typename weight_t, typename prop_t, bool multi_gpu>
__global__ void decompress_edgelist_mid_degree(
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t const* input_majors,
edge_t const* input_major_start_offsets,
vertex_t input_major_count,
vertex_t* output_majors,
vertex_t* output_minors,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property)
{
auto const tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(decompress_matrix_partition_block_size % raft::warp_size() == 0);
auto const lane_id = tid % raft::warp_size();
size_t idx = static_cast<size_t>(tid / raft::warp_size());
while (idx < static_cast<size_t>(input_major_count)) {
auto major = input_majors[idx];
auto major_partition_offset = static_cast<size_t>(major - matrix_partition.get_major_first());
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(static_cast<vertex_t>(major_partition_offset));
auto major_offset = input_major_start_offsets[idx];
if (property) {
auto input_property = thrust::get<0>(*property)[idx];
prop_t* output_property = thrust::get<1>(*property);
for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];
output_property[major_offset + i] = input_property;
}
} else {
for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];
}
}
idx += gridDim.x * (blockDim.x / raft::warp_size());
}
}
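Both kernels above assume that input_major_start_offsets[i] holds the first output slot for the edges of input_majors[i], i.e. an exclusive prefix sum of the local degrees of the gathered majors. A minimal host-side sketch of how such offsets could be derived; the local_degrees vector, the helper name, and its placement are illustrative assumptions rather than part of this header:

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <thrust/scan.h>

// Sketch: offsets[i] = sum of local_degrees[0..i-1]; offsets[0] == 0, so each
// major's edges land in a contiguous, non-overlapping slice of the output arrays.
template <typename edge_t>
rmm::device_uvector<edge_t> compute_major_start_offsets(
  raft::handle_t const& handle, rmm::device_uvector<edge_t> const& local_degrees)
{
  rmm::device_uvector<edge_t> offsets(local_degrees.size(), handle.get_stream());
  thrust::exclusive_scan(handle.get_thrust_policy(),
                         local_degrees.begin(),
                         local_degrees.end(),
                         offsets.begin());
  return offsets;
}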

template <typename vertex_t, typename edge_t, typename weight_t, typename prop_t, bool multi_gpu>
void decompress_matrix_partition_to_fill_edgelist(
raft::handle_t const& handle,
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> matrix_partition,
vertex_t const* input_majors,
edge_t const* input_major_start_offsets,
std::vector<vertex_t> const& segment_offsets,
vertex_t* majors,
vertex_t* minors,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property)
{
auto execution_policy = handle.get_thrust_policy();
static_assert(detail::num_sparse_segments_per_vertex_partition == 3);
auto& comm = handle.get_comms();
auto const comm_rank = comm.get_rank();
if (segment_offsets[1] - segment_offsets[0] > 0) {
raft::grid_1d_block_t update_grid(segment_offsets[1] - segment_offsets[0],
detail::decompress_matrix_partition_block_size,
handle.get_device_properties().maxGridSize[0]);

detail::decompress_edgelist_high_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
matrix_partition,
input_majors + segment_offsets[0],
input_major_start_offsets,
segment_offsets[1],
majors,
minors,
property ? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[0], thrust::get<1>(*property)))
: thrust::nullopt);
}
if (segment_offsets[2] - segment_offsets[1] > 0) {
raft::grid_1d_warp_t update_grid(segment_offsets[2] - segment_offsets[1],
detail::decompress_matrix_partition_block_size,
handle.get_device_properties().maxGridSize[0]);

detail::decompress_edgelist_mid_degree<<<update_grid.num_blocks,
update_grid.block_size,
0,
handle.get_stream()>>>(
matrix_partition,
input_majors + segment_offsets[1],
input_major_start_offsets + segment_offsets[1] - segment_offsets[0],
segment_offsets[2] - segment_offsets[1],
majors,
minors,
property ? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[1], thrust::get<1>(*property)))
: thrust::nullopt);
}
if (segment_offsets[3] - segment_offsets[2] > 0) {

thrust::for_each(
execution_policy,
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(segment_offsets[3] - segment_offsets[2]),
[matrix_partition,
input_majors = input_majors + segment_offsets[2],
input_major_start_offsets =
input_major_start_offsets + segment_offsets[2] - segment_offsets[0],
majors,
minors,
property = property
? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[2], thrust::get<1>(*property)))
: thrust::nullopt] __device__(auto idx) {
auto major = input_majors[idx];
auto major_offset = input_major_start_offsets[idx];
auto major_partition_offset =
static_cast<size_t>(major - matrix_partition.get_major_first());
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(major_partition_offset);
thrust::fill(
thrust::seq, majors + major_offset, majors + major_offset + local_degree, major);
thrust::copy(thrust::seq, indices, indices + local_degree, minors + major_offset);
if (property) {
auto major_input_property = thrust::get<0>(*property)[idx];
auto minor_output_property = thrust::get<1>(*property);
thrust::fill(thrust::seq,
minor_output_property + major_offset,
minor_output_property + major_offset + local_degree,
major_input_property);
}
});
}
if (matrix_partition.get_dcs_nzd_vertex_count() &&
(*(matrix_partition.get_dcs_nzd_vertex_count()) > 0)) {
thrust::for_each(
execution_policy,
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(segment_offsets[4] - segment_offsets[3]),
[matrix_partition,
input_majors = input_majors + segment_offsets[3],
input_major_start_offsets =
input_major_start_offsets + segment_offsets[3] - segment_offsets[0],
majors,
minors,
property = property
? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[3], thrust::get<1>(*property)))
: thrust::nullopt] __device__(auto idx) {
auto major = input_majors[idx];
auto major_offset = input_major_start_offsets[idx];
auto major_idx = matrix_partition.get_major_hypersparse_idx_from_major_nocheck(major);
if (major_idx) {
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
matrix_partition.get_local_edges(*major_idx);
thrust::fill(
thrust::seq, majors + major_offset, majors + major_offset + local_degree, major);
thrust::copy(thrust::seq, indices, indices + local_degree, minors + major_offset);
if (property) {
auto major_input_property = thrust::get<0>(*property)[idx];
auto minor_output_property = thrust::get<1>(*property);
thrust::fill(thrust::seq,
minor_output_property + major_offset,
minor_output_property + major_offset + local_degree,
major_input_property);
}
}
});
}
}
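A minimal invocation sketch for the function above, assuming the gathered majors are already grouped by degree segment (matching segment_offsets) and that major_start_offsets was built as an exclusive scan of their local degrees, as in the earlier sketch. The names gathered_majors, major_start_offsets, total_edge_count, and the explicit template arguments are illustrative assumptions, not part of this header:

// Sketch: fill an edge list for the gathered majors without a per-major property.
rmm::device_uvector<vertex_t> out_majors(total_edge_count, handle.get_stream());
rmm::device_uvector<vertex_t> out_minors(total_edge_count, handle.get_stream());
decompress_matrix_partition_to_fill_edgelist<vertex_t, edge_t, weight_t, int32_t, multi_gpu>(
  handle,
  matrix_partition,
  gathered_majors.data(),
  major_start_offsets.data(),
  segment_offsets,
  out_majors.data(),
  out_minors.data(),
  thrust::nullopt);  // no (input, output) property pair; prop_t still supplied as a template argument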

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
void decompress_matrix_partition_to_edgelist(
raft::handle_t const& handle,
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> const matrix_partition,
vertex_t* edgelist_majors /* [OUT] */,
vertex_t* edgelist_minors /* [OUT] */,
std::optional<weight_t*> edgelist_weights /* [OUT] */,
std::optional<std::vector<vertex_t>> const& segment_offsets)
{
auto number_of_edges = matrix_partition.get_number_of_edges();

decompress_matrix_partition_to_fill_edgelist_majors(
handle, matrix_partition, edgelist_majors, segment_offsets);
thrust::copy(handle.get_thrust_policy(),
matrix_partition.get_indices(),
matrix_partition.get_indices() + number_of_edges,
edgelist_minors);
if (edgelist_weights) {
thrust::copy(handle.get_thrust_policy(),
*(matrix_partition.get_weights()),
*(matrix_partition.get_weights()) + number_of_edges,
(*edgelist_weights));
}
}

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
void decompress_matrix_partition_to_edgelist(
raft::handle_t const& handle,
matrix_partition_device_view_t<vertex_t, edge_t, weight_t, multi_gpu> const matrix_partition,
vertex_t const* majors,
vertex_t* edgelist_majors /* [OUT] */,
vertex_t* edgelist_minors /* [OUT] */,
std::optional<weight_t*> edgelist_weights /* [OUT] */,
33 changes: 29 additions & 4 deletions cpp/include/cugraph/detail/graph_functions.cuh
@@ -158,7 +158,6 @@ partition_information(raft::handle_t const& handle, GraphViewType const& graph_v
* Collect all the edges that are present in the adjacency lists on the current gpu
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam EdgeIndexIterator Type of the iterator for edge indices.
* @tparam GPUIdIterator Type of the iterator for gpu id identifiers.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
@@ -167,14 +166,14 @@ partition_information(raft::handle_t const& handle, GraphViewType const& graph_v
* gpus in the column communicator
* @param active_major_gpu_ids Device vector containing the gpu id associated with every vertex
* present in active_majors_in_row
* @param edge_index_first Iterator pointing to the first destination index
* @param minor_indices Device vector containing the indices of the edges to be gathered
* @param indices_per_major Number of indices supplied for every major in
* active_majors_in_row
* @param global_degree_offsets Global degree offset to local adjacency list for every major
* represented by the current gpu
* @return A tuple of device vectors containing the majors, minors and gpu_ids gathered locally
*/
template <typename GraphViewType, typename EdgeIndexIterator, typename gpu_t>
template <typename GraphViewType, typename gpu_t>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<gpu_t>>
@@ -183,10 +182,36 @@ gather_local_edges(
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors_in_row,
const rmm::device_uvector<gpu_t>& active_major_gpu_ids,
EdgeIndexIterator edge_index_first,
rmm::device_uvector<typename GraphViewType::edge_type>& minor_indices,
typename GraphViewType::edge_type indices_per_major,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_degree_offsets);
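A hedged usage sketch for the updated gather_local_edges signature: callers now pass the sampled minor indices in a device vector rather than through an EdgeIndexIterator. The wrapper below, the fanout value, and how minor_indices gets filled are illustrative assumptions:

// Sketch: gather up to `fanout` sampled neighbors per active major on this GPU.
template <typename GraphViewType, typename gpu_t>
void example_gather(raft::handle_t const& handle,
                    GraphViewType const& graph_view,
                    rmm::device_uvector<typename GraphViewType::vertex_type> const& active_majors_in_row,
                    rmm::device_uvector<gpu_t> const& active_major_gpu_ids,
                    rmm::device_uvector<typename GraphViewType::edge_type> const& global_degree_offsets)
{
  using edge_t = typename GraphViewType::edge_type;
  edge_t const fanout = 10;  // indices_per_major: sampled neighbors per active major (assumed value)

  // One sampled index per (active major, fanout slot); how these are drawn
  // (e.g. uniformly in [0, degree)) is left to the caller.
  rmm::device_uvector<edge_t> minor_indices(active_majors_in_row.size() * fanout,
                                            handle.get_stream());
  // ... fill minor_indices ...

  auto [majors, minors, gpu_ids] = detail::gather_local_edges(handle,
                                                              graph_view,
                                                              active_majors_in_row,
                                                              active_major_gpu_ids,
                                                              minor_indices,
                                                              fanout,
                                                              global_degree_offsets);
}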

/**
* @brief Gather edge list for specified vertices
*
* Collect all the edges that are present in the adjacency lists on the current gpu
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam prop_t Type of the property associated with the majors.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param active_majors_in_row Device vector containing all the vertex ids that are processed by
* gpus in the column communicator
* @param active_major_property Device vector containing the property values associated with every
* vertex present in active_majors_in_row
* @return A tuple of device vectors containing the majors, minors and properties gathered locally
*/
template <typename GraphViewType, typename prop_t>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<prop_t>>
gather_one_hop_edgelist(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors_in_row,
kaatish marked this conversation as resolved.
Show resolved Hide resolved
const rmm::device_uvector<prop_t>& active_major_property);

} // namespace detail

} // namespace cugraph
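A hedged usage sketch for gather_one_hop_edgelist: every local edge whose major is in the active set is returned, tagged with that major's property so the results can be regrouped afterwards. The wrapper, its argument names, and the int32_t label type are assumptions for illustration:

// Sketch: collect the one-hop edge list for the active majors present on this GPU.
template <typename GraphViewType>
void example_one_hop(raft::handle_t const& handle,
                     GraphViewType const& graph_view,
                     rmm::device_uvector<typename GraphViewType::vertex_type> const& active_majors,
                     rmm::device_uvector<int32_t> const& active_major_labels)
{
  // edge_labels[i] carries the label of edge_majors[i], so each gathered edge
  // remembers which active major it was requested for.
  auto [edge_majors, edge_minors, edge_labels] =
    detail::gather_one_hop_edgelist(handle, graph_view, active_majors, active_major_labels);
}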