Fix building cugraph with CCCL main (#4404)
Similar to rapidsai/cudf#15552, we are testing [building RAPIDS with CCCL's main branch](NVIDIA/cccl#1667) to get ahead of any breaking changes.
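Most of the C++ changes below follow one pattern: `__device__` lambdas passed to Thrust algorithms are wrapped in `cuda::proclaim_return_type` from `<cuda/functional>`, because return-type deduction for extended device lambdas from host code is unreliable, and CCCL's main branch enforces this. A minimal sketch of the pattern (illustrative, not taken from this diff; assumes nvcc with `--extended-lambda` and a CCCL that ships `<cuda/functional>`):

```cpp
#include <cuda/functional>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform_reduce.h>

#include <cstdio>

int main()
{
  // Sum of squares of 1..3 on the device. Proclaiming the return type lets
  // Thrust query it without having to invoke the __device__ lambda on the
  // host, which is what breaks against CCCL main.
  float sum_sq = thrust::transform_reduce(
    thrust::device,
    thrust::make_counting_iterator(1),
    thrust::make_counting_iterator(4),
    cuda::proclaim_return_type<float>([] __device__(int x) -> float { return float(x) * x; }),
    0.0f,
    thrust::plus<float>{});
  std::printf("sum of squares = %f\n", sum_sq);  // prints 14.0
  return 0;
}
```

The remaining changes move UCX/OpenMPI into the devcontainer base images, hide RAFT deprecation warnings, enable MG tests in CI, and fix a kv_store sentinel type.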

Authors:
  - Paul Taylor (https://github.com/trxcllnt)
  - Ralph Liu (https://github.com/nv-rliu)
  - Seunghwa Kang (https://github.com/seunghwak)
  - Ray Bell (https://github.com/raybellwaves)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)
  - Jake Awe (https://github.com/AyodeAwe)

URL: #4404
trxcllnt committed May 29, 2024
1 parent 169d162 commit 4c797bf
Showing 19 changed files with 138 additions and 154 deletions.
5 changes: 5 additions & 0 deletions .devcontainer/Dockerfile
@@ -7,6 +7,11 @@ FROM ${BASE} as pip-base

 ENV DEFAULT_VIRTUAL_ENV=rapids

+RUN apt update -y \
+ && DEBIAN_FRONTEND=noninteractive apt install -y \
+    libblas-dev liblapack-dev \
+ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
+
 FROM ${BASE} as conda-base

 ENV DEFAULT_CONDA_ENV=rapids
2 changes: 1 addition & 1 deletion .devcontainer/cuda11.8-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
8 changes: 2 additions & 6 deletions .devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,19 +5,16 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "11.8",
       "installcuBLAS": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
2 changes: 1 addition & 1 deletion .devcontainer/cuda12.2-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
8 changes: 2 additions & 6 deletions .devcontainer/cuda12.2-pip/devcontainer.json
@@ -5,19 +5,16 @@
     "args": {
       "CUDA": "12.2",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "12.2",
       "installcuBLAS": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
@@ -196,5 +196,5 @@ jobs:
       extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
       build_command: |
         sccache -z;
-        build-all --verbose -j$(nproc --ignore=1);
+        build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON;
         sccache -s;
4 changes: 2 additions & 2 deletions cpp/CMakeLists.txt
@@ -92,14 +92,14 @@ set(CUGRAPH_CXX_FLAGS "")
 set(CUGRAPH_CUDA_FLAGS "")

 if(CMAKE_COMPILER_IS_GNUCXX)
-    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations)
+    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
 endif(CMAKE_COMPILER_IS_GNUCXX)


 message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}")

 list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
-list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings)
+list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS -Xptxas=--disable-warnings)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all)
9 changes: 5 additions & 4 deletions cpp/include/cugraph/utilities/device_functors.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -78,13 +78,14 @@ struct indirection_t {

 template <typename index_t, typename Iterator>
 struct indirection_if_idx_valid_t {
+  using value_type = typename thrust::iterator_traits<Iterator>::value_type;
   Iterator first{};
   index_t invalid_idx{};
-  typename thrust::iterator_traits<Iterator>::value_type invalid_value{};
+  value_type invalid_value{};

-  __device__ typename thrust::iterator_traits<Iterator>::value_type operator()(index_t i) const
+  __device__ value_type operator()(index_t i) const
   {
-    return (i != invalid_idx) ? *(first + i) : invalid_value;
+    return (i != invalid_idx) ? static_cast<value_type>(*(first + i)) : invalid_value;
   }
 };
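For context, a self-contained usage sketch of the functor above (the struct is duplicated here outside cugraph's namespaces; names and values are illustrative): gather `values[i]` for each index `i`, substituting a sentinel where `i` equals `invalid_idx`. The added `static_cast` presumably matters when `*(first + i)` yields a proxy reference rather than a plain `value_type`.

```cpp
#include <thrust/device_vector.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/transform.h>

#include <vector>

// Mirror of cugraph's indirection_if_idx_valid_t after this change.
template <typename index_t, typename Iterator>
struct indirection_if_idx_valid_t {
  using value_type = typename thrust::iterator_traits<Iterator>::value_type;
  Iterator first{};
  index_t invalid_idx{};
  value_type invalid_value{};

  __device__ value_type operator()(index_t i) const
  {
    return (i != invalid_idx) ? static_cast<value_type>(*(first + i)) : invalid_value;
  }
};

void gather_with_sentinel()
{
  std::vector<float> h_values{10.f, 20.f, 30.f};
  std::vector<int> h_indices{2, -1, 0};  // -1 marks "no value"

  thrust::device_vector<float> values(h_values.begin(), h_values.end());
  thrust::device_vector<int> indices(h_indices.begin(), h_indices.end());
  thrust::device_vector<float> out(indices.size());

  // out[k] = (indices[k] == -1) ? -1.f : values[indices[k]]
  indirection_if_idx_valid_t<int, float const*> op{values.data().get(), -1, -1.f};
  thrust::transform(indices.begin(), indices.end(), out.begin(), op);
  // out now holds {30, -1, 10}
}
```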
5 changes: 3 additions & 2 deletions cpp/include/cugraph/utilities/mask_utils.cuh
@@ -20,6 +20,7 @@

 #include <raft/core/handle.hpp>

+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -160,13 +161,13 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz
     handle.get_thrust_policy(),
     thrust::make_counting_iterator(size_t{0}),
     thrust::make_counting_iterator(packed_bool_size(num_bits)),
-    [mask_first, num_bits] __device__(size_t i) {
+    cuda::proclaim_return_type<size_t>([mask_first, num_bits] __device__(size_t i) -> size_t {
       auto word = *(mask_first + i);
       if ((i + 1) * packed_bools_per_word() > num_bits) {
         word &= packed_bool_partial_mask(num_bits % packed_bools_per_word());
       }
       return static_cast<size_t>(__popc(word));
-    },
+    }),
     size_t{0},
     thrust::plus<size_t>{});
 }
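For intuition, here is a host-side sketch (illustrative, not cugraph API) of what `count_set_bits` computes: the population count over the first `num_bits` of a 32-bit packed bool mask, zeroing the tail of the final partial word exactly as the guarded `packed_bool_partial_mask` call above does.

```cpp
#include <bit>      // std::popcount (C++20)
#include <cstdint>
#include <vector>

std::size_t count_set_bits_host(std::vector<std::uint32_t> const& mask, std::size_t num_bits)
{
  constexpr std::size_t bits_per_word = 32;
  std::size_t count = 0;
  for (std::size_t i = 0; i < (num_bits + bits_per_word - 1) / bits_per_word; ++i) {
    std::uint32_t word = mask[i];
    if ((i + 1) * bits_per_word > num_bits) {
      // Zero the bits past num_bits in the last, partially used word.
      word &= (std::uint32_t{1} << (num_bits % bits_per_word)) - 1;
    }
    count += std::popcount(word);
  }
  return count;  // e.g. count_set_bits_host({0xFFFFFFFFu, 0x7u}, 35) == 35
}
```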
3 changes: 2 additions & 1 deletion cpp/src/community/detail/common_methods.cuh
@@ -29,6 +29,7 @@
 #include <cugraph/detail/utility_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>

+#include <cuda/functional>
 #include <thrust/binary_search.h>
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
@@ -178,7 +179,7 @@ weight_t compute_modularity(
     handle.get_thrust_policy(),
     cluster_weights.begin(),
     cluster_weights.end(),
-    [] __device__(weight_t p) { return p * p; },
+    cuda::proclaim_return_type<weight_t>([] __device__(weight_t p) -> weight_t { return p * p; }),
     weight_t{0},
     thrust::plus<weight_t>());
15 changes: 9 additions & 6 deletions cpp/src/community/legacy/louvain.cuh
@@ -22,13 +22,15 @@

 #include <cugraph/dendrogram.hpp>
 #include <cugraph/legacy/graph.hpp>
+
 #ifdef TIMING
 #include <cugraph/utilities/high_res_timer.hpp>
 #endif

 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>

+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/execution_policy.h>
@@ -141,12 +143,13 @@ class Louvain {
     handle_.get_thrust_policy(),
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(graph.number_of_vertices),
-    [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
-      vertex_t community) {
-      return ((d_inc[community] / total_edge_weight) - resolution *
-              (d_deg[community] * d_deg[community]) /
-              (total_edge_weight * total_edge_weight));
-    },
+    cuda::proclaim_return_type<weight_t>(
+      [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
+        vertex_t community) -> weight_t {
+        return ((d_inc[community] / total_edge_weight) -
+                resolution * (d_deg[community] * d_deg[community]) /
+                (total_edge_weight * total_edge_weight));
+      }),
     weight_t{0.0},
     thrust::plus<weight_t>());
15 changes: 9 additions & 6 deletions cpp/src/components/weakly_connected_components_impl.cuh
@@ -34,6 +34,7 @@

 #include <rmm/device_uvector.hpp>

+#include <cuda/functional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
@@ -400,9 +401,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
     handle.get_thrust_policy(),
     new_root_candidates.begin(),
     new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0),
-    [vertex_partition, degrees = degrees.data()] __device__(auto v) {
-      return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
-    },
+    cuda::proclaim_return_type<edge_t>(
+      [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
+        return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
+      }),
     edge_t{0},
     thrust::plus<edge_t>{});
@@ -642,9 +644,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
     handle.get_thrust_policy(),
     thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()),
     thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()),
-    [vertex_partition, degrees = degrees.data()] __device__(auto v) {
-      return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
-    },
+    cuda::proclaim_return_type<edge_t>(
+      [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
+        return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
+      }),
     edge_t{0},
     thrust::plus<edge_t>());
4 changes: 3 additions & 1 deletion cpp/src/detail/utility_wrappers.cu
@@ -21,6 +21,7 @@

 #include <rmm/exec_policy.hpp>

+#include <cuda/functional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/functional.h>
@@ -139,7 +140,8 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
     rmm::exec_policy(stream_view),
     edge_first,
     edge_first + num_edges,
-    [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); },
+    cuda::proclaim_return_type<vertex_t>(
+      [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }),
     vertex_t{0},
     thrust::maximum<vertex_t>());
 }
1 change: 1 addition & 0 deletions cpp/src/prims/kv_store.cuh
@@ -17,6 +17,7 @@

 #include "prims/detail/optional_dataframe_buffer.hpp"

+#include <cugraph/graph.hpp>
 #include <cugraph/utilities/dataframe_buffer.hpp>
 #include <cugraph/utilities/device_functors.cuh>
@@ -754,7 +754,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
       std::make_unique<kv_store_t<vertex_t, edge_src_value_t, true>>(
         std::move(majors),
         std::move(edge_major_values),
-        invalid_vertex_id<vertex_t>::value,
+        edge_src_value_t{},
         true,
         handle.get_stream());
     }
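On this fix: `kv_store_t<vertex_t, edge_src_value_t, true>` stores `edge_src_value_t` values, so the sentinel passed here must match the value type rather than the key type; the old `invalid_vertex_id<vertex_t>::value` appears to have compiled only while the value type happened to be constructible from a vertex id, hence the switch to a value-initialized `edge_src_value_t{}`.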
36 changes: 19 additions & 17 deletions cpp/src/structure/graph_view_impl.cuh
@@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges(
     execution_policy,
     thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2],
     thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3],
-    [edge_partition] __device__(auto major) {
+    cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
       auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
       vertex_t const* indices{nullptr};
       [[maybe_unused]] edge_t edge_offset{};
@@ -365,7 +365,7 @@
         if (indices[i - 1] == indices[i]) { ++count; }
       }
       return count;
-    },
+    }),
     edge_t{0},
     thrust::plus<edge_t>{});
 }
@@ -374,19 +374,21 @@
     execution_policy,
     thrust::make_counting_iterator(vertex_t{0}),
     thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())),
-    [edge_partition, major_start_offset = (*segment_offsets)[3]] __device__(auto idx) {
-      auto major_idx =
-        major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
-      vertex_t const* indices{nullptr};
-      [[maybe_unused]] edge_t edge_offset{};
-      edge_t local_degree{};
-      thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
-      edge_t count{0};
-      for (edge_t i = 1; i < local_degree; ++i) {  // assumes neighbors are sorted
-        if (indices[i - 1] == indices[i]) { ++count; }
-      }
-      return count;
-    },
+    cuda::proclaim_return_type<edge_t>(
+      [edge_partition,
+       major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t {
+        auto major_idx =
+          major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
+        vertex_t const* indices{nullptr};
+        [[maybe_unused]] edge_t edge_offset{};
+        edge_t local_degree{};
+        thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
+        edge_t count{0};
+        for (edge_t i = 1; i < local_degree; ++i) {  // assumes neighbors are sorted
+          if (indices[i - 1] == indices[i]) { ++count; }
+        }
+        return count;
+      }),
     edge_t{0},
     thrust::plus<edge_t>{});
 }
@@ -398,7 +400,7 @@
     thrust::make_counting_iterator(edge_partition.major_range_first()),
     thrust::make_counting_iterator(edge_partition.major_range_first()) +
       edge_partition.major_range_size(),
-    [edge_partition] __device__(auto major) {
+    cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
       auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
       vertex_t const* indices{nullptr};
       [[maybe_unused]] edge_t edge_offset{};
@@ -409,7 +411,7 @@
         if (indices[i - 1] == indices[i]) { ++count; }
       }
       return count;
-    },
+    }),
     edge_t{0},
     thrust::plus<edge_t>{});
 }