
Commit

Merge remote-tracking branch 'upstream/branch-23.04' into join-row-operators
divyegala committed Feb 20, 2023
2 parents 36fc5e9 + 94bbc82 commit 5d75db8
Showing 91 changed files with 4,454 additions and 2,945 deletions.
8 changes: 5 additions & 3 deletions build.sh
@@ -315,9 +315,11 @@ if buildAll || hasArg libcudf; then
LIBCUDF_FS=$(ls -lh ${LIB_BUILD_DIR}/libcudf.so | awk '{print $5}')
MSG="${MSG}<br/>libcudf.so size: $LIBCUDF_FS"
fi
echo "$MSG"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${LIB_BUILD_DIR}/ninja.log
BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"}
echo "Metrics output dir: [$BMR_DIR]"
mkdir -p ${BMR_DIR}
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
fi

if [[ ${INSTALL_TARGET} != "" ]]; then
28 changes: 27 additions & 1 deletion ci/build_cpp.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

set -euo pipefail

@@ -14,3 +14,29 @@ rapids-logger "Begin cpp build"
rapids-mamba-retry mambabuild conda/recipes/libcudf

rapids-upload-conda-to-s3 cpp

echo "++++++++++++++++++++++++++++++++++++++++++++"

if [[ -d $RAPIDS_ARTIFACTS_DIR ]]; then
ls -l ${RAPIDS_ARTIFACTS_DIR}
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"

FILE=${RAPIDS_ARTIFACTS_DIR}/ninja.log
if [[ -f $FILE ]]; then
echo -e "\x1B[33;1m\x1B[48;5;240m Ninja log for this build available at the following link \x1B[0m"
UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).ninja.log
rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"

FILE=${RAPIDS_ARTIFACTS_DIR}/ninja_log.html
if [[ -f $FILE ]]; then
echo -e "\x1B[33;1m\x1B[48;5;240m Build Metrics Report for this build available at the following link \x1B[0m"
UPLOAD_NAME=cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).BuildMetricsReport.html
rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"
16 changes: 0 additions & 16 deletions ci/test_cpp.sh
@@ -66,21 +66,5 @@ for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
fi
done

if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then
rapids-logger "Memcheck gtests with rmm_mode=cuda"
export GTEST_CUDF_RMM_MODE=cuda
COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck"
for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
test_name=$(basename ${gt})
if [[ "$test_name" == "ERROR_TEST" ]]; then
continue
fi
echo "Running gtest $test_name"
${COMPUTE_SANITIZER_CMD} ${gt} | tee "${RAPIDS_TESTS_DIR}${test_name}.cs.log"
done
unset GTEST_CUDF_RMM_MODE
# TODO: test-results/*.cs.log are processed in CI
fi

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
@@ -27,6 +27,7 @@ build:
- SCCACHE_IDLE_TIMEOUT
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- RAPIDS_ARTIFACTS_DIR

requirements:
build:
11 changes: 7 additions & 4 deletions cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh
@@ -27,6 +27,8 @@
#include <thrust/distance.h>
#include <thrust/scan.h>

#include <stdexcept>

namespace cudf {
namespace detail {

@@ -242,7 +244,7 @@ static sizes_to_offsets_iterator<ScanIterator, LastType> make_sizes_to_offsets_i
* auto const bytes = cudf::detail::sizes_to_offsets(
* d_offsets, d_offsets + strings_count + 1, d_offsets, stream);
* CUDF_EXPECTS(bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
* "Size of output exceeds column size limit");
* "Size of output exceeds column size limit", std::overflow_error);
* @endcode
*
* @tparam SizesIterator Iterator type for input of the scan using addition operation
@@ -282,8 +284,8 @@ auto sizes_to_offsets(SizesIterator begin,
* The return also includes the total number of elements -- the last element value from the
* scan.
*
* @throw cudf::logic_error if the total size of the scan (last element) greater than maximum value
* of `size_type`
* @throw std::overflow_error if the total size of the scan (last element) greater than maximum
* value of `size_type`
*
* @tparam InputIterator Used as input to scan to set the offset values
* @param begin The beginning of the input sequence
@@ -317,7 +319,8 @@ std::pair<std::unique_ptr<column>, size_type> make_offsets_child_column(
auto const total_elements = sizes_to_offsets(input_itr, input_itr + count + 1, d_offsets, stream);
CUDF_EXPECTS(
total_elements <= static_cast<decltype(total_elements)>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit");
"Size of output exceeds column size limit",
std::overflow_error);

offsets_column->set_null_count(0);
return std::pair(std::move(offsets_column), static_cast<size_type>(total_elements));
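A note on the pattern in the hunks above: the scan total is computed in 64 bits and must still fit in cudf's 32-bit offset type, and the check now raises std::overflow_error rather than the default cudf::logic_error. A minimal standalone sketch of that guard in plain standard C++ (the size_type alias and check_offsets_fit helper are illustrative stand-ins, not cudf code):

#include <cstdint>
#include <limits>
#include <stdexcept>

// Stand-in for cudf::size_type, cudf's 32-bit signed row/offset index
// (an assumption for this sketch; the real alias lives in cudf/types.hpp).
using size_type = std::int32_t;

// The guard the hunks above express with CUDF_EXPECTS(..., std::overflow_error):
// a 64-bit scan total must still fit in a size_type offset.
void check_offsets_fit(std::int64_t total_bytes)
{
  if (total_bytes > static_cast<std::int64_t>(std::numeric_limits<size_type>::max())) {
    throw std::overflow_error("Size of output exceeds column size limit");
  }
}

int main()
{
  check_offsets_fit(1'000);  // fits comfortably

  try {
    check_offsets_fit(3'000'000'000);  // ~3 billion chars: too large for 32-bit offsets
  } catch (std::overflow_error const&) {
    // handled: callers now see an overflow_error instead of a generic logic_error
  }
  return 0;
}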
4 changes: 3 additions & 1 deletion cpp/include/cudf/lists/filling.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -53,6 +53,7 @@ namespace cudf::lists {
* @throws cudf::logic_error if @p sizes column is not of integer types.
* @throws cudf::logic_error if any input column has nulls.
* @throws cudf::logic_error if @p starts and @p sizes columns do not have the same size.
* @throws std::overflow_error if the output column would exceed the column size limit.
*
* @param starts First values in the result sequences.
* @param sizes Numbers of values in the result sequences.
@@ -90,6 +91,7 @@ std::unique_ptr<column> sequences(
* @throws cudf::logic_error if any input column has nulls.
* @throws cudf::logic_error if @p starts and @p steps columns have different types.
* @throws cudf::logic_error if @p starts, @p steps, and @p sizes columns do not have the same size.
* @throws std::overflow_error if the output column would exceed the column size limit.
*
* @param starts First values in the result sequences.
* @param steps Increment values for the result sequences.
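For orientation, cudf::lists::sequences builds one list per input row: row i holds sizes[i] values starting at starts[i] and advancing by steps[i] (or by 1 for the overload without steps). A host-side sketch of that semantics in plain C++ — sequences_sketch is an illustrative helper, not the cudf API — including the newly documented std::overflow_error guard:

#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

std::vector<std::vector<std::int64_t>> sequences_sketch(std::vector<std::int64_t> const& starts,
                                                        std::vector<std::int64_t> const& steps,
                                                        std::vector<std::int32_t> const& sizes)
{
  if (starts.size() != steps.size() || starts.size() != sizes.size()) {
    throw std::logic_error("starts, steps, and sizes columns do not have the same size");
  }

  // The same overflow guard the header now documents as std::overflow_error.
  std::int64_t total = 0;
  for (auto s : sizes) total += s;
  if (total > std::numeric_limits<std::int32_t>::max()) {
    throw std::overflow_error("output column would exceed the column size limit");
  }

  std::vector<std::vector<std::int64_t>> result(starts.size());
  for (std::size_t i = 0; i < starts.size(); ++i) {
    result[i].reserve(static_cast<std::size_t>(sizes[i]));
    for (std::int32_t j = 0; j < sizes[i]; ++j) {
      result[i].push_back(starts[i] + j * steps[i]);
    }
  }
  return result;
}

// Example: sequences_sketch({0, 10}, {1, 2}, {3, 2}) yields {{0, 1, 2}, {10, 12}}.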
7 changes: 5 additions & 2 deletions cpp/include/cudf/strings/detail/strings_children.cuh
@@ -27,6 +27,8 @@
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

#include <stdexcept>

namespace cudf {
namespace strings {
namespace detail {
@@ -35,7 +37,7 @@
* @brief Creates child offsets and chars columns by applying the template function that
* can be used for computing the output size of each string as well as create the output
*
* @throws cudf::logic_error if the output strings column exceeds the column size limit
* @throws std::overflow_error if the output strings column exceeds the column size limit
*
* @tparam SizeAndExecuteFunction Function must accept an index and return a size.
* It must also have members d_offsets and d_chars which are set to
@@ -78,7 +80,8 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
auto const bytes =
cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream);
CUDF_EXPECTS(bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit");
"Size of output exceeds column size limit",
std::overflow_error);

// Now build the chars column
std::unique_ptr<column> chars_column =
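The make_strings_children contract documented above is a two-pass pattern: the callable first reports each string's output size, the sizes are scanned into offsets, the total is checked against the column size limit (now raised as std::overflow_error), and the callable runs again to write the characters. A serial host-side sketch of that flow in plain C++; the size_of/write_at member names are assumptions for illustration only — the real contract uses operator() with d_offsets and d_chars members:

#include <cstddef>
#include <cstdint>
#include <limits>
#include <numeric>
#include <stdexcept>
#include <utility>
#include <vector>

template <typename SizeAndExec>
std::pair<std::vector<std::int32_t>, std::vector<char>> make_strings_children_sketch(
  SizeAndExec fn, std::int32_t strings_count)
{
  // Pass 1: ask the callable for each string's output size.
  std::vector<std::int64_t> sizes(static_cast<std::size_t>(strings_count));
  for (std::int32_t i = 0; i < strings_count; ++i) sizes[i] = fn.size_of(i);

  // Scan the sizes into offsets; the final offset is the total chars size.
  std::vector<std::int64_t> offsets(static_cast<std::size_t>(strings_count) + 1, 0);
  std::inclusive_scan(sizes.begin(), sizes.end(), offsets.begin() + 1);

  std::int64_t const bytes = offsets.back();
  if (bytes > std::numeric_limits<std::int32_t>::max()) {
    throw std::overflow_error("Size of output exceeds column size limit");
  }

  // Pass 2: let the callable write each string's bytes at its offset.
  std::vector<char> chars(static_cast<std::size_t>(bytes));
  for (std::int32_t i = 0; i < strings_count; ++i) fn.write_at(i, chars.data() + offsets[i]);

  return {std::vector<std::int32_t>(offsets.begin(), offsets.end()), chars};
}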
8 changes: 6 additions & 2 deletions cpp/src/io/json/write_json.cu
@@ -417,7 +417,9 @@ struct column_to_strings_fn {
auto child_view = lists_column_view(column).get_sliced_child(stream_);
auto constexpr child_index = lists_column_view::child_column_index;
auto list_string = [&]() {
auto child_string = [&]() {
// nulls are replaced due to special handling of all-null lists as empty lists
// by join_list_elements
auto child_string_with_null = [&]() {
if (child_view.type().id() == type_id::STRUCT) {
return (*this).template operator()<cudf::struct_view>(
child_view,
@@ -431,7 +433,9 @@
} else {
return cudf::type_dispatcher(child_view.type(), *this, child_view);
}
}();
};
auto child_string = cudf::strings::detail::replace_nulls(
child_string_with_null()->view(), narep, stream_, rmm::mr::get_current_device_resource());
auto const list_child_string =
column_view(column.type(),
column.size(),
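The new comment in this hunk is the key to the change: child nulls are replaced with the JSON null representation before joining, because the join step treats an all-null list as an empty list and would emit [] instead of [null,null]. A small host-side illustration in plain C++ (join_list is a stand-in for the library's join step, not the cudf API):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Join one list's elements into a JSON array string. An all-null (or empty)
// list is rendered as "[]" -- the behaviour the diff works around.
std::string join_list(std::vector<std::optional<std::string>> const& elems)
{
  bool const all_null =
    std::all_of(elems.begin(), elems.end(), [](auto const& e) { return !e.has_value(); });
  if (elems.empty() || all_null) { return "[]"; }

  std::string out = "[";
  for (std::size_t i = 0; i < elems.size(); ++i) {
    out += elems[i].value_or("null");
    if (i + 1 < elems.size()) { out += ","; }
  }
  return out + "]";
}

int main()
{
  std::vector<std::optional<std::string>> row{std::nullopt, std::nullopt};

  // Joining directly collapses the all-null list to "[]".
  std::cout << join_list(row) << "\n";  // []

  // Replacing nulls with the "null" representation first (what replace_nulls
  // does in the diff) preserves the list's length in the JSON output.
  std::vector<std::optional<std::string>> replaced;
  for (auto const& e : row) { replaced.emplace_back(e.value_or("null")); }
  std::cout << join_list(replaced) << "\n";  // [null,null]

  return 0;
}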
4 changes: 3 additions & 1 deletion cpp/src/lists/sequences.cu
@@ -34,6 +34,7 @@

#include <limits>
#include <optional>
#include <stdexcept>

namespace cudf::lists {
namespace detail {
@@ -169,7 +170,8 @@ std::unique_ptr<column> sequences(column_view const& starts,
auto const n_elements = cudf::detail::sizes_to_offsets(
sizes_input_it, sizes_input_it + n_lists + 1, offsets_begin, stream);
CUDF_EXPECTS(n_elements <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit");
"Size of output exceeds column size limit",
std::overflow_error);

auto child = type_dispatcher(starts.type(),
sequences_dispatcher{},
14 changes: 7 additions & 7 deletions cpp/src/partitioning/partitioning.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@
#include <cudf/detail/utilities/hash_functions.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/partitioning.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/default_stream.hpp>

@@ -489,9 +489,9 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition_table(
auto row_partition_offset =
cudf::detail::make_zeroed_device_uvector_async<size_type>(num_rows, stream);

auto const device_input = table_device_view::create(table_to_hash, stream);
auto const hasher = row_hasher<hash_function, nullate::DYNAMIC>(
nullate::DYNAMIC{hash_has_nulls}, *device_input, seed);
auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream);
auto const hasher =
row_hasher.device_hasher<hash_function>(nullate::DYNAMIC{hash_has_nulls}, seed);

// If the number of partitions is a power of two, we can compute the partition
// number of each row more efficiently with bitwise operations
@@ -578,7 +578,7 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition_table(
mr);
});

if (has_nulls(input)) {
if (has_nested_nulls(input)) {
// Use copy_block_partitions to compute a gather map
auto gather_map = compute_gather_map(num_rows,
num_partitions,
@@ -730,7 +730,7 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
return std::pair(empty_like(input), std::vector<size_type>(num_partitions, 0));
}

if (has_nulls(table_to_hash)) {
if (has_nested_nulls(table_to_hash)) {
return hash_partition_table<hash_function, true>(
input, table_to_hash, num_partitions, seed, stream, mr);
} else {
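One context line kept in this hunk notes that a power-of-two partition count lets the partition index be computed with bitwise operations. The identity being relied on is hash % num_partitions == hash & (num_partitions - 1) whenever num_partitions is a power of two; a small self-contained check in plain C++:

#include <cassert>
#include <cstdint>

int main()
{
  std::uint32_t const num_partitions = 16;        // a power of two
  std::uint32_t const mask = num_partitions - 1;  // 0b1111

  for (std::uint32_t hash = 0; hash < 1'000'000; ++hash) {
    // For a power-of-two divisor, masking the low bits equals the modulo
    // while avoiding a comparatively expensive integer division per row.
    assert((hash & mask) == (hash % num_partitions));
  }
  return 0;
}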
41 changes: 18 additions & 23 deletions cpp/src/sort/is_sorted.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
@@ -36,31 +36,27 @@ namespace detail {

auto is_sorted(cudf::table_view const& in,
std::vector<order> const& column_order,
bool has_nulls,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream)
{
// 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns
auto flattened = structs::detail::flatten_nested_columns(in, column_order, null_precedence);
auto const comparator =
experimental::row::lexicographic::self_comparator{in, column_order, null_precedence, stream};

auto const d_input = table_device_view::create(flattened, stream);
auto const d_column_order = make_device_uvector_async(flattened.orders(), stream);
auto const d_null_precedence = has_nulls
? make_device_uvector_async(flattened.null_orders(), stream)
: rmm::device_uvector<null_order>(0, stream);
if (cudf::detail::has_nested_columns(in)) {
auto const device_comparator = comparator.less<true>(has_nested_nulls(in));

auto comparator = row_lexicographic_comparator(nullate::DYNAMIC{has_nulls},
*d_input,
*d_input,
d_column_order.data(),
d_null_precedence.data());
return thrust::is_sorted(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(in.num_rows()),
device_comparator);
} else {
auto const device_comparator = comparator.less<false>(has_nested_nulls(in));

auto sorted = thrust::is_sorted(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(in.num_rows()),
comparator);

return sorted;
return thrust::is_sorted(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(in.num_rows()),
device_comparator);
}
}

} // namespace detail
@@ -83,8 +79,7 @@ bool is_sorted(cudf::table_view const& in,
"Number of columns in the table doesn't match the vector null_precedence's size .\n");
}

return detail::is_sorted(
in, column_order, has_nulls(in), null_precedence, cudf::get_default_stream());
return detail::is_sorted(in, column_order, null_precedence, cudf::get_default_stream());
}

} // namespace cudf
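The rewritten detail::is_sorted keeps the same underlying idiom: build a row comparator for the table, then ask whether the index sequence 0..num_rows-1 is sorted under it. A host-side sketch of that idiom over plain columns (illustrative only; it mirrors the counting-iterator plus comparator call to thrust::is_sorted, not the experimental row-operator API):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Lexicographic "less" over row indices of a column-major table -- the role the
// device comparator plays inside thrust::is_sorted in the diff above.
bool is_table_sorted(std::vector<std::vector<int>> const& columns, std::size_t num_rows)
{
  auto row_less = [&](std::size_t lhs, std::size_t rhs) {
    for (auto const& col : columns) {
      if (col[lhs] < col[rhs]) { return true; }
      if (col[rhs] < col[lhs]) { return false; }
    }
    return false;  // rows compare equal
  };

  // Equivalent of iterating a counting iterator over [0, num_rows).
  std::vector<std::size_t> row_indices(num_rows);
  std::iota(row_indices.begin(), row_indices.end(), std::size_t{0});
  return std::is_sorted(row_indices.begin(), row_indices.end(), row_less);
}

// is_table_sorted({{1, 1, 2}, {3, 4, 0}}, 3) == true  (rows: (1,3), (1,4), (2,0))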
7 changes: 5 additions & 2 deletions cpp/src/strings/regex/utilities.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,6 +28,8 @@

#include <thrust/scan.h>

#include <stdexcept>

namespace cudf {
namespace strings {
namespace detail {
@@ -134,7 +136,8 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
auto const char_bytes =
cudf::detail::sizes_to_offsets(d_offsets, d_offsets + strings_count + 1, d_offsets, stream);
CUDF_EXPECTS(char_bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
"Size of output exceeds column size limit");
"Size of output exceeds column size limit",
std::overflow_error);

// Now build the chars column
std::unique_ptr<column> chars =
5 changes: 4 additions & 1 deletion cpp/src/text/ngrams_tokenize.cu
@@ -39,6 +39,8 @@
#include <thrust/transform.h>
#include <thrust/transform_scan.h>

#include <stdexcept>

namespace nvtext {
namespace detail {
namespace {
@@ -220,7 +222,8 @@ std::unique_ptr<cudf::column> ngrams_tokenize(cudf::strings_column_view const& s
chars_offsets.begin(), chars_offsets.end(), chars_offsets.begin(), stream);
CUDF_EXPECTS(
output_chars_size <= static_cast<int64_t>(std::numeric_limits<cudf::size_type>::max()),
"Size of output exceeds column size limit");
"Size of output exceeds column size limit",
std::overflow_error);

// This will contain the size in bytes of each ngram to generate
rmm::device_uvector<cudf::size_type> ngram_sizes(total_ngrams, stream);
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
@@ -84,6 +84,7 @@ ConfigureTest(
groupby/count_scan_tests.cpp
groupby/count_tests.cpp
groupby/covariance_tests.cpp
groupby/groupby_test_util.cpp
groupby/groups_tests.cpp
groupby/keys_tests.cpp
groupby/lists_tests.cpp
