Fix exclude regex in pre-commit clang-format hook (#16030)
The clang-tidy changes in #15894 introduced a new exclude regex list for the pre-commit clang-format hook. However, the pattern was a single character too long, ending with a stray |. Consequently, the exclude regex matched the empty string, and hence excluded every C++ file.

Fix this, and apply formatting changes to the files that were modified in the interim and were not clang-format compatible.
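To see why the trailing | is fatal: an alternation with an empty final branch matches the empty string at every position, so search-style matching succeeds on any file path. A minimal sketch of the failure mode (using C++ std::regex and made-up file names purely for illustration; pre-commit itself evaluates exclude patterns with Python's re module):

```cpp
#include <iostream>
#include <regex>

int main()
{
  // Hypothetical exclude lists: the "broken" one ends its alternation with '|',
  // leaving an empty branch that matches any input at position 0.
  std::regex broken{"^(foo\\.h|bar\\.h|)"};  // stray trailing '|'
  std::regex fixed{"^(foo\\.h|bar\\.h)"};

  std::cout << std::regex_search("baz.cpp", broken) << '\n';  // 1: every file is "excluded"
  std::cout << std::regex_search("baz.cpp", fixed) << '\n';   // 0: only the listed files match
  return 0;
}
```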

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Bradley Dice (https://github.com/bdice)

URL: #16030
wence- committed Jun 14, 2024
1 parent 24fe359 commit 374ee13
Showing 3 changed files with 58 additions and 56 deletions.
.pre-commit-config.yaml (1 addition, 1 deletion)
@@ -60,7 +60,7 @@ repos:
    (?x)^(
      ^cpp/src/io/parquet/ipc/Schema_generated.h|
      ^cpp/src/io/parquet/ipc/Message_generated.h|
-     ^cpp/include/cudf_test/cxxopts.hpp|
+     ^cpp/include/cudf_test/cxxopts.hpp
    )
  - repo: https://github.com/sirosen/texthooks
    rev: 0.6.6
cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp (54 additions, 53 deletions)
@@ -50,11 +50,11 @@ std::string get_label(std::string const& test_name, nvbench::state const& state)
std::tuple<std::vector<cuio_source_sink_pair>, size_t, size_t> write_file_data(
  nvbench::state& state, std::vector<cudf::type_id> const& d_types)
{
-  auto const cardinality = state.get_int64("cardinality");
-  auto const run_length = state.get_int64("run_length");
-  auto const num_cols = state.get_int64("num_cols");
-  size_t const num_files = get_num_read_threads(state);
-  size_t const per_file_data_size = get_read_size(state);
+  auto const cardinality          = state.get_int64("cardinality");
+  auto const run_length           = state.get_int64("run_length");
+  auto const num_cols             = state.get_int64("num_cols");
+  size_t const num_files          = get_num_read_threads(state);
+  size_t const per_file_data_size = get_read_size(state);

  std::vector<cuio_source_sink_pair> source_sink_vector;

@@ -86,7 +86,7 @@ void BM_orc_multithreaded_read_common(nvbench::state& state,
                                      std::vector<cudf::type_id> const& d_types,
                                      std::string const& label)
{
-  auto const data_size = state.get_int64("total_data_size");
+  auto const data_size   = state.get_int64("total_data_size");
  auto const num_threads = state.get_int64("num_threads");

  auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
@@ -104,24 +104,24 @@
  {
    cudf::scoped_range range{("(read) " + label).c_str()};
    state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-      [&](nvbench::launch& launch, auto& timer) {
-        auto read_func = [&](int index) {
-          auto const stream = streams[index % num_threads];
-          cudf::io::orc_reader_options read_opts =
-            cudf::io::orc_reader_options::builder(source_info_vector[index]);
-          cudf::io::read_orc(read_opts, stream, rmm::mr::get_current_device_resource());
-        };
-
-        threads.paused = true;
-        for (size_t i = 0; i < num_files; ++i) {
-          threads.submit(read_func, i);
-        }
-        timer.start();
-        threads.paused = false;
-        threads.wait_for_tasks();
-        cudf::detail::join_streams(streams, cudf::get_default_stream());
-        timer.stop();
-      });
+             [&](nvbench::launch& launch, auto& timer) {
+               auto read_func = [&](int index) {
+                 auto const stream = streams[index % num_threads];
+                 cudf::io::orc_reader_options read_opts =
+                   cudf::io::orc_reader_options::builder(source_info_vector[index]);
+                 cudf::io::read_orc(read_opts, stream, rmm::mr::get_current_device_resource());
+               };
+
+               threads.paused = true;
+               for (size_t i = 0; i < num_files; ++i) {
+                 threads.submit(read_func, i);
+               }
+               timer.start();
+               threads.paused = false;
+               threads.wait_for_tasks();
+               cudf::detail::join_streams(streams, cudf::get_default_stream());
+               timer.stop();
+             });
  }

  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
@@ -184,34 +184,35 @@ void BM_orc_multithreaded_read_chunked_common(nvbench::state& state,
cudf::scoped_range range{("(read) " + label).c_str()};
std::vector<cudf::io::table_with_metadata> chunks;
state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
[&](nvbench::launch& launch, auto& timer) {
auto read_func = [&](int index) {
auto const stream = streams[index % num_threads];
cudf::io::orc_reader_options read_opts =
cudf::io::orc_reader_options::builder(source_info_vector[index]);
// divide chunk limits by number of threads so the number of chunks produced is the
// same for all cases. this seems better than the alternative, which is to keep the
// limits the same. if we do that, as the number of threads goes up, the number of
// chunks goes down - so are actually benchmarking the same thing in that case?
auto reader = cudf::io::chunked_orc_reader(
output_limit / num_threads, input_limit / num_threads, read_opts, stream);

// read all the chunks
do {
auto table = reader.read_chunk();
} while (reader.has_next());
};

threads.paused = true;
for (size_t i = 0; i < num_files; ++i) {
threads.submit(read_func, i);
}
timer.start();
threads.paused = false;
threads.wait_for_tasks();
cudf::detail::join_streams(streams, cudf::get_default_stream());
timer.stop();
});
[&](nvbench::launch& launch, auto& timer) {
auto read_func = [&](int index) {
auto const stream = streams[index % num_threads];
cudf::io::orc_reader_options read_opts =
cudf::io::orc_reader_options::builder(source_info_vector[index]);
// divide chunk limits by number of threads so the number of chunks produced is
// the same for all cases. this seems better than the alternative, which is to
// keep the limits the same. if we do that, as the number of threads goes up, the
// number of chunks goes down - so are actually benchmarking the same thing in
// that case?
auto reader = cudf::io::chunked_orc_reader(
output_limit / num_threads, input_limit / num_threads, read_opts, stream);

// read all the chunks
do {
auto table = reader.read_chunk();
} while (reader.has_next());
};

threads.paused = true;
for (size_t i = 0; i < num_files; ++i) {
threads.submit(read_func, i);
}
timer.start();
threads.paused = false;
threads.wait_for_tasks();
cudf::detail::join_streams(streams, cudf::get_default_stream());
timer.stop();
});
}

auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
@@ -249,7 +250,7 @@ void BM_orc_multithreaded_read_chunked_list(nvbench::state& state)
  cudf::scoped_range range{label.c_str()};
  BM_orc_multithreaded_read_chunked_common(state, {cudf::type_id::LIST}, label);
}
-auto const thread_range = std::vector<nvbench::int64_t>{1, 2, 4, 8};
+auto const thread_range    = std::vector<nvbench::int64_t>{1, 2, 4, 8};
auto const total_data_size = std::vector<nvbench::int64_t>{512 * 1024 * 1024, 1024 * 1024 * 1024};

// mixed data types: fixed width and strings
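The comment in BM_orc_multithreaded_read_chunked_common above argues for dividing the output/input limits by the thread count so that each chunked reader produces a comparable number of chunks at every thread count. A rough arithmetic sketch of that effect, using a hypothetical 1 GiB input budget (illustrative numbers only, not part of this change):

```cpp
#include <cstddef>
#include <iostream>

int main()
{
  // Hypothetical 1 GiB total input budget, split across the benchmark's thread counts.
  std::size_t const input_limit = 1024u * 1024u * 1024u;
  for (std::size_t num_threads : {1, 2, 4, 8}) {
    // Each of the num_threads chunked readers gets an equal share of the budget,
    // so the number of chunks each reader produces stays roughly constant.
    std::cout << num_threads << " thread(s): " << (input_limit / num_threads) / (1024 * 1024)
              << " MiB per reader\n";
  }
  return 0;
}
```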
cpp/tests/interop/from_arrow_test.cpp (3 additions, 2 deletions)
@@ -50,7 +50,8 @@ std::unique_ptr<cudf::table> get_cudf_table()
                                                              {true, false, true, true, true});
  columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
  columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(
-    {true, false, true, false, true}, {true, false, true, true, false}).release());
+                         {true, false, true, false, true}, {true, false, true, true, false})
+                         .release());
  columns.emplace_back(cudf::test::strings_column_wrapper(
    {
      "",
@@ -338,7 +339,7 @@ TEST_F(FromArrowTest, ChunkedArray)
    std::vector<std::shared_ptr<arrow::Array>>{dict_array1, dict_array2});
  auto boolean_array =
    get_arrow_array<bool>({true, false, true, false, true}, {true, false, true, true, false});
-  auto boolean_chunked_array = std::make_shared<arrow::ChunkedArray>(boolean_array);
+  auto boolean_chunked_array      = std::make_shared<arrow::ChunkedArray>(boolean_array);
  auto large_string_chunked_array = std::make_shared<arrow::ChunkedArray>(
    std::vector<std::shared_ptr<arrow::Array>>{large_string_array_1});

