From acfcd8abdb6c21548cff9c1ef75a0a7e438a160e Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 13 Jun 2024 23:48:57 +0000 Subject: [PATCH] Fix int32 overflow for large string columns when computing fragment sizes --- cpp/src/io/parquet/writer_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 6d466748c17..ca15b532d07 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1763,10 +1763,10 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, // for multiple fragments per page to smooth things out. using 2 was too // unbalanced in final page sizes, so using 4 which seems to be a good // compromise at smoothing things out without getting fragment sizes too small. - auto frag_size_fn = [&](auto const& col, size_type col_size) { + auto frag_size_fn = [&](auto const& col, size_t col_size) { int const target_frags_per_page = is_col_fixed_width(col) ? 1 : 4; auto const avg_len = - target_frags_per_page * util::div_rounding_up_safe(col_size, input.num_rows()); + target_frags_per_page * util::div_rounding_up_safe(col_size, input.num_rows()); if (avg_len > 0) { auto const frag_size = util::div_rounding_up_safe(max_page_size_bytes, avg_len); return std::min(max_page_fragment_size, frag_size);