From 2277377b59abbad1ba14f03c9f1e16ba1a4c9c64 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 29 May 2024 14:05:33 -0700 Subject: [PATCH 1/6] Null handling for `NaN` and `inf` Signed-off-by: Nghia Truong --- cpp/src/unary/cast_ops.cu | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 98c412f805d..77949e232d2 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -15,11 +15,13 @@ */ #include +#include #include #include #include #include #include +#include #include #include #include @@ -294,8 +296,8 @@ struct dispatch_unary_cast_to { std::make_unique(type, size, rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::copy_bitmask(input, stream, mr), - input.null_count()); + rmm::device_buffer{}, + 0); mutable_column_view output_mutable = *output; @@ -308,6 +310,25 @@ struct dispatch_unary_cast_to { output_mutable.begin(), fixed_point_unary_cast{scale}); + if constexpr (cudf::is_floating_point()) { + // For floating-point values, beside input nulls, we also need to set nulls for the output + // rows corresponding to NaN and inf in the input. + auto const d_input_ptr = column_device_view::create(input, stream); + auto const is_valid = [d_input = *d_input_ptr] __device__(auto const idx) { + if (d_input.is_null(idx)) { return false; } + auto const value = d_input.element(idx); + return !std::isnan(value) && !std::isinf(value); + }; + auto [null_mask, null_count] = cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + is_valid, + stream, + mr); + output->set_null_mask(std::move(null_mask), null_count); + } else { + output->set_null_mask(detail::copy_bitmask(input, stream, mr), input.null_count()); + } + return output; } From 77c7e981d135880fad965f88e59605e2b5228349 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 29 May 2024 14:18:19 -0700 Subject: [PATCH 2/6] Fix compile error Signed-off-by: Nghia Truong --- cpp/src/unary/cast_ops.cu | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 77949e232d2..14fa08dd257 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -221,6 +221,25 @@ std::unique_ptr rescale(column_view input, } }; +/** + * @brief Check if a floating point value is convertible to fixed point type. + * + * A floating point value is convertible only if: + * - It is not `NaN` and `inf`, and + * - Its rounded value is within the range that can represented by the target fixed point type. + */ +template +struct is_convertible_floating_point { + column_device_view d_input; + // TODO: enable_if for only float types + bool __device__ operator()(size_type idx) const + { + if (d_input.is_null(idx)) { return false; } + auto const value = d_input.element(idx); + return !std::isnan(value) && !std::isinf(value); + } +}; + template struct dispatch_unary_cast_to { column_view input; @@ -314,17 +333,13 @@ struct dispatch_unary_cast_to { // For floating-point values, beside input nulls, we also need to set nulls for the output // rows corresponding to NaN and inf in the input. auto const d_input_ptr = column_device_view::create(input, stream); - auto const is_valid = [d_input = *d_input_ptr] __device__(auto const idx) { - if (d_input.is_null(idx)) { return false; } - auto const value = d_input.element(idx); - return !std::isnan(value) && !std::isinf(value); - }; - auto [null_mask, null_count] = cudf::detail::valid_if(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(size), - is_valid, - stream, - mr); - output->set_null_mask(std::move(null_mask), null_count); + auto [null_mask, null_count] = + cudf::detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + is_convertible_floating_point{*d_input_ptr}, + stream, + mr); + if (null_count > 0) { output->set_null_mask(std::move(null_mask), null_count); } } else { output->set_null_mask(detail::copy_bitmask(input, stream, mr), input.null_count()); } From 1701d472bd1094499fd625a41fffe41a074cbdb3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 29 May 2024 14:50:57 -0700 Subject: [PATCH 3/6] Update comments and add assertion Signed-off-by: Nghia Truong --- cpp/src/unary/cast_ops.cu | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 14fa08dd257..cb313cf60d4 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -224,17 +224,21 @@ std::unique_ptr rescale(column_view input, /** * @brief Check if a floating point value is convertible to fixed point type. * - * A floating point value is convertible only if: - * - It is not `NaN` and `inf`, and - * - Its rounded value is within the range that can represented by the target fixed point type. + * A floating point value is convertible if it is not null, not `NaN`, and not `inf`. + * + * Note that convertible input values may still be out of representible range of the target fixed + * point type and need to be checked separately. */ template struct is_convertible_floating_point { column_device_view d_input; - // TODO: enable_if for only float types + bool __device__ operator()(size_type idx) const { + static_assert(std::is_floating_point_v); + if (d_input.is_null(idx)) { return false; } + auto const value = d_input.element(idx); return !std::isnan(value) && !std::isinf(value); } From 0af5575dd8320702782c27b985307a6f9429ddf8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 29 May 2024 14:51:02 -0700 Subject: [PATCH 4/6] Add test Signed-off-by: Nghia Truong --- cpp/tests/unary/cast_tests.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index a82449ffc10..6c1124d252c 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -665,6 +665,27 @@ TYPED_TEST(FixedPointTests, CastFromDouble) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TYPED_TEST(FixedPointTests, CastFromDoubleWithNaNAndInf) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; + + auto const NaN = std::numeric_limits::quiet_NaN(); + auto const inf = std::numeric_limits::infinity(); + auto const null = 0; + + auto const input = fw_wrapper{1.729, NaN, 172.9, NaN, inf, 1.23, inf}; + auto const expected = fp_wrapper{{1729, null, 172900, null, null, 1230, null}, + {true, false, true, false, false, true, false}, + scale_type{-3}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-3)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(FixedPointTests, CastFromDoubleLarge) { using namespace numeric; From 88ae731ce721bacbbd284d79c1c09a76525bf3d3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 29 May 2024 15:49:19 -0700 Subject: [PATCH 5/6] Add `-inf` Signed-off-by: Nghia Truong --- cpp/tests/unary/cast_tests.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index 6c1124d252c..ebeafc82039 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -677,9 +677,9 @@ TYPED_TEST(FixedPointTests, CastFromDoubleWithNaNAndInf) auto const inf = std::numeric_limits::infinity(); auto const null = 0; - auto const input = fw_wrapper{1.729, NaN, 172.9, NaN, inf, 1.23, inf}; - auto const expected = fp_wrapper{{1729, null, 172900, null, null, 1230, null}, - {true, false, true, false, false, true, false}, + auto const input = fw_wrapper{1.729, -inf, NaN, 172.9, -inf, NaN, inf, 1.23, inf}; + auto const expected = fp_wrapper{{1729, null, null, 172900, null, null, null, 1230, null}, + {true, false, false, true, false, false, false, true, false}, scale_type{-3}}; auto const result = cudf::cast(input, make_fixed_point_data_type(-3)); From 27acae663779d018746ff85a6bb250bc08417ede Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 3 Jun 2024 21:10:23 -0700 Subject: [PATCH 6/6] Change comments and use `std::isfinite` Signed-off-by: Nghia Truong --- cpp/src/unary/cast_ops.cu | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index cb313cf60d4..64427326d87 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -226,8 +226,8 @@ std::unique_ptr rescale(column_view input, * * A floating point value is convertible if it is not null, not `NaN`, and not `inf`. * - * Note that convertible input values may still be out of representible range of the target fixed - * point type and need to be checked separately. + * Note that convertible input values may be out of the representable range of the target fixed + * point type. Values out of the representable range need to be checked separately. */ template struct is_convertible_floating_point { @@ -238,9 +238,8 @@ struct is_convertible_floating_point { static_assert(std::is_floating_point_v); if (d_input.is_null(idx)) { return false; } - auto const value = d_input.element(idx); - return !std::isnan(value) && !std::isinf(value); + return std::isfinite(value); } };