diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 793e3e9ad00a3..8798942c18a36 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -75,6 +75,11 @@ static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = { // clang-format on }; +// Attention: these pre-computed constants might not exactly represent their +// decimal counterparts: +// >>> int(1e38) +// 99999999999999997748809823456034029568 + static constexpr float kFloatPowersOfTen[2 * 38 + 1] = { 1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, 1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f, diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index ebba3029c3c55..ed262131aec06 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -753,7 +753,7 @@ TEST_P(Decimal128ToStringTest, ToString) { const ToStringTestParam& param = GetParam(); const Decimal128 value(param.test_value); const std::string printed_value = value.ToString(param.scale); - ASSERT_EQ(param.expected_string, printed_value); + EXPECT_EQ(param.expected_string, printed_value); } INSTANTIATE_TEST_SUITE_P(Decimal128ToStringTest, Decimal128ToStringTest, @@ -763,14 +763,14 @@ template void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale, const std::string& expected) { ASSERT_OK_AND_ASSIGN(auto dec, Decimal::FromReal(real, precision, scale)); - ASSERT_EQ(dec.ToString(scale), expected); + EXPECT_EQ(dec.ToString(scale), expected); } template void CheckDecimalFromRealIntegerString(Real real, int32_t precision, int32_t scale, const std::string& expected) { ASSERT_OK_AND_ASSIGN(auto dec, Decimal::FromReal(real, precision, scale)); - ASSERT_EQ(dec.ToIntegerString(), expected); + EXPECT_EQ(dec.ToIntegerString(), expected); } template @@ -868,24 +868,27 @@ template class TestDecimalFromRealFloat : public ::testing::Test { protected: std::vector 
GetValues() { - return {// 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) - FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"}, - FromFloatTestParam{-9.223373e+18f, 19, 0, "-9223373136366403584"}, - FromFloatTestParam{9.223373e+14f, 19, 4, "922337313636640.3584"}, - FromFloatTestParam{-9.223373e+14f, 19, 4, "-922337313636640.3584"}, - // 2**64 - 2**40 (exactly representable in a float) - FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"}, - FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"}, - // 2**64 + 2**41 (exactly representable in a float) - FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"}, - FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"}, - FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"}, - FromFloatTestParam{-1.8446746e+15f, 20, 4, "-1844674627273280.7168"}, - // Almost 10**38 (minus 2**103) - FromFloatTestParam{9.999999e+37f, 38, 0, - "99999986661652122824821048795547566080"}, - FromFloatTestParam{-9.999999e+37f, 38, 0, - "-99999986661652122824821048795547566080"}}; + return { + // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) + FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"}, + FromFloatTestParam{-9.223373e+18f, 19, 0, "-9223373136366403584"}, + FromFloatTestParam{9.223373e+14f, 19, 4, "922337313636640.3584"}, + FromFloatTestParam{-9.223373e+14f, 19, 4, "-922337313636640.3584"}, + // 2**64 - 2**40 (exactly representable in a float) + FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"}, + FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"}, + // 2**64 + 2**41 (exactly representable in a float) + FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"}, + FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"}, + FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"}, + FromFloatTestParam{-1.8446746e+15f, 20, 4, 
"-1844674627273280.7168"}, + // Almost 10**38 (minus 2**103) + FromFloatTestParam{9.999999e+37f, 38, 0, + "99999986661652122824821048795547566080"}, + FromFloatTestParam{-9.999999e+37f, 38, 0, + "-99999986661652122824821048795547566080"}, + // TODO Hand-picked test cases that can involve precision issues + }; } }; TYPED_TEST_SUITE(TestDecimalFromRealFloat, DecimalTypes); @@ -916,28 +919,44 @@ template class TestDecimalFromRealDouble : public ::testing::Test { protected: std::vector GetValues() { - return {// 2**63 + 2**11 (exactly representable in a double's 53 bits of precision) - FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"}, - FromDoubleTestParam{-9.223372036854778e+18, 19, 0, "-9223372036854777856"}, - FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777856"}, - FromDoubleTestParam{-9.223372036854778e+10, 19, 8, "-92233720368.54777856"}, - // 2**64 - 2**11 (exactly representable in a double) - FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"}, - FromDoubleTestParam{-1.844674407370955e+19, 20, 0, "-18446744073709549568"}, - // 2**64 + 2**11 (exactly representable in a double) - FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"}, - FromDoubleTestParam{-1.8446744073709556e+19, 20, 0, "-18446744073709555712"}, - FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5712"}, - FromDoubleTestParam{-1.8446744073709556e+15, 20, 4, "-1844674407370955.5712"}, - // Almost 10**38 (minus 2**73) - FromDoubleTestParam{9.999999999999998e+37, 38, 0, - "99999999999999978859343891977453174784"}, - FromDoubleTestParam{-9.999999999999998e+37, 38, 0, - "-99999999999999978859343891977453174784"}, - FromDoubleTestParam{9.999999999999998e+27, 38, 10, - "9999999999999997885934389197.7453174784"}, - FromDoubleTestParam{-9.999999999999998e+27, 38, 10, - "-9999999999999997885934389197.7453174784"}}; + return { + // 2**63 + 2**11 (exactly representable in a double's 53 bits of precision) 
+ FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"}, + FromDoubleTestParam{-9.223372036854778e+18, 19, 0, "-9223372036854777856"}, + FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777856"}, + FromDoubleTestParam{-9.223372036854778e+10, 19, 8, "-92233720368.54777856"}, + // 2**64 - 2**11 (exactly representable in a double) + FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"}, + FromDoubleTestParam{-1.844674407370955e+19, 20, 0, "-18446744073709549568"}, + // 2**64 + 2**11 (exactly representable in a double) + FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"}, + FromDoubleTestParam{-1.8446744073709556e+19, 20, 0, "-18446744073709555712"}, + FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5712"}, + FromDoubleTestParam{-1.8446744073709556e+15, 20, 4, "-1844674407370955.5712"}, + // Almost 10**38 (minus 2**73) + FromDoubleTestParam{9.999999999999998e+37, 38, 0, + "99999999999999978859343891977453174784"}, + FromDoubleTestParam{-9.999999999999998e+37, 38, 0, + "-99999999999999978859343891977453174784"}, + FromDoubleTestParam{9.999999999999998e+27, 38, 10, + "9999999999999997885934389197.7453174784"}, + FromDoubleTestParam{-9.999999999999998e+27, 38, 10, + "-9999999999999997885934389197.7453174784"}, + // Hand-picked test cases that can involve precision issues. + // More comprehensive testing is done in the PyArrow test suite. 
+ FromDoubleTestParam{999999999999999.0, 16, 1, "999999999999999.0"}, + FromDoubleTestParam{-999999999999999.0, 16, 1, "-999999999999999.0"}, + FromDoubleTestParam{9999999999999998.0, 17, 1, "9999999999999998.0"}, + FromDoubleTestParam{-9999999999999998.0, 17, 1, "-9999999999999998.0"}, + FromDoubleTestParam{999999999999999.9, 16, 1, "999999999999999.9"}, + FromDoubleTestParam{-999999999999999.9, 16, 1, "-999999999999999.9"}, + FromDoubleTestParam{9999999987., 38, 22, "9999999987.0000000000000000000000"}, + FromDoubleTestParam{-9999999987., 38, 22, "-9999999987.0000000000000000000000"}, + FromDoubleTestParam{9999999987., 38, 28, + "9999999987.0000000000000000000000000000"}, + FromDoubleTestParam{-9999999987., 38, 28, + "-9999999987.0000000000000000000000000000"}, + }; } }; TYPED_TEST_SUITE(TestDecimalFromRealDouble, DecimalTypes); @@ -952,15 +971,26 @@ TYPED_TEST(TestDecimalFromRealDouble, SuccessConversion) { TYPED_TEST(TestDecimalFromRealDouble, LargeValues) { // Test the entire double range for (int32_t scale = -308; scale <= 308; ++scale) { + ARROW_SCOPED_TRACE("scale = ", scale); double real = std::pow(10.0, static_cast<double>(scale)); CheckDecimalFromRealIntegerString<TypeParam>(real, 1, -scale, "1"); } for (int32_t scale = -307; scale <= 306; ++scale) { + ARROW_SCOPED_TRACE("scale = ", scale); double real = 123. * std::pow(10.0, static_cast<double>(scale)); CheckDecimalFromRealIntegerString<TypeParam>(real, 2, -scale - 1, "12"); CheckDecimalFromRealIntegerString<TypeParam>(real, 3, -scale, "123"); CheckDecimalFromRealIntegerString<TypeParam>(real, 4, -scale + 1, "1230"); } + for (int32_t scale = -292; scale <= 291; ++scale) { + // 1234567890123456 (16 digits) is below 2**53, hence exactly representable as a double + double real = 1234567890123456. 
* std::pow(10.0, static_cast<double>(scale)); + ARROW_SCOPED_TRACE("scale = ", scale); + CheckDecimalFromRealIntegerString<TypeParam>(real, 15, -scale - 1, "123456789012346"); + CheckDecimalFromRealIntegerString<TypeParam>(real, 16, -scale, "1234567890123456"); + // Cannot test precision 17 as the trailing digit depends on FP rounding + // during the computation of `real` above. + } } // Additional values that only apply to Decimal256 diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 875d0e613b6ca..39b2491b4bdde 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -15,10 +15,13 @@ # specific language governing permissions and limitations # under the License. +from collections import namedtuple import datetime +import decimal from functools import lru_cache, partial import inspect import itertools +import math import os import pickle import pytest @@ -79,6 +82,8 @@ pa.float64() ] +floating_point_arrow_types = [pa.float32(), pa.float64()] + def test_exported_functions(): # Check that all exported concrete functions can be called with @@ -1818,6 +1823,83 @@ def test_fsl_to_fsl_cast(value_type): fsl.cast(cast_type) +DecimalTypeTraits = namedtuple('DecimalTypeTraits', + ('name', 'factory', 'max_precision')) + +FloatToDecimalCase = namedtuple('FloatToDecimalCase', + ('precision', 'scale', 'float_val')) + +#decimal_type_traits = [DecimalTypeTraits(pa.decimal128, 38), + #DecimalTypeTraits(pa.decimal256, 76)] +decimal_type_traits = [DecimalTypeTraits('decimal128', pa.decimal128, 38)] + + +def largest_scaled_float_not_above(val, scale): + # Find the largest float f such that `f * 10**scale <= val` + assert val >= 0 + float_val = float(val * 10**-scale) + if float_val * 10**scale > val: + # Take the float just below... 
it *should* satisfy + float_val = math.nextafter(float_val, 0.0) + assert float_val * 10**scale <= val + #print("val =", val, float_val) + return float_val + + +def integral_float_to_decimal_cast_cases(max_precision): + for precision in range(1, max_precision): + #for scale in range(0, min(10, precision)): + for scale in range(0, precision): + abs_minval = 1 + # Exact unscaled limit in the integer domain + abs_maxval = largest_scaled_float_not_above( + 10**precision - 10**scale, scale) + for val in (0, abs_minval, -abs_minval, + abs_maxval, -abs_maxval): + yield FloatToDecimalCase(precision, scale, float(val)) + + +def real_float_to_decimal_cast_cases(max_precision): + for precision in range(1, max_precision): + for scale in range(0, min(10, precision)): + #for scale in range(0, precision): + abs_minval = largest_scaled_float_not_above(1, scale) + abs_maxval = largest_scaled_float_not_above( + 10**precision - 1, scale) + for val in (0, abs_minval, -abs_minval, + abs_maxval, -abs_maxval): + yield FloatToDecimalCase(precision, scale, float(val)) + + +#@pytest.mark.parametrize('float_ty', floating_point_arrow_types, ids=str) +@pytest.mark.parametrize('float_ty', [pa.float64()], ids=str) +@pytest.mark.parametrize('decimal_ty', decimal_type_traits, + ids=lambda v: v.name) +@pytest.mark.parametrize('case_generator', + [integral_float_to_decimal_cast_cases, + real_float_to_decimal_cast_cases], + ids=['integrals', 'reals']) +def test_cast_float_to_decimal(float_ty, decimal_ty, case_generator): + with decimal.localcontext() as ctx: + for case in case_generator(decimal_ty.max_precision): + # Use the Python decimal module to build the expected result + # using the right precision + ctx.prec = case.precision + # XXX which decimal rounding mode is expected?? 
+ s = pa.scalar(case.float_val, type=float_ty) + expected = ctx.create_decimal_from_float(case.float_val) + print(f"{case} => expected = {expected!r}") + cast_to = decimal_ty.factory(case.precision, case.scale) + actual = pc.cast(s, cast_to).as_py() + assert actual == expected + +# TODO add random-generated cases of float-to-decimal cast: +# - by generating random integers and scaling them using decimal.Decimal +# - by generating random floats and converting them to decimal.Decimal +# XXX move these to test_cast.py / test_decimal.py? +# TODO overflow tests + + def test_strptime(): arr = pa.array(["5/1/2020", None, "12/13/1900"])