Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Jun 6, 2023
1 parent 7f8ccb5 commit 9ea5dd6
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 43 deletions.
5 changes: 5 additions & 0 deletions cpp/src/arrow/util/decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = {
// clang-format on
};

// Attention: these pre-computed constants might not exactly represent their
// decimal counterparts:
// >>> int(1e38)
// 99999999999999997748809823456034029568

static constexpr float kFloatPowersOfTen[2 * 38 + 1] = {
1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f,
1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f,
Expand Down
116 changes: 73 additions & 43 deletions cpp/src/arrow/util/decimal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ TEST_P(Decimal128ToStringTest, ToString) {
const ToStringTestParam& param = GetParam();
const Decimal128 value(param.test_value);
const std::string printed_value = value.ToString(param.scale);
ASSERT_EQ(param.expected_string, printed_value);
EXPECT_EQ(param.expected_string, printed_value);
}

INSTANTIATE_TEST_SUITE_P(Decimal128ToStringTest, Decimal128ToStringTest,
Expand All @@ -763,14 +763,14 @@ template <typename Decimal, typename Real>
void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale,
const std::string& expected) {
ASSERT_OK_AND_ASSIGN(auto dec, Decimal::FromReal(real, precision, scale));
ASSERT_EQ(dec.ToString(scale), expected);
EXPECT_EQ(dec.ToString(scale), expected);
}

template <typename Decimal, typename Real>
void CheckDecimalFromRealIntegerString(Real real, int32_t precision, int32_t scale,
const std::string& expected) {
ASSERT_OK_AND_ASSIGN(auto dec, Decimal::FromReal(real, precision, scale));
ASSERT_EQ(dec.ToIntegerString(), expected);
EXPECT_EQ(dec.ToIntegerString(), expected);
}

template <typename Real>
Expand Down Expand Up @@ -868,24 +868,27 @@ template <typename T>
class TestDecimalFromRealFloat : public ::testing::Test {
protected:
std::vector<FromFloatTestParam> GetValues() {
return {// 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"},
FromFloatTestParam{-9.223373e+18f, 19, 0, "-9223373136366403584"},
FromFloatTestParam{9.223373e+14f, 19, 4, "922337313636640.3584"},
FromFloatTestParam{-9.223373e+14f, 19, 4, "-922337313636640.3584"},
// 2**64 - 2**40 (exactly representable in a float)
FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"},
FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"},
// 2**64 + 2**41 (exactly representable in a float)
FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"},
FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"},
FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"},
FromFloatTestParam{-1.8446746e+15f, 20, 4, "-1844674627273280.7168"},
// Almost 10**38 (minus 2**103)
FromFloatTestParam{9.999999e+37f, 38, 0,
"99999986661652122824821048795547566080"},
FromFloatTestParam{-9.999999e+37f, 38, 0,
"-99999986661652122824821048795547566080"}};
return {
// 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"},
FromFloatTestParam{-9.223373e+18f, 19, 0, "-9223373136366403584"},
FromFloatTestParam{9.223373e+14f, 19, 4, "922337313636640.3584"},
FromFloatTestParam{-9.223373e+14f, 19, 4, "-922337313636640.3584"},
// 2**64 - 2**40 (exactly representable in a float)
FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"},
FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"},
// 2**64 + 2**41 (exactly representable in a float)
FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"},
FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"},
FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"},
FromFloatTestParam{-1.8446746e+15f, 20, 4, "-1844674627273280.7168"},
// Almost 10**38 (minus 2**103)
FromFloatTestParam{9.999999e+37f, 38, 0,
"99999986661652122824821048795547566080"},
FromFloatTestParam{-9.999999e+37f, 38, 0,
"-99999986661652122824821048795547566080"},
// TODO Hand-picked test cases that can involve precision issues
};
}
};
TYPED_TEST_SUITE(TestDecimalFromRealFloat, DecimalTypes);
Expand Down Expand Up @@ -916,28 +919,44 @@ template <typename T>
class TestDecimalFromRealDouble : public ::testing::Test {
protected:
std::vector<FromDoubleTestParam> GetValues() {
return {// 2**63 + 2**11 (exactly representable in a double's 53 bits of precision)
FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"},
FromDoubleTestParam{-9.223372036854778e+18, 19, 0, "-9223372036854777856"},
FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777856"},
FromDoubleTestParam{-9.223372036854778e+10, 19, 8, "-92233720368.54777856"},
// 2**64 - 2**11 (exactly representable in a double)
FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"},
FromDoubleTestParam{-1.844674407370955e+19, 20, 0, "-18446744073709549568"},
// 2**64 + 2**11 (exactly representable in a double)
FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"},
FromDoubleTestParam{-1.8446744073709556e+19, 20, 0, "-18446744073709555712"},
FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5712"},
FromDoubleTestParam{-1.8446744073709556e+15, 20, 4, "-1844674407370955.5712"},
// Almost 10**38 (minus 2**73)
FromDoubleTestParam{9.999999999999998e+37, 38, 0,
"99999999999999978859343891977453174784"},
FromDoubleTestParam{-9.999999999999998e+37, 38, 0,
"-99999999999999978859343891977453174784"},
FromDoubleTestParam{9.999999999999998e+27, 38, 10,
"9999999999999997885934389197.7453174784"},
FromDoubleTestParam{-9.999999999999998e+27, 38, 10,
"-9999999999999997885934389197.7453174784"}};
return {
    // 2**63 + 2**11 (exactly representable in a double's 53 bits of precision)
    FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"},
    FromDoubleTestParam{-9.223372036854778e+18, 19, 0, "-9223372036854777856"},
    FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777856"},
    FromDoubleTestParam{-9.223372036854778e+10, 19, 8, "-92233720368.54777856"},
    // 2**64 - 2**11 (exactly representable in a double)
    FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"},
    FromDoubleTestParam{-1.844674407370955e+19, 20, 0, "-18446744073709549568"},
    // 2**64 + 2**11 (exactly representable in a double)
    FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"},
    FromDoubleTestParam{-1.8446744073709556e+19, 20, 0, "-18446744073709555712"},
    FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5712"},
    FromDoubleTestParam{-1.8446744073709556e+15, 20, 4, "-1844674407370955.5712"},
    // Almost 10**38 (minus 2**73)
    FromDoubleTestParam{9.999999999999998e+37, 38, 0,
                        "99999999999999978859343891977453174784"},
    FromDoubleTestParam{-9.999999999999998e+37, 38, 0,
                        "-99999999999999978859343891977453174784"},
    FromDoubleTestParam{9.999999999999998e+27, 38, 10,
                        "9999999999999997885934389197.7453174784"},
    FromDoubleTestParam{-9.999999999999998e+27, 38, 10,
                        "-9999999999999997885934389197.7453174784"},
    // Hand-picked test cases that can involve precision issues.
    // More comprehensive testing is done in the PyArrow test suite.
    // Each positive case is mirrored by its negation, whose expected string
    // carries a leading minus sign.
    FromDoubleTestParam{999999999999999.0, 16, 1, "999999999999999.0"},
    FromDoubleTestParam{-999999999999999.0, 16, 1, "-999999999999999.0"},
    FromDoubleTestParam{9999999999999998.0, 17, 1, "9999999999999998.0"},
    FromDoubleTestParam{-9999999999999998.0, 17, 1, "-9999999999999998.0"},
    FromDoubleTestParam{999999999999999.9, 16, 1, "999999999999999.9"},
    FromDoubleTestParam{-999999999999999.9, 16, 1, "-999999999999999.9"},
    FromDoubleTestParam{9999999987., 38, 22, "9999999987.0000000000000000000000"},
    FromDoubleTestParam{-9999999987., 38, 22, "-9999999987.0000000000000000000000"},
    FromDoubleTestParam{9999999987., 38, 28,
                        "9999999987.0000000000000000000000000000"},
    FromDoubleTestParam{-9999999987., 38, 28,
                        "-9999999987.0000000000000000000000000000"},
};
}
};
TYPED_TEST_SUITE(TestDecimalFromRealDouble, DecimalTypes);
Expand All @@ -952,15 +971,26 @@ TYPED_TEST(TestDecimalFromRealDouble, SuccessConversion) {
TYPED_TEST(TestDecimalFromRealDouble, LargeValues) {
  // Sweep powers of ten over the whole exponent range of a double: a single
  // significant digit must always convert to the integer string "1".
  for (int32_t exponent = -308; exponent <= 308; ++exponent) {
    ARROW_SCOPED_TRACE("scale = ", exponent);
    const double power_of_ten = std::pow(10.0, static_cast<double>(exponent));
    CheckDecimalFromRealIntegerString<TypeParam>(power_of_ten, 1, -exponent, "1");
  }

  // Three significant digits, converted with one digit fewer, the exact
  // number of digits, and one digit more than the input carries.
  for (int32_t exponent = -307; exponent <= 306; ++exponent) {
    ARROW_SCOPED_TRACE("scale = ", exponent);
    const double scaled = 123. * std::pow(10.0, static_cast<double>(exponent));
    CheckDecimalFromRealIntegerString<TypeParam>(scaled, 2, -exponent - 1, "12");
    CheckDecimalFromRealIntegerString<TypeParam>(scaled, 3, -exponent, "123");
    CheckDecimalFromRealIntegerString<TypeParam>(scaled, 4, -exponent + 1, "1230");
  }

  // Sixteen significant digits: the integer 1234567890123456 is below 2**53
  // and therefore exactly representable in a double's mantissa.
  for (int32_t exponent = -292; exponent <= 291; ++exponent) {
    ARROW_SCOPED_TRACE("scale = ", exponent);
    const double scaled =
        1234567890123456. * std::pow(10.0, static_cast<double>(exponent));
    CheckDecimalFromRealIntegerString<TypeParam>(scaled, 15, -exponent - 1,
                                                 "123456789012346");
    CheckDecimalFromRealIntegerString<TypeParam>(scaled, 16, -exponent,
                                                 "1234567890123456");
    // Precision 17 is deliberately not tested: its trailing digit depends on
    // the FP rounding that occurs while computing `scaled` above.
  }
}

// Additional values that only apply to Decimal256
Expand Down
82 changes: 82 additions & 0 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
# specific language governing permissions and limitations
# under the License.

from collections import namedtuple
import datetime
import decimal
from functools import lru_cache, partial
import inspect
import itertools
import math
import os
import pickle
import pytest
Expand Down Expand Up @@ -79,6 +82,8 @@
pa.float64()
]

# Arrow floating-point types (float32 and float64)
floating_point_arrow_types = [pa.float32(), pa.float64()]


def test_exported_functions():
# Check that all exported concrete functions can be called with
Expand Down Expand Up @@ -1818,6 +1823,83 @@ def test_fsl_to_fsl_cast(value_type):
fsl.cast(cast_type)


# Describes one decimal type family: a display name, the callable that
# builds the type, and its maximum precision.
DecimalTypeTraits = namedtuple(
    'DecimalTypeTraits', ['name', 'factory', 'max_precision'])

# One float-to-decimal cast scenario: the target precision and scale, and
# the floating-point value to be cast.
FloatToDecimalCase = namedtuple(
    'FloatToDecimalCase', ['precision', 'scale', 'float_val'])

# Decimal type families exercised by the float-to-decimal cast tests.
# TODO: only decimal128 is enabled for now; to also cover decimal256, add
#   DecimalTypeTraits('decimal256', pa.decimal256, 76)
decimal_type_traits = [DecimalTypeTraits('decimal128', pa.decimal128, 38)]


def largest_scaled_float_not_above(val, scale):
    """
    Return the largest float `f` such that `f * 10**scale <= val`.

    The bound is evaluated in exact rational arithmetic, so the result does
    not depend on floating-point rounding of the product `f * 10**scale`.

    Parameters
    ----------
    val : number
        Non-negative upper bound for the scaled value.
    scale : int
        Power-of-ten exponent applied to the returned float.

    Returns
    -------
    float
        A float whose exact value, multiplied by `10**scale`, does not
        exceed `val`.
    """
    # Imported locally so that this helper does not rely on a file-level
    # import of `fractions`.
    from fractions import Fraction

    assert val >= 0
    # Exact value of `val / 10**scale`, without any intermediate rounding
    target = Fraction(val) / Fraction(10) ** scale
    # Converting a Fraction to float performs a single integer true division;
    # the result may still land just above the exact target.
    float_val = float(target)
    if Fraction(float_val) > target:
        # Step to the adjacent float towards zero (val >= 0, so that is "down")
        float_val = math.nextafter(float_val, 0.0)
    # Exact check: a float's value is always exactly representable as a Fraction
    assert Fraction(float_val) <= target
    return float_val


def integral_float_to_decimal_cast_cases(max_precision):
    """
    Generate FloatToDecimalCase tuples built around integral boundaries.

    For every precision in `[1, max_precision)` and every scale in
    `[0, precision)`, yield zero, then +/- 1, then +/- the largest float
    not above `(10**precision - 10**scale) / 10**scale`.
    """
    for precision in range(1, max_precision):
        for scale in range(precision):
            smallest = 1
            # Exact unscaled limit in the integer domain
            unscaled_limit = 10**precision - 10**scale
            largest = largest_scaled_float_not_above(unscaled_limit, scale)

            yield FloatToDecimalCase(precision, scale, float(0))
            for magnitude in (smallest, largest):
                for signed in (magnitude, -magnitude):
                    yield FloatToDecimalCase(precision, scale, float(signed))


def real_float_to_decimal_cast_cases(max_precision):
    """
    Generate FloatToDecimalCase tuples built around fractional boundaries.

    For every precision in `[1, max_precision)` and every scale in
    `[0, min(10, precision))`, yield zero, then +/- the largest float not
    above `10**-scale`, then +/- the largest float not above
    `(10**precision - 1) / 10**scale`.
    """
    for precision in range(1, max_precision):
        scale_stop = min(10, precision)
        for scale in range(scale_stop):
            lower = largest_scaled_float_not_above(1, scale)
            upper = largest_scaled_float_not_above(10**precision - 1, scale)
            candidates = [0, lower, -lower, upper, -upper]
            for candidate in candidates:
                yield FloatToDecimalCase(precision, scale, float(candidate))


# TODO: parametrize over `floating_point_arrow_types` instead of float64 only
@pytest.mark.parametrize('float_ty', [pa.float64()], ids=str)
@pytest.mark.parametrize('decimal_ty', decimal_type_traits,
                         ids=lambda traits: traits.name)
@pytest.mark.parametrize('case_generator',
                         [integral_float_to_decimal_cast_cases,
                          real_float_to_decimal_cast_cases],
                         ids=['integrals', 'reals'])
def test_cast_float_to_decimal(float_ty, decimal_ty, case_generator):
    """
    Cast generated float scalars to decimal and compare each result with
    the value produced by Python's `decimal` module.
    """
    with decimal.localcontext() as ctx:
        for case in case_generator(decimal_ty.max_precision):
            # The expected value comes from the Python decimal module, with
            # the context precision set to the target decimal precision.
            ctx.prec = case.precision
            # XXX which decimal rounding mode is expected??
            float_scalar = pa.scalar(case.float_val, type=float_ty)
            expected = ctx.create_decimal_from_float(case.float_val)
            print(f"{case} => expected = {expected!r}")
            target_type = decimal_ty.factory(case.precision, case.scale)
            actual = pc.cast(float_scalar, target_type).as_py()
            assert actual == expected

# TODO: add randomly generated cases of float-to-decimal casts:
# - by generating random integers and scaling them using decimal.Decimal
# - by generating random floats and converting them to decimal.Decimal
# XXX: should these tests move to test_cast.py / test_decimal.py?
# TODO: add overflow tests


def test_strptime():
arr = pa.array(["5/1/2020", None, "12/13/1900"])

Expand Down

0 comments on commit 9ea5dd6

Please sign in to comment.