Skip to content

Commit

Permalink
alternator: add validation of numbers' magnitude and precision
Browse files Browse the repository at this point in the history
DynamoDB limits the allowed magnitude and precision of numbers - valid
decimal exponents are between -130 and 125 and up to 38 significant
decimal digitst are allowed. In contrast, Scylla uses the CQL "decimal"
type which offers unlimited precision. This can cause two problems:

1. Users might get used to this "unofficial" feature and start relying
   on it, not allowing us to switch to a more efficient limited-precision
   implementation later.

2. If huge exponents are allowed, e.g., 1e-1000000, summing such a
   number with 1.0 will result in a huge number, huge allocations and
   stalls. This is highly undesirable.

After this patch, all tests in test/alternator/test_number.py now
pass. The various failing tests which verify magnitude and precision
limitations in different places (key attributes, non-key attributes,
and arithmetic expressions) now pass - so their "xfail" tags are removed.

Fixes scylladb#6794

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
  • Loading branch information
nyh committed May 2, 2023
1 parent d294167 commit 4401148
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 26 deletions.
136 changes: 120 additions & 16 deletions alternator/serialization.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,115 @@ type_representation represent_type(alternator_type atype) {
return it->second;
}

// Get the magnitude and precision of a big_decimal - as these concepts are
// defined by DynamoDB - to allow us to enforce limits on those as explained
// in ssue #6794. The "magnitude" of 9e123 is 123 and of -9e-123 is -123,
// the "precision" of 12.34e56 is the number of significant digits - 4.
//
// Unfortunately it turned out to be quite difficult to take a big_decimal and
// calculate its magnitude and precision from its scale() and unscaled_value().
// So in the following ugly implementation we calculate them from the string
// representation instead. We assume the number was already parsed
// sucessfully to a big_decimal to it follows its syntax rules.
//
// FIXME: rewrite this function to take a big_decimal, not a string.
// Maybe a snippet like this can help:
// boost::multiprecision::cpp_int digits = boost::multiprecision::log10(num.unscaled_value().convert_to<boost::multiprecision::mpf_float_50>()).convert_to<boost::multiprecision::cpp_int>() + 1;


internal::magnitude_and_precision internal::get_magnitude_and_precision(std::string_view s) {
size_t e_or_end = s.find_first_of("eE");
std::string_view base = s.substr(0, e_or_end);
if (s[0]=='-' || s[0]=='+') {
base = base.substr(1);
}
int magnitude = 0;
int precision = 0;
size_t dot_or_end = base.find_first_of(".");
size_t nonzero = base.find_first_not_of("0");
if (dot_or_end != std::string_view::npos) {
if (nonzero == dot_or_end) {
// 0.000031 => magnitude = -5 (like 3.1e-5), precision = 2.
std::string_view fraction = base.substr(dot_or_end + 1);
size_t nonzero2 = fraction.find_first_not_of("0");
if (nonzero2 != std::string_view::npos) {
magnitude = -nonzero2 - 1;
precision = fraction.size() - nonzero2;
}
} else {
// 000123.45678 => magnitude = 2, precision = 8.
magnitude = dot_or_end - nonzero - 1;
precision = base.size() - nonzero - 1;
}
// trailing zeros don't count to precision, e.g., precision
// of 1000.0, 1.0 or 1.0000 are just 1.
size_t last_significant = base.find_last_not_of(".0");
if (last_significant == std::string_view::npos) {
precision = 0;
} else if (last_significant < dot_or_end) {
// e.g., 1000.00 reduce 5 = 7 - (0+1) - 1 from precision
precision -= base.size() - last_significant - 2;
} else {
// e.g., 1235.60 reduce 5 = 7 - (5+1) from precision
precision -= base.size() - last_significant - 1;
}
} else if (nonzero == std::string_view::npos) {
// all-zero integer 000000
magnitude = 0;
precision = 0;
} else {
magnitude = base.size() - 1 - nonzero;
precision = base.size() - nonzero;
// trailing zeros don't count to precision, e.g., precision
// of 1000 is just 1.
size_t last_significant = base.find_last_not_of("0");
if (last_significant == std::string_view::npos) {
precision = 0;
} else {
// e.g., 1000 reduce 3 = 4 - (0+1)
precision -= base.size() - last_significant - 1;
}
}
if (precision && e_or_end != std::string_view::npos) {
std::string_view exponent = s.substr(e_or_end + 1);
if (exponent.size() > 4) {
// don't even bother atoi(), exponent is too large
magnitude = exponent[0]=='-' ? -9999 : 9999;
} else {
try {
magnitude += boost::lexical_cast<int32_t>(exponent);
} catch (...) {
magnitude = 9999;
}
}
}
return magnitude_and_precision {magnitude, precision};
}

// Parse a number read from user input, validating that it has a valid
// numeric format and also in the allowed magnitude and precision ranges
// (see issue #6794). Throws an api_error::validation if the validation
// failed.
static big_decimal parse_and_validate_number(std::string_view s) {
try {
big_decimal ret(s);
auto [magnitude, precision] = internal::get_magnitude_and_precision(s);
if (magnitude > 125) {
throw api_error::validation(format("Number overflow: {}. Attempting to store a number with magnitude larger than supported range.", s));
}
if (magnitude < -130) {
throw api_error::validation(format("Number underflow: {}. Attempting to store a number with magnitude lower than supported range.", s));
}
if (precision > 38) {
throw api_error::validation(format("Number too precise: {}. Attempting to store a number with more significant digits than supported.", s));
}
return ret;
} catch (const marshal_exception& e) {
throw api_error::validation(format("The parameter cannot be converted to a numeric value: {}", s));
}

}

struct from_json_visitor {
const rjson::value& v;
bytes_ostream& bo;
Expand All @@ -67,11 +176,7 @@ struct from_json_visitor {
bo.write(boolean_type->decompose(v.GetBool()));
}
void operator()(const decimal_type_impl& t) const {
try {
bo.write(t.from_string(rjson::to_string_view(v)));
} catch (const marshal_exception& e) {
throw api_error::validation(format("The parameter cannot be converted to a numeric value: {}", v));
}
bo.write(decimal_type->decompose(parse_and_validate_number(rjson::to_string_view(v))));
}
// default
void operator()(const abstract_type& t) const {
Expand Down Expand Up @@ -203,6 +308,8 @@ bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column
// FIXME: it's difficult at this point to get information if value was provided
// in request or comes from the storage, for now we assume it's user's fault.
return *unwrap_bytes(value, true);
} else if (column.type == decimal_type) {
return decimal_type->decompose(parse_and_validate_number(rjson::to_string_view(value)));
} else {
return column.type->from_string(value_view);
}
Expand Down Expand Up @@ -295,16 +402,13 @@ big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic) {
if (it->name != "N") {
throw api_error::validation(format("{}: expected number, found type '{}'", diagnostic, it->name));
}
try {
if (!it->value.IsString()) {
// We shouldn't reach here. Callers normally validate their input
// earlier with validate_value().
throw api_error::validation(format("{}: improperly formatted number constant", diagnostic));
}
return big_decimal(rjson::to_string_view(it->value));
} catch (const marshal_exception& e) {
throw api_error::validation(format("The parameter cannot be converted to a numeric value: {}", it->value));
if (!it->value.IsString()) {
// We shouldn't reach here. Callers normally validate their input
// earlier with validate_value().
throw api_error::validation(format("{}: improperly formatted number constant", diagnostic));
}
big_decimal ret = parse_and_validate_number(rjson::to_string_view(it->value));
return ret;
}

std::optional<big_decimal> try_unwrap_number(const rjson::value& v) {
Expand All @@ -316,8 +420,8 @@ std::optional<big_decimal> try_unwrap_number(const rjson::value& v) {
return std::nullopt;
}
try {
return big_decimal(rjson::to_string_view(it->value));
} catch (const marshal_exception& e) {
return parse_and_validate_number(rjson::to_string_view(it->value));
} catch (api_error&) {
return std::nullopt;
}
}
Expand Down
7 changes: 7 additions & 0 deletions alternator/serialization.hh
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,12 @@ std::optional<rjson::value> set_diff(const rjson::value& v1, const rjson::value&
// Returns a null value if one of the arguments is not actually a list.
rjson::value list_concatenate(const rjson::value& v1, const rjson::value& v2);

namespace internal {
struct magnitude_and_precision {
int magnitude;
int precision;
};
magnitude_and_precision get_magnitude_and_precision(std::string_view);
}

}
19 changes: 9 additions & 10 deletions test/alternator/test_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def test_number_magnitude_allowed(test_table_s):
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == num

# Test that numbers of too big (or small) a magnitude cannot be stored.
@pytest.mark.xfail(reason="Number type allows too much magnitude and precision")
def test_number_magnitude_not_allowed(test_table_s):
p = random_string()
for num in [Decimal("1e126"), Decimal("11e125")]:
Expand All @@ -76,9 +75,8 @@ def test_number_magnitude_not_allowed(test_table_s):
# magnitudes, that requires an unlimited amount of memory and CPU time - i.e.,
# a DoS attack. The attacker can cause a std::bad_alloc, large allocations,
# and very long scheduler stall, all with a very short request.
# Due to issue #6794 we need to skip this test, because it takes a very
# When we had issue #6794 we had to skip this test, because it took a very
# long time and/or crashes Scylla.
@pytest.mark.skip(reason="Issue #6794")
def test_number_magnitude_not_allowed_dos(test_table_s):
p = random_string()
# Python's "Decimal" type and the way it's used by the Boto3 library
Expand All @@ -87,8 +85,8 @@ def test_number_magnitude_not_allowed_dos(test_table_s):
# strings.
a = "1.0"
b = "1.0e100000000"
with pytest.raises(ClientError, match='ValidationException.*overflow'):
with client_no_transform(test_table_s.meta.client) as client:
with client_no_transform(test_table_s.meta.client) as client:
with pytest.raises(ClientError, match='ValidationException.*overflow'):
client.update_item(TableName=test_table_s.name,
Key={'p': {'S': p}},
UpdateExpression='SET x = :a + :b',
Expand All @@ -108,7 +106,6 @@ def test_number_precision_allowed(test_table_s):

# Check that numbers with more significant digits than supported (38 decimal
# digits) cannot be stored.
@pytest.mark.xfail(reason="Number type allows too much magnitude and precision")
def test_number_precision_not_allowed(test_table_s):
p = random_string()
for num in [Decimal("3.14159265358979323846264338327950288419"),
Expand All @@ -123,7 +120,6 @@ def test_number_precision_not_allowed(test_table_s):
# columns, and the following tests do the same for a numberic key column.
# Because different code paths are involved for serializing and storing
# key and non-key columns, it's important to check this case as well.
@pytest.mark.xfail(reason="Issue #6794")
def test_number_magnitude_key(test_table_sn):
p = random_string()
# Legal magnitudes are allowed:
Expand Down Expand Up @@ -157,7 +153,6 @@ def test_number_magnitude_key(test_table_sn):
UpdateExpression='SET a = :val',
ExpressionAttributeValues={':val': x})

@pytest.mark.xfail(reason="Issue #6794")
def test_number_precision_key(test_table_sn):
p = random_string()
# Legal precision is allowed:
Expand Down Expand Up @@ -196,7 +191,6 @@ def test_update_expression_plus_precision(test_table_s):

# Some additions or subtractions can result in overflow to the allowed range,
# causing the update to fail: 9e125 + 9e125 = 1.8e126 which overflows.
@pytest.mark.xfail(reason="Number type allows too much magnitude and precision")
def test_update_expression_plus_overflow(test_table_s):
p = random_string()
with pytest.raises(ClientError, match='ValidationException.*overflow'):
Expand All @@ -207,11 +201,16 @@ def test_update_expression_plus_overflow(test_table_s):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET b = :val1 - :val2',
ExpressionAttributeValues={':val1': Decimal("9e125"), ':val2': Decimal("-9e125")})
# Validate that the individual operands aren't too large - the only
# problem was the sum
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET b = :val1 + :val2',
ExpressionAttributeValues={':val1': Decimal("9e125"), ':val2': Decimal("9e124")})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['b'] == Decimal("9.9e125")

# Similarly, addition or subtraction can also result in unsupported precision
# and causing the update to fail: For example, 1e50 + 1 cannot be represented
# in 38 digits of precision.
@pytest.mark.xfail(reason="Number type allows too much magnitude and precision")
def test_update_expression_plus_imprecise(test_table_s):
p = random_string()
# Strangely, DynamoDB says that the error is: "Number overflow. Attempting
Expand Down

0 comments on commit 4401148

Please sign in to comment.