Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

protoc: fix consistency with parsing very large decimal numbers #10555

44 changes: 31 additions & 13 deletions src/google/protobuf/compiler/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,16 @@ bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
}
}

bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
if (LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
io::Tokenizer::ParseInteger(input_->current().text, max_value,
output)) {
input_->Next();
return true;
}
return false;
}

bool Parser::ConsumeNumber(double* output, const char* error) {
if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
*output = io::Tokenizer::ParseFloat(input_->current().text);
Expand All @@ -296,13 +306,19 @@ bool Parser::ConsumeNumber(double* output, const char* error) {
} else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
// Also accept integers.
uint64_t value = 0;
if (!io::Tokenizer::ParseInteger(input_->current().text,
if (io::Tokenizer::ParseInteger(input_->current().text,
std::numeric_limits<uint64_t>::max(),
&value)) {
*output = value;
} else if (input_->current().text[0] == '0') {
// octal or hexadecimal; don't bother parsing as float
AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number.
Comment on lines +313 to +316
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put this here, instead of just relying on the branch below, because I think an octal literal would likely be successfully parsed by ParseFloat, but incorrectly interpreted as decimal.

} else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
// out of int range, and not valid float? 🤷
AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number.
Comment on lines +317 to 320
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is "just in case". I can't think of any actual scenario, other octal or hex literal (handled above), where ParseFloat would fail. But I also really don't want this inadvertently raising an exception.

}
*output = value;
input_->Next();
return true;
} else if (LookingAt("inf")) {
Expand Down Expand Up @@ -1551,18 +1567,20 @@ bool Parser::ParseOption(Message* options,
is_negative
? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
: std::numeric_limits<uint64_t>::max();
DO(ConsumeInteger64(max_value, &value, "Expected integer."));
if (is_negative) {
value_location.AddPath(
UninterpretedOption::kNegativeIntValueFieldNumber);
uninterpreted_option->set_negative_int_value(
static_cast<int64_t>(0 - value));
} else {
value_location.AddPath(
UninterpretedOption::kPositiveIntValueFieldNumber);
uninterpreted_option->set_positive_int_value(value);
if (TryConsumeInteger64(max_value, &value)) {
if (is_negative) {
value_location.AddPath(
UninterpretedOption::kNegativeIntValueFieldNumber);
uninterpreted_option->set_negative_int_value(
static_cast<int64_t>(0 - value));
} else {
value_location.AddPath(
UninterpretedOption::kPositiveIntValueFieldNumber);
uninterpreted_option->set_positive_int_value(value);
}
break;
}
break;
// value too large for an integer; fall through below to treat as floating point
}

case io::Tokenizer::TYPE_FLOAT: {
Expand Down
3 changes: 3 additions & 0 deletions src/google/protobuf/compiler/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ class PROTOBUF_EXPORT Parser {
// is greater than max_value, an error will be reported.
bool ConsumeInteger64(uint64_t max_value, uint64_t* output,
const char* error);
// Try to consume a 64-bit integer and store its value in "output". No
// error is reported on failure, allowing caller to consume token another way.
bool TryConsumeInteger64(uint64_t max_value, uint64_t* output);
// Consume a number and store its value in "output". This will accept
// tokens of either INTEGER or FLOAT type.
bool ConsumeNumber(double* output, const char* error);
Expand Down
66 changes: 66 additions & 0 deletions src/google/protobuf/compiler/parser_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,56 @@ TEST_F(ParseMessageTest, FieldOptions) {
"}");
}

TEST_F(ParseMessageTest, FieldOptionsSupportLargeDecimalLiteral) {
// decimal integer literal > uint64 max
ExpectParsesTo(
"import \"google/protobuf/descriptor.proto\";\n"
"extend google.protobuf.FieldOptions {\n"
" optional double f = 10101;\n"
"}\n"
"message TestMessage {\n"
" optional double a = 1 [default = 18446744073709551616];\n"
" optional double b = 2 [default = -18446744073709551616];\n"
" optional double c = 3 [(f) = 18446744073709551616];\n"
" optional double d = 4 [(f) = -18446744073709551616];\n"
"}\n",

"dependency: \"google/protobuf/descriptor.proto\""
"extension {"
" name: \"f\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 10101"
" extendee: \"google.protobuf.FieldOptions\""
"}"
"message_type {"
" name: \"TestMessage\""
" field {"
" name: \"a\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 1"
" default_value: \"1.8446744073709552e+19\""
" }"
" field {"
" name: \"b\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 2"
" default_value: \"-1.8446744073709552e+19\""
" }"
" field {"
" name: \"c\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 3"
" options{"
" uninterpreted_option{"
" name{ name_part: \"f\" is_extension: true }"
" double_value: 1.8446744073709552e+19"
" }"
" }"
" }"
" field {"
" name: \"d\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 4"
" options{"
" uninterpreted_option{"
" name{ name_part: \"f\" is_extension: true }"
" double_value: -1.8446744073709552e+19"
" }"
" }"
" }"
"}");
fowles marked this conversation as resolved.
Show resolved Hide resolved
}

TEST_F(ParseMessageTest, Oneof) {
ExpectParsesTo(
"message TestMessage {\n"
Expand Down Expand Up @@ -1891,6 +1941,22 @@ TEST_F(ParserValidationErrorTest, FieldDefaultValueError) {
"2:32: Enum type \"Baz\" has no value named \"NO_SUCH_VALUE\".\n");
}

TEST_F(ParserValidationErrorTest, FieldDefaultIntegerOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [default = 0x10000000000000000];\n"
"}\n",
"1:37: Integer out of range.\n");
}

TEST_F(ParserValidationErrorTest, FieldOptionOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [foo = 0x10000000000000000];\n"
"}\n",
"1:33: Integer out of range.\n");
}

TEST_F(ParserValidationErrorTest, FileOptionNameError) {
ExpectHasValidationErrors(
"option foo = 5;",
Expand Down
50 changes: 30 additions & 20 deletions src/google/protobuf/descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "google/protobuf/stubs/stringprintf.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
Expand Down Expand Up @@ -7675,6 +7676,27 @@ bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
return true;
}

namespace {
// Helpers for method below

template <typename T> std::string ValueOutOfRange(
absl::string_view type_name, absl::string_view option_name) {
return absl::StrFormat(
"Value out of range, %d to %d, for %s option \"%s\".", \
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
type_name, option_name);
}

template <typename T> std::string ValueMustBeInt(
absl::string_view type_name, absl::string_view option_name) {
return absl::StrFormat(
"Value must be integer, from %d to %d, for %s option \"%s\".", \
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
type_name, option_name);
}
fowles marked this conversation as resolved.
Show resolved Hide resolved

} // namespace

bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
// We switch on the CppType to validate.
Expand All @@ -7683,8 +7705,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() >
static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
return AddValueError("Value out of range for int32 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
} else {
SetInt32(option_field->number(),
uninterpreted_option_->positive_int_value(),
Expand All @@ -7693,25 +7714,22 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
} else if (uninterpreted_option_->has_negative_int_value()) {
if (uninterpreted_option_->negative_int_value() <
static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
return AddValueError("Value out of range for int32 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
} else {
SetInt32(option_field->number(),
uninterpreted_option_->negative_int_value(),
option_field->type(), unknown_fields);
}
} else {
return AddValueError("Value must be integer for int32 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<int32_t>("int32", option_field->full_name()));
}
break;

case FieldDescriptor::CPPTYPE_INT64:
if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() >
static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
return AddValueError("Value out of range for int64 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueOutOfRange<int64_t>("int64", option_field->full_name()));
} else {
SetInt64(option_field->number(),
uninterpreted_option_->positive_int_value(),
Expand All @@ -7722,27 +7740,22 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
uninterpreted_option_->negative_int_value(),
option_field->type(), unknown_fields);
} else {
return AddValueError("Value must be integer for int64 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<int64_t>("int64", option_field->full_name()));
}
break;

case FieldDescriptor::CPPTYPE_UINT32:
if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() >
std::numeric_limits<uint32_t>::max()) {
return AddValueError("Value out of range for uint32 option \"" +
option_field->name() + "\".");
return AddValueError(ValueOutOfRange<uint32_t>("uint32", option_field->full_name()));
} else {
SetUInt32(option_field->number(),
uninterpreted_option_->positive_int_value(),
option_field->type(), unknown_fields);
}
} else {
return AddValueError(
"Value must be non-negative integer for uint32 "
"option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<uint32_t>("uint32", option_field->full_name()));
}
break;

Expand All @@ -7752,10 +7765,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
uninterpreted_option_->positive_int_value(),
option_field->type(), unknown_fields);
} else {
return AddValueError(
"Value must be non-negative integer for uint64 "
"option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<uint64_t>("uint64", option_field->full_name()));
}
break;

Expand Down
16 changes: 8 additions & 8 deletions src/google/protobuf/descriptor_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5557,7 +5557,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfPositiveRange) {
" positive_int_value: 0x80000000 } "
"}",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
"for int32 option \"foo\".\n");
}

Expand All @@ -5574,7 +5574,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfNegativeRange) {
" negative_int_value: -0x80000001 } "
"}",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
"for int32 option \"foo\".\n");
}

Expand All @@ -5590,7 +5590,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueIsNotPositiveInt) {
" is_extension: true } "
" string_value: \"5\" } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -2147483648 to 2147483647, "
"for int32 option \"foo\".\n");
}

Expand All @@ -5608,7 +5608,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueOutOfRange) {
"} "
"}",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -9223372036854775808 to 9223372036854775807, "
"for int64 option \"foo\".\n");
}

Expand All @@ -5624,7 +5624,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueIsNotPositiveInt) {
" is_extension: true } "
" identifier_value: \"5\" } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -9223372036854775808 to 9223372036854775807, "
"for int64 option \"foo\".\n");
}

Expand All @@ -5640,7 +5640,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueOutOfRange) {
" is_extension: true } "
" positive_int_value: 0x100000000 } }",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, 0 to 4294967295, "
"for uint32 option \"foo\".\n");
}

Expand All @@ -5656,7 +5656,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueIsNotPositiveInt) {
" is_extension: true } "
" double_value: -5.6 } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 4294967295, "
"for uint32 option \"foo\".\n");
}

Expand All @@ -5672,7 +5672,7 @@ TEST_F(ValidationErrorTest, UInt64OptionValueIsNotPositiveInt) {
" is_extension: true } "
" negative_int_value: -5 } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 18446744073709551615, "
"for uint64 option \"foo\".\n");
}

Expand Down
20 changes: 13 additions & 7 deletions src/google/protobuf/io/tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1002,9 +1002,20 @@ bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value,
}

double Tokenizer::ParseFloat(const std::string& text) {
double result;
jhump marked this conversation as resolved.
Show resolved Hide resolved
if (!TryParseFloat(text, &result)) {
LOG(DFATAL)
fowles marked this conversation as resolved.
Show resolved Hide resolved
jhump marked this conversation as resolved.
Show resolved Hide resolved
<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
}
return result;
}

bool Tokenizer::TryParseFloat(const std::string& text, double* result) {
const char* start = text.c_str();
char* end;
double result = NoLocaleStrtod(start, &end);
*result = NoLocaleStrtod(start, &end);

// "1e" is not a valid float, but if the tokenizer reads it, it will
// report an error but still return it as a valid token. We need to
Expand All @@ -1020,12 +1031,7 @@ double Tokenizer::ParseFloat(const std::string& text) {
++end;
}

GOOGLE_LOG_IF(DFATAL,
static_cast<size_t>(end - start) != text.size() || *start == '-')
<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
return result;
return static_cast<size_t>(end - start) == text.size() && *start != '-';
}

// Helper to append a Unicode code point to a string as UTF8, without bringing
Expand Down
4 changes: 4 additions & 0 deletions src/google/protobuf/io/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ class PROTOBUF_EXPORT Tokenizer {
// result is undefined (possibly an assert failure).
static double ParseFloat(const std::string& text);

// Parses given text as if it were a TYPE_FLOAT token. Returns false if the
// given text is not actually a valid float literal.
static bool TryParseFloat(const std::string& text, double* result);

// Parses a TYPE_STRING token. This never fails, so long as the text actually
// comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
// result is undefined (possibly an assert failure).
Expand Down