Skip to content

Commit

Permalink
Merge pull request #10757 from jhump/jh/accept-capital-x
Browse files Browse the repository at this point in the history
protoc: accept capital X to indicate hex escape in string literals
  • Loading branch information
fowles committed Oct 10, 2022
2 parents 2c71df4 + 40169b6 commit f8984a6
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 21 deletions.
44 changes: 26 additions & 18 deletions src/google/protobuf/descriptor_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2775,15 +2775,19 @@ TEST_F(MiscTest, DefaultValues) {
->set_default_value("hello");
AddField(message_proto, "data", 9, label, FD::TYPE_BYTES)
->set_default_value("\\001\\002\\003");
AddField(message_proto, "data2", 10, label, FD::TYPE_BYTES)
->set_default_value("\\X01\\X2\\X3");
AddField(message_proto, "data3", 11, label, FD::TYPE_BYTES)
->set_default_value("\\x01\\x2\\x3");

FieldDescriptorProto* enum_field =
AddField(message_proto, "enum", 10, label, FD::TYPE_ENUM);
AddField(message_proto, "enum", 12, label, FD::TYPE_ENUM);
enum_field->set_type_name("DummyEnum");
enum_field->set_default_value("B");

// Strings are allowed to have empty defaults. (At one point, due to
// a bug, empty defaults for strings were rejected. Oops.)
AddField(message_proto, "empty_string", 11, label, FD::TYPE_STRING)
AddField(message_proto, "empty_string", 13, label, FD::TYPE_STRING)
->set_default_value("");

// Add a second set of fields with implicit default values.
Expand Down Expand Up @@ -2813,7 +2817,7 @@ TEST_F(MiscTest, DefaultValues) {
ASSERT_EQ(1, file->message_type_count());
const Descriptor* message = file->message_type(0);

ASSERT_EQ(21, message->field_count());
ASSERT_EQ(23, message->field_count());

// Check the default values.
ASSERT_TRUE(message->field(0)->has_default_value());
Expand All @@ -2827,6 +2831,8 @@ TEST_F(MiscTest, DefaultValues) {
ASSERT_TRUE(message->field(8)->has_default_value());
ASSERT_TRUE(message->field(9)->has_default_value());
ASSERT_TRUE(message->field(10)->has_default_value());
ASSERT_TRUE(message->field(11)->has_default_value());
ASSERT_TRUE(message->field(12)->has_default_value());

EXPECT_EQ(-1, message->field(0)->default_value_int32());
EXPECT_EQ(int64_t{-1000000000000}, message->field(1)->default_value_int64());
Expand All @@ -2837,11 +2843,11 @@ TEST_F(MiscTest, DefaultValues) {
EXPECT_TRUE(message->field(6)->default_value_bool());
EXPECT_EQ("hello", message->field(7)->default_value_string());
EXPECT_EQ("\001\002\003", message->field(8)->default_value_string());
EXPECT_EQ(enum_value_b, message->field(9)->default_value_enum());
EXPECT_EQ("", message->field(10)->default_value_string());
EXPECT_EQ("\001\002\003", message->field(9)->default_value_string());
EXPECT_EQ("\001\002\003", message->field(10)->default_value_string());
EXPECT_EQ(enum_value_b, message->field(11)->default_value_enum());
EXPECT_EQ("", message->field(12)->default_value_string());

ASSERT_FALSE(message->field(11)->has_default_value());
ASSERT_FALSE(message->field(12)->has_default_value());
ASSERT_FALSE(message->field(13)->has_default_value());
ASSERT_FALSE(message->field(14)->has_default_value());
ASSERT_FALSE(message->field(15)->has_default_value());
Expand All @@ -2850,17 +2856,19 @@ TEST_F(MiscTest, DefaultValues) {
ASSERT_FALSE(message->field(18)->has_default_value());
ASSERT_FALSE(message->field(19)->has_default_value());
ASSERT_FALSE(message->field(20)->has_default_value());

EXPECT_EQ(0, message->field(11)->default_value_int32());
EXPECT_EQ(0, message->field(12)->default_value_int64());
EXPECT_EQ(0, message->field(13)->default_value_uint32());
EXPECT_EQ(0, message->field(14)->default_value_uint64());
EXPECT_EQ(0.0f, message->field(15)->default_value_float());
EXPECT_EQ(0.0, message->field(16)->default_value_double());
EXPECT_FALSE(message->field(17)->default_value_bool());
EXPECT_EQ("", message->field(18)->default_value_string());
EXPECT_EQ("", message->field(19)->default_value_string());
EXPECT_EQ(enum_value_a, message->field(20)->default_value_enum());
ASSERT_FALSE(message->field(21)->has_default_value());
ASSERT_FALSE(message->field(22)->has_default_value());

EXPECT_EQ(0, message->field(13)->default_value_int32());
EXPECT_EQ(0, message->field(14)->default_value_int64());
EXPECT_EQ(0, message->field(15)->default_value_uint32());
EXPECT_EQ(0, message->field(16)->default_value_uint64());
EXPECT_EQ(0.0f, message->field(17)->default_value_float());
EXPECT_EQ(0.0, message->field(18)->default_value_double());
EXPECT_FALSE(message->field(19)->default_value_bool());
EXPECT_EQ("", message->field(20)->default_value_string());
EXPECT_EQ("", message->field(21)->default_value_string());
EXPECT_EQ(enum_value_a, message->field(22)->default_value_enum());
}

TEST_F(MiscTest, FieldOptions) {
Expand Down
4 changes: 2 additions & 2 deletions src/google/protobuf/io/tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ void Tokenizer::ConsumeString(char delimiter) {
// Possibly followed by two more octal digits, but these will
// just be consumed by the main loop anyway so we don't need
// to do so explicitly here.
} else if (TryConsume('x')) {
} else if (TryConsume('x') || TryConsume('X')) {
if (!TryConsumeOne<HexDigit>()) {
AddError("Expected hex digits for escape sequence.");
}
Expand Down Expand Up @@ -1216,7 +1216,7 @@ void Tokenizer::ParseStringAppend(const std::string& text,
}
output->push_back(static_cast<char>(code));

} else if (*ptr == 'x') {
} else if (*ptr == 'x' || *ptr == 'X') {
// A hex escape. May have zero, one, or two digits. (The zero case
// will have been caught as an error earlier.)
int code = 0;
Expand Down
4 changes: 3 additions & 1 deletion src/google/protobuf/io/tokenizer_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1045,6 +1045,8 @@ TEST_F(TokenizerTest, ParseString) {
EXPECT_EQ("\1x\1\123\739\52\334n\3", output);
Tokenizer::ParseString("'\\x20\\x4'", &output);
EXPECT_EQ("\x20\x4", output);
Tokenizer::ParseString("'\\X20\\X4'", &output);
EXPECT_EQ("\x20\x4", output);

// Test invalid strings that may still be tokenized as strings.
Tokenizer::ParseString("\"\\a\\l\\v\\t", &output); // \l is invalid
Expand Down Expand Up @@ -1110,7 +1112,7 @@ inline std::ostream& operator<<(std::ostream& out, const ErrorCase& test_case) {
ErrorCase kErrorCases[] = {
// String errors.
{"'\\l' foo", true, "0:2: Invalid escape sequence in string literal.\n"},
{"'\\X' foo", true, "0:2: Invalid escape sequence in string literal.\n"},
{"'\\X' foo", true, "0:3: Expected hex digits for escape sequence.\n"},
{"'\\x' foo", true, "0:3: Expected hex digits for escape sequence.\n"},
{"'foo", false, "0:4: Unexpected end of string.\n"},
{"'bar\nfoo", true,
Expand Down

0 comments on commit f8984a6

Please sign in to comment.