From ff976a37d79628d680cfd43cb492b5bb2e9bdbfb Mon Sep 17 00:00:00 2001 From: Josh Humphries Date: Mon, 10 Oct 2022 17:03:50 -0400 Subject: [PATCH 1/2] support capital X for hex escapes in string literals --- src/google/protobuf/io/tokenizer.cc | 4 ++-- src/google/protobuf/io/tokenizer_unittest.cc | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index 4d7f56cc55f9..a9da85d082f1 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -422,7 +422,7 @@ void Tokenizer::ConsumeString(char delimiter) { // Possibly followed by two more octal digits, but these will // just be consumed by the main loop anyway so we don't need // to do so explicitly here. - } else if (TryConsume('x')) { + } else if (TryConsume('x') || TryConsume('X')) { if (!TryConsumeOne()) { AddError("Expected hex digits for escape sequence."); } @@ -1216,7 +1216,7 @@ void Tokenizer::ParseStringAppend(const std::string& text, } output->push_back(static_cast(code)); - } else if (*ptr == 'x') { + } else if (*ptr == 'x' || *ptr == 'X') { // A hex escape. May zero, one, or two digits. (The zero case // will have been caught as an error earlier.) int code = 0; diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc index f4d70f0d51d8..3d59f637942a 100644 --- a/src/google/protobuf/io/tokenizer_unittest.cc +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -1045,6 +1045,8 @@ TEST_F(TokenizerTest, ParseString) { EXPECT_EQ("\1x\1\123\739\52\334n\3", output); Tokenizer::ParseString("'\\x20\\x4'", &output); EXPECT_EQ("\x20\x4", output); + Tokenizer::ParseString("'\\X20\\X4'", &output); + EXPECT_EQ("\x20\x4", output); // Test invalid strings that may still be tokenized as strings. Tokenizer::ParseString("\"\\a\\l\\v\\t", &output); // \l is invalid @@ -1110,7 +1112,7 @@ inline std::ostream& operator<<(std::ostream& out, const ErrorCase& test_case) { ErrorCase kErrorCases[] = { // String errors. {"'\\l' foo", true, "0:2: Invalid escape sequence in string literal.\n"}, - {"'\\X' foo", true, "0:2: Invalid escape sequence in string literal.\n"}, + {"'\\X' foo", true, "0:3: Expected hex digits for escape sequence.\n"}, {"'\\x' foo", true, "0:3: Expected hex digits for escape sequence.\n"}, {"'foo", false, "0:4: Unexpected end of string.\n"}, {"'bar\nfoo", true, From 40169b651669151c85e1c9fead4aa4db931baf47 Mon Sep 17 00:00:00 2001 From: Josh Humphries Date: Mon, 10 Oct 2022 17:20:09 -0400 Subject: [PATCH 2/2] more tests to prevent regression in descriptor handling capital X escapes --- src/google/protobuf/descriptor_unittest.cc | 44 +++++++++++++--------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/google/protobuf/descriptor_unittest.cc b/src/google/protobuf/descriptor_unittest.cc index 726592b946c7..ab187ae578bd 100644 --- a/src/google/protobuf/descriptor_unittest.cc +++ b/src/google/protobuf/descriptor_unittest.cc @@ -2775,15 +2775,19 @@ TEST_F(MiscTest, DefaultValues) { ->set_default_value("hello"); AddField(message_proto, "data", 9, label, FD::TYPE_BYTES) ->set_default_value("\\001\\002\\003"); + AddField(message_proto, "data2", 10, label, FD::TYPE_BYTES) + ->set_default_value("\\X01\\X2\\X3"); + AddField(message_proto, "data3", 11, label, FD::TYPE_BYTES) + ->set_default_value("\\x01\\x2\\x3"); FieldDescriptorProto* enum_field = - AddField(message_proto, "enum", 10, label, FD::TYPE_ENUM); + AddField(message_proto, "enum", 12, label, FD::TYPE_ENUM); enum_field->set_type_name("DummyEnum"); enum_field->set_default_value("B"); // Strings are allowed to have empty defaults. (At one point, due to // a bug, empty defaults for strings were rejected. Oops.) - AddField(message_proto, "empty_string", 11, label, FD::TYPE_STRING) + AddField(message_proto, "empty_string", 13, label, FD::TYPE_STRING) ->set_default_value(""); // Add a second set of fields with implicit default values. @@ -2813,7 +2817,7 @@ TEST_F(MiscTest, DefaultValues) { ASSERT_EQ(1, file->message_type_count()); const Descriptor* message = file->message_type(0); - ASSERT_EQ(21, message->field_count()); + ASSERT_EQ(23, message->field_count()); // Check the default values. ASSERT_TRUE(message->field(0)->has_default_value()); @@ -2827,6 +2831,8 @@ TEST_F(MiscTest, DefaultValues) { ASSERT_TRUE(message->field(8)->has_default_value()); ASSERT_TRUE(message->field(9)->has_default_value()); ASSERT_TRUE(message->field(10)->has_default_value()); + ASSERT_TRUE(message->field(11)->has_default_value()); + ASSERT_TRUE(message->field(12)->has_default_value()); EXPECT_EQ(-1, message->field(0)->default_value_int32()); EXPECT_EQ(int64_t{-1000000000000}, message->field(1)->default_value_int64()); @@ -2837,11 +2843,11 @@ TEST_F(MiscTest, DefaultValues) { EXPECT_TRUE(message->field(6)->default_value_bool()); EXPECT_EQ("hello", message->field(7)->default_value_string()); EXPECT_EQ("\001\002\003", message->field(8)->default_value_string()); - EXPECT_EQ(enum_value_b, message->field(9)->default_value_enum()); - EXPECT_EQ("", message->field(10)->default_value_string()); + EXPECT_EQ("\001\002\003", message->field(9)->default_value_string()); + EXPECT_EQ("\001\002\003", message->field(10)->default_value_string()); + EXPECT_EQ(enum_value_b, message->field(11)->default_value_enum()); + EXPECT_EQ("", message->field(12)->default_value_string()); - ASSERT_FALSE(message->field(11)->has_default_value()); - ASSERT_FALSE(message->field(12)->has_default_value()); ASSERT_FALSE(message->field(13)->has_default_value()); ASSERT_FALSE(message->field(14)->has_default_value()); ASSERT_FALSE(message->field(15)->has_default_value()); @@ -2850,17 +2856,19 @@ TEST_F(MiscTest, DefaultValues) { ASSERT_FALSE(message->field(18)->has_default_value()); ASSERT_FALSE(message->field(19)->has_default_value()); ASSERT_FALSE(message->field(20)->has_default_value()); - - EXPECT_EQ(0, message->field(11)->default_value_int32()); - EXPECT_EQ(0, message->field(12)->default_value_int64()); - EXPECT_EQ(0, message->field(13)->default_value_uint32()); - EXPECT_EQ(0, message->field(14)->default_value_uint64()); - EXPECT_EQ(0.0f, message->field(15)->default_value_float()); - EXPECT_EQ(0.0, message->field(16)->default_value_double()); - EXPECT_FALSE(message->field(17)->default_value_bool()); - EXPECT_EQ("", message->field(18)->default_value_string()); - EXPECT_EQ("", message->field(19)->default_value_string()); - EXPECT_EQ(enum_value_a, message->field(20)->default_value_enum()); + ASSERT_FALSE(message->field(21)->has_default_value()); + ASSERT_FALSE(message->field(22)->has_default_value()); + + EXPECT_EQ(0, message->field(13)->default_value_int32()); + EXPECT_EQ(0, message->field(14)->default_value_int64()); + EXPECT_EQ(0, message->field(15)->default_value_uint32()); + EXPECT_EQ(0, message->field(16)->default_value_uint64()); + EXPECT_EQ(0.0f, message->field(17)->default_value_float()); + EXPECT_EQ(0.0, message->field(18)->default_value_double()); + EXPECT_FALSE(message->field(19)->default_value_bool()); + EXPECT_EQ("", message->field(20)->default_value_string()); + EXPECT_EQ("", message->field(21)->default_value_string()); + EXPECT_EQ(enum_value_a, message->field(22)->default_value_enum()); } TEST_F(MiscTest, FieldOptions) {