diff --git a/tools/inspector_protocol/encoding/encoding.cc b/tools/inspector_protocol/encoding/encoding.cc index 89988476b87851..f7e933e41afac5 100644 --- a/tools/inspector_protocol/encoding/encoding.cc +++ b/tools/inspector_protocol/encoding/encoding.cc @@ -1386,7 +1386,7 @@ class JSONEncoder : public StreamingParserHandler { // Disallow overlong encodings for ascii characters, as these // would include " and other characters significant to JSON // string termination / control. - if (codepoint < 0x7f) + if (codepoint <= 0x7f) continue; // Invalid in UTF8, and can't be represented in UTF16 anyway. if (codepoint > 0x10ffff) diff --git a/tools/inspector_protocol/encoding/encoding_test.cc b/tools/inspector_protocol/encoding/encoding_test.cc index 6893fe2581683c..554aa1b2cbaedb 100644 --- a/tools/inspector_protocol/encoding/encoding_test.cc +++ b/tools/inspector_protocol/encoding/encoding_test.cc @@ -1325,6 +1325,25 @@ void WriteUTF8AsUTF16(StreamingParserHandler* writer, const std::string& utf8) { writer->HandleString16(SpanFrom(UTF8ToUTF16(SpanFrom(utf8)))); } +TEST(JsonEncoder, OverlongEncodings) { + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + + // We encode 0x7f, which is the DEL ascii character, as a 4 byte UTF8 + // sequence. This is called an overlong encoding, because only 1 byte + // is needed to represent 0x7f as UTF8. + std::vector chars = { + 0xf0, // Starts 4 byte utf8 sequence + 0x80, // continuation byte + 0x81, // continuation byte w/ payload bit 7 set to 1. + 0xbf, // continuation byte w/ payload bits 0-6 set to 11111. + }; + writer->HandleString8(SpanFrom(chars)); + EXPECT_EQ("\"\"", out); // Empty string means that 0x7f was rejected (good). +} + TEST(JsonStdStringWriterTest, HelloWorld) { std::string out; Status status; @@ -1561,6 +1580,13 @@ TEST_F(JsonParserTest, UsAsciiDelCornerCase) { "string16: a\x7f\n" "map end\n", log_.str()); + + // We've seen an implementation of UTF16ToUTF8 which would replace the DEL + // character with ' ', so this simple roundtrip tests the routines in + // encoding_test_helper.h, to make test failures of the above easier to + // diagnose. + std::vector utf16 = UTF8ToUTF16(SpanFrom(json)); + EXPECT_EQ(json, UTF16ToUTF8(SpanFrom(utf16))); } TEST_F(JsonParserTest, Whitespace) { diff --git a/tools/inspector_protocol/lib/encoding_cpp.template b/tools/inspector_protocol/lib/encoding_cpp.template index 8a9d6f64e471f8..a0377d12f7dbb1 100644 --- a/tools/inspector_protocol/lib/encoding_cpp.template +++ b/tools/inspector_protocol/lib/encoding_cpp.template @@ -1394,7 +1394,7 @@ class JSONEncoder : public StreamingParserHandler { // Disallow overlong encodings for ascii characters, as these // would include " and other characters significant to JSON // string termination / control. - if (codepoint < 0x7f) + if (codepoint <= 0x7f) continue; // Invalid in UTF8, and can't be represented in UTF16 anyway. if (codepoint > 0x10ffff)