From a907b4fce637c136d94790984294cab33280f7c4 Mon Sep 17 00:00:00 2001
From: Devon Hudson
Date: Fri, 8 Dec 2023 13:31:50 -0700
Subject: [PATCH] Fix canonical json utf-16 surrogate pair detection logic

---
 json.go      |  2 +-
 json_test.go | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/json.go b/json.go
index 91c45b82..3d8e7e26 100644
--- a/json.go
+++ b/json.go
@@ -329,7 +329,7 @@ func compactUnicodeEscape(input, output []byte, index int) ([]byte, int) {
 		// Otherwise the character only needs escaping if it is a QUOTE '"' or BACKSLASH '\\'.
 		output = append(output, '\\', byte(c))
 	} else if utf16.IsSurrogate(c) {
-		if input[index] != '\\' && input[index+1] != 'u' {
+		if input[index] != '\\' || input[index+1] != 'u' {
 			return output, index
 		}
 		index += 2 // skip the \u"
diff --git a/json_test.go b/json_test.go
index 8c27802e..0de74bb5 100644
--- a/json_test.go
+++ b/json_test.go
@@ -125,6 +125,26 @@ func TestCompactUnicodeEscapeWithUTF16Surrogate(t *testing.T) {
 	}
 }
 
+func TestCompactUnicodeEscapeWithBadUTF16Surrogate(t *testing.T) {
+	input := []byte(`\ud83d\zdc08`)
+	output, n := compactUnicodeEscape(input[2:], nil, 0)
+	if n != 4 {
+		t.Fatalf("should have consumed 4 bytes but consumed %d bytes", n)
+	}
+	if string(output) != "" {
+		t.Fatalf("expected output to be empty")
+	}
+
+	input = []byte(`\ud83d udc08`)
+	output, n = compactUnicodeEscape(input[2:], nil, 0)
+	if n != 4 {
+		t.Fatalf("should have consumed 4 bytes but consumed %d bytes", n)
+	}
+	if string(output) != "" {
+		t.Fatalf("expected output to be empty")
+	}
+}
+
 func testReadHex(t *testing.T, input string, want rune) {
 	got := readHexDigits([]byte(input))
 	if want != got {