From 55d52d2cdf701ddaeff2483569946a1d1e72cf6f Mon Sep 17 00:00:00 2001 From: Taco de Wolff Date: Fri, 5 Jan 2024 21:58:27 -0300 Subject: [PATCH] JS: fix converting \n and \r to unicode, do so only in template literals --- go.mod | 2 +- go.sum | 3 ++- js/js_test.go | 5 ++++- js/util.go | 42 +++++++++++++++++++++++++++++++----------- 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/go.mod b/go.mod index eff4bbc3b..0b57e14e5 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2 github.com/tdewolff/argp v0.0.0-20231129210956-bb03d6873d97 github.com/tdewolff/parse/v2 v2.7.7 - github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52 + github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 ) require ( diff --git a/go.sum b/go.sum index f9a956d8e..79e77b888 100644 --- a/go.sum +++ b/go.sum @@ -13,7 +13,8 @@ github.com/tdewolff/argp v0.0.0-20231129210956-bb03d6873d97/go.mod h1:fF+gnKbmf3 github.com/tdewolff/parse/v2 v2.7.7 h1:V+50eFDH7Piw4IBwH8D8FtYeYbZp3T4SCtIvmBSIMyc= github.com/tdewolff/parse/v2 v2.7.7/go.mod h1:3FbJWZp3XT9OWVN3Hmfp0p/a08v4h8J9W1aghka0soA= github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= -github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52 h1:gAQliwn+zJrkjAHVcBEYW/RFvd2St4yYimisvozAYlA= github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= +github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 h1:IkjBCtQOOjIn03u/dMQK9g+Iw9ewps4mCl1nB8Sscbo= +github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739/go.mod h1:XPuWBzvdUzhCuxWO1ojpXsyzsA5bFoS3tO/Q3kFuTG8= golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/js/js_test.go b/js/js_test.go index 9e1825d99..97a561155 100644 --- a/js/js_test.go +++ b/js/js_test.go @@ -152,8 +152,10 @@ func TestJS(t *testing.T) { {`"string\a\c\'string"`, `"stringac'string"`}, {`"string\∀string"`, `"string∀string"`}, {`"string\0\uFFFFstring"`, "\"string\\0\uffffstring\""}, - {`"string\x00\x55\x0A\x0D\x22\x27string"`, `"string\x00U\n\r\"'string"`}, + {`"string\x00\x55\x0A\x0D\x22\x27string"`, "`string\\x00U\n\r\"'string`"}, {`"string\000\12\015\042\47\411string"`, "\"string\\0\\n\\r\\\"'!1string\""}, + {`'\x0A\x0D'`, "`\n\r`"}, + {`'\12\15'`, `"\n\r"`}, {`"\x005"`, `"\x005"`}, {"'string\\n\\rstring'", "`string\n\rstring`"}, {"'string\\\r\nstring\\\nstring\\\rstring\\\u2028string\\\u2029string'", `"stringstringstringstringstringstring"`}, @@ -807,6 +809,7 @@ func TestJS(t *testing.T) { {`function transform(){{var aaaa=[];for(var b=0;;){}for(var b in aaaa){}var aaaa=[];for(var b=0;;){}}}`, `function transform(){{for(var aaaa=[],b=0;;);for(b in aaaa);for(aaaa=[],b=0;;);}}`}, // #619 {`for(var a=0;;){var b=5;for(var c=0;;);}`, `for(var b,c,a=0;;)for(b=5,c=0;;);`}, // #634 {"if(a)for(;;)\n;else b", `if(a)for(;;);else b`}, // #636 + {`'\u000A\u000D'`, "`\n\r`"}, // #653 } m := minify.New() diff --git a/js/util.go b/js/util.go index ada17651a..edcfabbcb 100644 --- a/js/util.go +++ b/js/util.go @@ -940,7 +940,7 @@ func minifyString(b []byte, allowTemplate bool) []byte { backtickQuotes := 0 newlines := 0 dollarSigns := 0 - notEscapes := false + hasOctals := false for i := 1; i < len(b)-1; i++ { if b[i] == '\'' { singleQuotes++ @@ -954,7 +954,19 @@ func minifyString(b []byte, allowTemplate bool) []byte { if b[i+1] == 'n' || b[i+1] == 'r' { newlines++ } else if '1' <= b[i+1] && b[i+1] <= '9' || b[i+1] == '0' && i+2 < len(b) && '0' <= b[i+2] && b[i+2] <= '9' { - notEscapes = true + hasOctals = true + } else if b[i+1] == 'x' && i+3 < len(b) && b[i+2] == '0' && (b[i+3]|0x20 == 'a' || b[i+3]|0x20 == 'd') { + newlines++ + } else if b[i+1] == 'u' && i+5 < len(b) && b[i+2] == '0' && b[i+3] == '0' && b[i+4] == '0' && (b[i+5]|0x20 == 'a' || b[i+5]|0x20 == 'd') { + newlines++ + } else if b[i+1] == 'u' && i+4 < len(b) && b[i+2] == '{' { + j := i + 3 + for j < len(b) && b[j] == '0' { + j++ + } + if j+1 < len(b) && (b[j]|0x20 == 'a' || b[j]|0x20 == 'd') && b[j+1] == '}' { + newlines++ + } } } } @@ -966,7 +978,7 @@ func minifyString(b []byte, allowTemplate bool) []byte { } else if singleQuotes < doubleQuotes { quote = byte('\'') } - if allowTemplate && !notEscapes && backtickQuotes+dollarSigns < quotes+newlines { + if allowTemplate && !hasOctals && backtickQuotes+dollarSigns < quotes+newlines { quote = byte('`') } b[0] = quote @@ -983,7 +995,7 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte { for i := prefix; i < len(b)-suffix-1; i++ { if c := b[i]; c == '\\' { c = b[i+1] - if c == quote || c == '\\' || quote != '`' && (c == 'n' || c == 'r') || c == '0' && (len(b)-suffix <= i+2 || b[i+2] < '0' || '7' < b[i+2]) { + if c == quote || c == '\\' || c == '0' && (len(b)-suffix <= i+2 || b[i+2] < '0' || '7' < b[i+2]) { // keep escape sequence i++ continue @@ -1004,7 +1016,7 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte { // hexadecimal escapes _, _ = hex.Decode(b[i:i+1:i+1], b[i+2:i+4]) n = 4 - if b[i] == '\\' || b[i] == quote || b[i] == '\n' || b[i] == '\r' || b[i] == 0 { + if b[i] == '\\' || b[i] == quote || quote != '`' && (b[i] == '\n' || b[i] == '\r') || b[i] == 0 { if b[i] == '\n' { b[i+1] = 'n' } else if b[i] == '\r' { @@ -1061,7 +1073,7 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte { i += 4 n -= 4 } - } else { + } else if quote == '`' || num != 10 && num != 13 { // decode unicode character to UTF-8 and put at the end of the escape sequence // then skip the first part of the escape sequence until the decoded character m := utf8.RuneLen(rune(num)) @@ -1072,9 +1084,17 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte { utf8.EncodeRune(b[i:], rune(num)) i += m n -= m + } else { + if num == 10 { + b[i+1] = 'n' + } else { + b[i+1] = 'r' + } + i += 2 + n -= 2 } } else if '0' <= c && c <= '7' { - // octal escapes (legacy), \0 already handled + // octal escapes (legacy), \0 already handled (quote != `) num := c - '0' n++ if i+2 < len(b)-1 && '0' <= b[i+2] && b[i+2] <= '7' { @@ -1102,11 +1122,11 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte { } i++ n-- - } else if c == 'n' { - b[i] = '\n' // only for template literals + } else if quote == '`' && c == 'n' { + b[i] = '\n' i++ - } else if c == 'r' { - b[i] = '\r' // only for template literals + } else if quote == '`' && c == 'r' { + b[i] = '\r' i++ } else if c == 't' { b[i] = '\t'