From 0947ccd0b88483801fc8b986a60762fce78abfaa Mon Sep 17 00:00:00 2001 From: Eugene Yokota Date: Thu, 26 Mar 2020 18:51:04 -0400 Subject: [PATCH 1/2] Allow \" in single-quoted string interpolations Changing `"Hello, \"World\""` to `s"Hello, \"$who\""` no longer breaks. Before this change, `\"` terminated single-quoted interpolated string literals, now the string remains open. The scanner doesn't interpret the escape sequence, string interpolators can do so (`s` and `f` do). Breaking changes: - `raw"c:\"` no longer compiles, it's now an unclosed string - `raw"c:\" // uh"` used to evaluate to `"""c:\"""`, now it's `"""c:\" // uh"""` --- .../scala/tools/nsc/ast/parser/Scanners.scala | 30 ++++++++++++++----- test/files/neg/t6476.check | 4 +++ test/files/neg/t6476.scala | 9 ++++++ test/files/neg/t6476b.check | 7 +++++ test/files/neg/t6476b.scala | 8 +++++ test/files/neg/t8266-invalid-interp.check | 4 +-- test/files/neg/t8266-invalid-interp.scala | 2 +- test/files/pos/t11966.scala | 2 +- test/files/run/interpolation-repl.check | 12 ++++++++ test/files/run/interpolation-repl.scala | 9 ++++++ test/files/run/t6476.check | 13 ++++++++ test/files/run/t6476.scala | 23 ++++++++++++++ 12 files changed, 112 insertions(+), 11 deletions(-) create mode 100644 test/files/neg/t6476.check create mode 100644 test/files/neg/t6476.scala create mode 100644 test/files/neg/t6476b.check create mode 100644 test/files/neg/t6476b.scala create mode 100644 test/files/run/interpolation-repl.check create mode 100644 test/files/run/interpolation-repl.scala create mode 100644 test/files/run/t6476.check create mode 100644 test/files/run/t6476.scala diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala index a4f8efc43eea..5c165a6dfed0 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala @@ -854,7 +854,12 @@ trait Scanners extends ScannersCommon { } else 
unclosedStringLit() } - private def unclosedStringLit(): Unit = syntaxError("unclosed string literal") + private def unclosedStringLit(seenEscapedQuoteInInterpolation: Boolean = false): Unit = { + val note = + if (seenEscapedQuoteInInterpolation) "; note that `\\\"` no longer closes single-quoted interpolated string literals since 2.13.6, you can use a triple-quoted string instead" + else "" + syntaxError(s"unclosed string literal$note") + } private def replaceUnicodeEscapesInTriple(): Unit = if(strVal != null) { @@ -890,7 +895,8 @@ trait Scanners extends ScannersCommon { } } - @tailrec private def getStringPart(multiLine: Boolean): Unit = { + // for interpolated strings + @tailrec private def getStringPart(multiLine: Boolean, seenEscapedQuote: Boolean = false): Unit = { def finishStringPart() = { setStrVal() token = STRINGPART @@ -904,18 +910,27 @@ trait Scanners extends ScannersCommon { setStrVal() token = STRINGLIT } else - getStringPart(multiLine) + getStringPart(multiLine, seenEscapedQuote) } else { nextChar() setStrVal() token = STRINGLIT } + } else if (ch == '\\' && !multiLine) { + putChar(ch) + nextRawChar() + val q = ch == '"' + if (q || ch == '\\') { + putChar(ch) + nextRawChar() + } + getStringPart(multiLine, seenEscapedQuote || q) } else if (ch == '$') { nextRawChar() if (ch == '$' || ch == '"') { putChar(ch) nextRawChar() - getStringPart(multiLine) + getStringPart(multiLine, seenEscapedQuote) } else if (ch == '{') { finishStringPart() nextRawChar() @@ -946,13 +961,14 @@ trait Scanners extends ScannersCommon { if (isUnclosedLiteral) { if (multiLine) incompleteInputError("unclosed multi-line string literal") - else - unclosedStringLit() + else { + unclosedStringLit(seenEscapedQuote) + } } else { putChar(ch) nextRawChar() - getStringPart(multiLine) + getStringPart(multiLine, seenEscapedQuote) } } } diff --git a/test/files/neg/t6476.check b/test/files/neg/t6476.check new file mode 100644 index 000000000000..bf0c65efc6b8 --- /dev/null +++ 
b/test/files/neg/t6476.check @@ -0,0 +1,4 @@ +t6476.scala:8: error: unclosed string literal; note that `\"` no longer closes single-quoted interpolated string literals since 2.13.6, you can use a triple-quoted string instead + mimi"\" + ^ +1 error diff --git a/test/files/neg/t6476.scala b/test/files/neg/t6476.scala new file mode 100644 index 000000000000..9b88e43593cb --- /dev/null +++ b/test/files/neg/t6476.scala @@ -0,0 +1,9 @@ +// only the last one doesn't parse +class C { + mimi"""\ """ + mimi"""\\""" + mimi"""\""" + mimi"\ " + mimi"\\" + mimi"\" +} diff --git a/test/files/neg/t6476b.check b/test/files/neg/t6476b.check new file mode 100644 index 000000000000..e6aa3e441214 --- /dev/null +++ b/test/files/neg/t6476b.check @@ -0,0 +1,7 @@ +t6476b.scala:2: error: invalid escape at terminal index 0 in "\". Use \\ for literal \. + val sa = s"""\""" + ^ +t6476b.scala:4: error: invalid escape '\ ' not one of [\b, \t, \n, \f, \r, \\, \", \', \uxxxx] at index 0 in "\ ". Use \\ for literal \. + val sc = s"""\ """ + ^ +2 errors diff --git a/test/files/neg/t6476b.scala b/test/files/neg/t6476b.scala new file mode 100644 index 000000000000..d601091972ce --- /dev/null +++ b/test/files/neg/t6476b.scala @@ -0,0 +1,8 @@ +class C { + val sa = s"""\""" + val sb = s"""\\""" + val sc = s"""\ """ + val ra = raw"""\""" + val rb = raw"""\\""" + val rc = raw"""\ """ +} diff --git a/test/files/neg/t8266-invalid-interp.check b/test/files/neg/t8266-invalid-interp.check index 0f55ef3eaf42..bdfcd97d6039 100644 --- a/test/files/neg/t8266-invalid-interp.check +++ b/test/files/neg/t8266-invalid-interp.check @@ -1,6 +1,6 @@ t8266-invalid-interp.scala:4: error: Trailing '\' escapes nothing. - f"a\", - ^ + f"""a\""", + ^ t8266-invalid-interp.scala:5: error: invalid escape '\x' not one of [\b, \t, \n, \f, \r, \\, \", \', \uxxxx] at index 1 in "a\xc". Use \\ for literal \. 
f"a\xc", ^ diff --git a/test/files/neg/t8266-invalid-interp.scala b/test/files/neg/t8266-invalid-interp.scala index 4b26546880a3..87579a68691b 100644 --- a/test/files/neg/t8266-invalid-interp.scala +++ b/test/files/neg/t8266-invalid-interp.scala @@ -1,7 +1,7 @@ trait X { def f = Seq( - f"a\", + f"""a\""", f"a\xc", // following could suggest \u000b for vertical tab, similar for \a alert f"a\vc" diff --git a/test/files/pos/t11966.scala b/test/files/pos/t11966.scala index 2e9632a34869..b662e71322da 100644 --- a/test/files/pos/t11966.scala +++ b/test/files/pos/t11966.scala @@ -3,5 +3,5 @@ object Test { val original = """\/ \/ /\""" val minimal = """\1234\""" - val alternative = raw"\1234\" + val alternative = raw"""\1234\""" } \ No newline at end of file diff --git a/test/files/run/interpolation-repl.check b/test/files/run/interpolation-repl.check new file mode 100644 index 000000000000..c6e246c806b1 --- /dev/null +++ b/test/files/run/interpolation-repl.check @@ -0,0 +1,12 @@ + +scala> raw"\"" +val res0: String = \" + +scala> raw"\" // this used to be a comment, but after scala/pull#8830 it's part of the string! " +val res1: String = "\" // this used to be a comment, but after scala/pull#8830 it's part of the string! " + +scala> raw"\" // this used to compile, now it's unclosed + ^ + error: unclosed string literal; note that `\"` no longer closes single-quoted interpolated string literals since 2.13.6, you can use a triple-quoted string instead + +scala> :quit diff --git a/test/files/run/interpolation-repl.scala b/test/files/run/interpolation-repl.scala new file mode 100644 index 000000000000..ba84178ce92c --- /dev/null +++ b/test/files/run/interpolation-repl.scala @@ -0,0 +1,9 @@ +import scala.tools.partest.ReplTest + +object Test extends ReplTest { + def code = """ +raw"\"" +raw"\" // this used to be a comment, but after scala/pull#8830 it's part of the string! 
" +raw"\" // this used to compile, now it's unclosed +""" +} diff --git a/test/files/run/t6476.check b/test/files/run/t6476.check new file mode 100644 index 000000000000..b7be3ae88a91 --- /dev/null +++ b/test/files/run/t6476.check @@ -0,0 +1,13 @@ +"Hello", Alice +"Hello", Alice +"Hello", Alice +"Hello", Alice +\"Hello\", Alice +\"Hello\", Alice +\TILT\ +\TILT\ +\\TILT\\ +\TILT\ +\TILT\ +\\TILT\\ +\TILT\ diff --git a/test/files/run/t6476.scala b/test/files/run/t6476.scala new file mode 100644 index 000000000000..a04645065a2a --- /dev/null +++ b/test/files/run/t6476.scala @@ -0,0 +1,23 @@ +object Test { + def main(args: Array[String]): Unit = { + val person = "Alice" + println(s"\"Hello\", $person") + println(s"""\"Hello\", $person""") + + println(f"\"Hello\", $person") + println(f"""\"Hello\", $person""") + + println(raw"\"Hello\", $person") + println(raw"""\"Hello\", $person""") + + println(s"\\TILT\\") + println(f"\\TILT\\") + println(raw"\\TILT\\") + + println(s"""\\TILT\\""") + println(f"""\\TILT\\""") + println(raw"""\\TILT\\""") + + println(raw"""\TILT\""") + } +} From 62f515d0d9a4c82c4cf681035a0ee73e918c2cf5 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Mon, 8 Mar 2021 16:09:07 +0100 Subject: [PATCH 2/2] Spec for \" in interpolated strings Also, unicode escapes are no longer interpreted in interpolated strings. Interpolators can still interpret them, but that's not in the spec. --- spec/01-lexical-syntax.md | 28 +++++++++++++++------------- spec/13-syntax-summary.md | 6 ++++-- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/spec/01-lexical-syntax.md b/spec/01-lexical-syntax.md index c345935941b9..718950b171a1 100644 --- a/spec/01-lexical-syntax.md +++ b/spec/01-lexical-syntax.md @@ -463,7 +463,7 @@ arbitrary, except that it may contain three or more consecutive quote characters only at the very end. Characters must not necessarily be printable; newlines or other control characters are also permitted. 
[Escape sequences](#escape-sequences) are -not processed, except for Unicode escapes. +not processed, except for Unicode escapes (processing Unicode escapes in multi-line string literals is deprecated since 2.13.2). > ```scala > """the present string @@ -503,8 +503,9 @@ not processed, except for Unicode escapes. #### Interpolated string ```ebnf -interpolatedString ::= alphaid ‘"’ {printableChar \ (‘"’ | ‘$’) | escape} ‘"’ - | alphaid ‘"""’ {[‘"’] [‘"’] char \ (‘"’ | ‘$’) | escape} {‘"’} ‘"""’ +interpolatedString ::= alphaid ‘"’ {[‘\’] interpolatedStringPart | ‘\\’ | ‘\"’} ‘"’ + | alphaid ‘"""’ {[‘"’] [‘"’] char \ (‘"’ | ‘$’) | escape} {‘"’} ‘"""’ +interpolatedStringPart ::= printableChar \ (‘"’ | ‘$’ | ‘\’) | escape escape ::= ‘$$’ | ‘$"’ | ‘$’ id @@ -514,23 +515,24 @@ alphaid ::= upper idrest ``` -Interpolated string consist of an identifier starting with a letter immediately +An interpolated string consists of an identifier starting with a letter immediately followed by a string literal. There may be no whitespace characters or comments -between the leading identifier and the opening quote ‘”’ of the string. -The string literal in a interpolated string can be standard (single quote) +between the leading identifier and the opening quote `"` of the string. +The string literal in an interpolated string can be standard (single quote) or multi-line (triple quote). -Inside a interpolated string none of the usual escape characters are interpreted -(except for unicode escapes) no matter whether the string literal is normal -(enclosed in single quotes) or multi-line (enclosed in triple quotes). -Instead, there are three new forms of dollar sign escape. +Inside an interpolated string none of the usual escape characters are interpreted +no matter whether the string literal is normal (enclosed in single quotes) or +multi-line (enclosed in triple quotes). Note that the sequence `\"` does not +close a normal string literal (enclosed in single quotes). + +There are three forms of dollar sign escape. 
The most general form encloses an expression in `${` and `}`, i.e. `${expr}`. The expression enclosed in the braces that follow the leading `$` character is of syntactical category BlockExpr. Hence, it can contain multiple statements, and newlines are significant. Single ‘$’-signs are not permitted in isolation -in a interpolated string. A single ‘$’-sign can still be obtained by doubling the ‘$’ -character: ‘$$’. A single ‘"’-sign in a single quoted interpolation would end the -interpolation. A single ‘"’-sign can be obtained by the sequence ‘\$"’. +in an interpolated string. A single ‘$’-sign can still be obtained by doubling the ‘$’ +character: ‘$$’. A single ‘"’-sign can be obtained by the sequence ‘\$"’. The simpler form consists of a ‘$’-sign followed by an identifier starting with a letter and followed only by letters, digits, and underscore characters, diff --git a/spec/13-syntax-summary.md b/spec/13-syntax-summary.md index 442d76adb7a4..aec631beb45f 100644 --- a/spec/13-syntax-summary.md +++ b/spec/13-syntax-summary.md @@ -60,9 +60,11 @@ stringElement ::= charNoDoubleQuoteOrNewline | escapeSeq multiLineChars ::= {[‘"’] [‘"’] charNoDoubleQuote} {‘"’} -interpolatedString - ::= alphaid ‘"’ {printableChar \ (‘"’ | ‘\$’) | escape} ‘"’ +interpolatedString + ::= alphaid ‘"’ {[‘\’] interpolatedStringPart | ‘\\’ | ‘\"’} ‘"’ | alphaid ‘"""’ {[‘"’] [‘"’] char \ (‘"’ | ‘\$’) | escape} {‘"’} ‘"""’ +interpolatedStringPart + ::= printableChar \ (‘"’ | ‘\$’ | ‘\’) | escape escape ::= ‘\$\$’ | ‘\$"’ | ‘\$’ id