diff --git a/src/compiler/scala/tools/nsc/javac/JavaScanners.scala b/src/compiler/scala/tools/nsc/javac/JavaScanners.scala index 3f8ee1166a08..770e680012c0 100644 --- a/src/compiler/scala/tools/nsc/javac/JavaScanners.scala +++ b/src/compiler/scala/tools/nsc/javac/JavaScanners.scala @@ -239,6 +239,9 @@ trait JavaScanners extends ast.parser.ScannersCommon { */ protected def putChar(c: Char): Unit = { cbuf.append(c) } + /** Remove the last N characters from the buffer */ + private def popNChars(n: Int): Unit = if (n > 0) cbuf.setLength(cbuf.length - n) + /** Clear buffer and set name */ private def setName(): Unit = { name = newTermName(cbuf.toString()) @@ -322,15 +325,26 @@ trait JavaScanners extends ast.parser.ScannersCommon { case '\"' => in.next() - while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) { - getlitch() - } - if (in.ch == '\"') { - token = STRINGLIT - setName() - in.next() + if (in.ch != '\"') { // "..." non-empty string literal + while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) { + getlitch() + } + if (in.ch == '\"') { + token = STRINGLIT + setName() + in.next() + } else { + syntaxError("unclosed string literal") + } } else { - syntaxError("unclosed string literal") + in.next() + if (in.ch != '\"') { // "" empty string literal + token = STRINGLIT + setName() + } else { + in.next() + getTextBlock() + } } return @@ -664,9 +678,12 @@ trait JavaScanners extends ast.parser.ScannersCommon { // Literals ----------------------------------------------------------------- /** read next character in character or string literal: - */ - protected def getlitch() = - if (in.ch == '\\') { + * + * @param scanOnly skip emitting errors or adding to the literal buffer + * @param inTextBlock is this for a text block? + */ + protected def getlitch(scanOnly: Boolean = false, inTextBlock: Boolean = false): Unit = { + val c: Char = if (in.ch == '\\') { in.next() if ('0' <= in.ch && in.ch <= '7') { val leadch: Char = in.ch @@ -680,27 +697,147 @@ trait JavaScanners extends ast.parser.ScannersCommon { in.next() } } - putChar(oct.asInstanceOf[Char]) + oct.asInstanceOf[Char] } else { - in.ch match { - case 'b' => putChar('\b') - case 't' => putChar('\t') - case 'n' => putChar('\n') - case 'f' => putChar('\f') - case 'r' => putChar('\r') - case '\"' => putChar('\"') - case '\'' => putChar('\'') - case '\\' => putChar('\\') + val c: Char = in.ch match { + case 'b' => '\b' + case 's' => ' ' + case 't' => '\t' + case 'n' => '\n' + case 'f' => '\f' + case 'r' => '\r' + case '\"' => '\"' + case '\'' => '\'' + case '\\' => '\\' + case CR | LF if inTextBlock => + in.next() + return case _ => - syntaxError(in.cpos - 1, "invalid escape character") - putChar(in.ch) + if (!scanOnly) syntaxError(in.cpos - 1, "invalid escape character") + in.ch } in.next() + c } } else { - putChar(in.ch) + val c = in.ch in.next() + c } + if (!scanOnly) putChar(c) + } + + /** read a triple-quote delimited text block, starting after the first three + * double quotes + */ + private def getTextBlock(): Unit = { + // Open delimiter is followed by optional space, then a newline + while (in.ch == ' ' || in.ch == '\t' || in.ch == FF) { + in.next() + } + if (in.ch != LF && in.ch != CR) { // CR-LF is already normalized into LF by `JavaCharArrayReader` + syntaxError("illegal text block open delimiter sequence, missing line terminator") + return + } + in.next() + + /* Do a lookahead scan over the full text block to: + * - compute common white space prefix + * - find the offset where the text block ends + */ + var commonWhiteSpacePrefix = Int.MaxValue + var blockEndOffset = 0 + val backtrackTo = in.copy + var blockClosed = false + var lineWhiteSpacePrefix = 0 + var lineIsOnlyWhitespace = true + while (!blockClosed && (in.isUnicode || in.ch != SU)) { + if (in.ch == '\"') { // Potential end of the block + in.next() + if (in.ch == '\"') { + in.next() + if (in.ch == '\"') { + blockClosed = true + commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix + blockEndOffset = in.cpos - 2 + } + } + + // Not the end of the block - just a single or double " character + if (!blockClosed) { + lineIsOnlyWhitespace = false + } + } else if (in.ch == CR || in.ch == LF) { // new line in the block + in.next() + if (!lineIsOnlyWhitespace) { + commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix + } + lineWhiteSpacePrefix = 0 + lineIsOnlyWhitespace = true + } else if (lineIsOnlyWhitespace && Character.isWhitespace(in.ch)) { // extend white space prefix + in.next() + lineWhiteSpacePrefix += 1 + } else { + lineIsOnlyWhitespace = false + getlitch(scanOnly = true, inTextBlock = true) + } + } + + // Bail out if the block never did have an end + if (!blockClosed) { + syntaxError("unclosed text block") + return + } + + // Second pass: construct the literal string value this time + in = backtrackTo + while (in.cpos < blockEndOffset) { + // Drop the line's leading whitespace + var remainingPrefix = commonWhiteSpacePrefix + while (remainingPrefix > 0 && in.ch != CR && in.ch != LF && in.cpos < blockEndOffset) { + in.next() + remainingPrefix -= 1 + } + + var trailingWhitespaceLength = 0 + var escapedNewline = false // Does the line end with `\`? + while (in.ch != CR && in.ch != LF && in.cpos < blockEndOffset && !escapedNewline) { + if (Character.isWhitespace(in.ch)) { + trailingWhitespaceLength += 1 + } else { + trailingWhitespaceLength = 0 + } + + // Detect if the line is about to end with `\` + if (in.ch == '\\' && { + val lookahead = in.copy + lookahead.next() + lookahead.ch == CR || lookahead.ch == LF + }) { + escapedNewline = true + } + + getlitch(scanOnly = false, inTextBlock = true) + } + + // Drop the line's trailing whitespace + popNChars(trailingWhitespaceLength) + + // Normalize line terminators + if ((in.ch == CR || in.ch == LF) && !escapedNewline) { + in.next() + putChar('\n') + } + } + + token = STRINGLIT + setName() + + // Trailing """ + in.next() + in.next() + in.next() + } /** read fractional part and exponent of floating point number * if one is present. diff --git a/test/files/neg/text-blocks.check b/test/files/neg/text-blocks.check new file mode 100644 index 000000000000..8a9af6292a04 --- /dev/null +++ b/test/files/neg/text-blocks.check @@ -0,0 +1,13 @@ +text-blocks/Invalid1.java:4: error: illegal text block open delimiter sequence, missing line terminator + public static final String badOpeningDelimiter = """non-whitespace + ^ +text-blocks/Invalid1.java:4: error: expected + public static final String badOpeningDelimiter = """non-whitespace + ^ +text-blocks/Invalid1.java:6: error: illegal text block open delimiter sequence, missing line terminator + """; + ^ +text-blocks/Invalid2.java:6: error: unclosed string literal + foo""""; + ^ +4 errors diff --git a/test/files/neg/text-blocks/Invalid1.java b/test/files/neg/text-blocks/Invalid1.java new file mode 100644 index 000000000000..54c7e98d9219 --- /dev/null +++ b/test/files/neg/text-blocks/Invalid1.java @@ -0,0 +1,7 @@ +// javaVersion: 15+ +class Invalid1 { + + public static final String badOpeningDelimiter = """non-whitespace + foo + """; +} diff --git a/test/files/neg/text-blocks/Invalid2.java b/test/files/neg/text-blocks/Invalid2.java new file mode 100644 index 000000000000..08b0a57548aa --- /dev/null +++ b/test/files/neg/text-blocks/Invalid2.java @@ -0,0 +1,7 @@ +// javaVersion: 15+ +class Invalid2 { + + // Closing delimiter is first three eligible `"""`, not last + public static final String closingDelimiterIsNotScalas = """ + foo""""; +} diff --git a/test/files/run/t12290.check b/test/files/run/t12290.check new file mode 100644 index 000000000000..00d93b3657dd --- /dev/null +++ b/test/files/run/t12290.check @@ -0,0 +1,61 @@ +==== +A text + +==== + + +

Hello, world

+ + + +==== +SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB" +WHERE "CITY" = 'INDIANAPOLIS' +ORDER BY "EMP_ID", "LAST_NAME"; + +==== + + +

Hello, world

+ + + +==== + + +

Hello, world

+ + + +==== + + +

Hello, world

+ + + + +==== + + +

Hello , world

+ + + +==== + this line has 4 tabs before it + this line has 5 spaces before it and space after it + this line has 2 tabs and 3 spaces before it +  this line has 6 spaces before it + +==== +String text = """ + A text block inside a text block +"""; + +==== +foo bar +baz +==== + +==== diff --git a/test/files/run/t12290/Test.scala b/test/files/run/t12290/Test.scala new file mode 100644 index 000000000000..13b01b51478c --- /dev/null +++ b/test/files/run/t12290/Test.scala @@ -0,0 +1,30 @@ +// javaVersion: 15+ +/* Using `valueOf` is a way to check that the Java string literals were properly + * parsed, since the parsed value is what the Scala compiler will use when + * resolving the singleton types + */ +object Test extends App { + println("====") + println(valueOf[TextBlocks.aText.type]) + println("====") + println(valueOf[TextBlocks.html1.type]) + println("====") + println(valueOf[TextBlocks.query.type]) + println("====") + println(valueOf[TextBlocks.html2.type]) + println("====") + println(valueOf[TextBlocks.html3.type]) + println("====") + println(valueOf[TextBlocks.html4.type]) + println("====") + println(valueOf[TextBlocks.html5.type]) + println("====") + println(valueOf[TextBlocks.mixedIndents.type]) + println("====") + println(valueOf[TextBlocks.code.type]) + println("====") + println(valueOf[TextBlocks.simpleString.type]) + println("====") + println(valueOf[TextBlocks.emptyString.type]) + println("====") +} diff --git a/test/files/run/t12290/TextBlocks.java b/test/files/run/t12290/TextBlocks.java new file mode 100644 index 000000000000..e1928e74c971 --- /dev/null +++ b/test/files/run/t12290/TextBlocks.java @@ -0,0 +1,78 @@ +// javaVersion: 15+ +class TextBlocks { + + final static String aText = """ + A text + """; + + final static String html1 = """ + + +

Hello, world

+ + + """; + + // quote characters are unescaped + final static String query = """ + SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB" + WHERE "CITY" = 'INDIANAPOLIS' + ORDER BY "EMP_ID", "LAST_NAME"; + """; + + // incidental trailing spaces + final static String html2 = """ + + +

Hello, world

+ + + """; + + // trailing delimiter influences + final static String html3 = """ + + +

Hello, world

+ + + """; + + // blank line does not affect + final static String html4 = """ + + +

Hello, world

+ + + + """; + + // escape sequences + final static String html5 = """ + \n + \ +

Hello\s,\tworld

+ + + """; + + // mixed indentation + final static String mixedIndents = """ + \s this line has 4 tabs before it + this line has 5 spaces before it and space after it \u0020 \u000C\u0020 \u001E + this line has 2 tabs and 3 spaces before it +\u0020 \u000C\u0020 \u001E this line has 6 spaces before it + """; + + final static String code = + """ + String text = \""" + A text block inside a text block + \"""; + """; + + final static String simpleString = "foo\tbar\nbaz"; + + final static String emptyString = ""; +}