From 9eb87a235e004bdaae27285d43578682adfcd8a3 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 16:14:19 +0800 Subject: [PATCH 01/30] . --- .../dotty/tools/dotc/parsing/Scanners.scala | 168 +++++++++++++++++- project/project/build.properties | 1 + tests/neg/dedented-string-literals.check | 16 ++ tests/neg/dedented-string-literals.scala | 22 +++ tests/pos/dedented-string-literals.scala | 129 ++++++++++++++ tests/run/dedented-string-literals.check | 52 ++++++ tests/run/dedented-string-literals.scala | 121 +++++++++++++ 7 files changed, 508 insertions(+), 1 deletion(-) create mode 100644 project/project/build.properties create mode 100644 tests/neg/dedented-string-literals.check create mode 100644 tests/neg/dedented-string-literals.scala create mode 100644 tests/pos/dedented-string-literals.scala create mode 100644 tests/run/dedented-string-literals.check create mode 100644 tests/run/dedented-string-literals.scala diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 52e03de60dea..6b0e0313b460 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -978,7 +978,19 @@ object Scanners { case '\'' => def fetchSingleQuote(): Unit = { nextChar() - if isIdentifierStart(ch) then + // Check for triple single quote (dedented string literal) + if (ch == '\'') { + nextChar() + if (ch == '\'') { + // We have at least ''' + getDedentedStringLit() + } + else { + // We have '' followed by something else + error(em"empty character literal") + } + } + else if isIdentifierStart(ch) then charLitOr { getIdentRest(); QUOTEID } else if isOperatorPart(ch) && ch != '\\' then charLitOr { getOperatorRest(); QUOTEID } @@ -1255,6 +1267,160 @@ object Scanners { else error(em"unclosed string literal") } + /** Parse a dedented string literal (triple single quotes) + * Requirements: + * - Must start with ''' followed by newline + * - Must end with newline + whitespace + ''' + * - Removes first newline after opening delimiter + * - Removes final newline and preceding whitespace before closing delimiter + * - Strips indentation equal to closing delimiter indentation + * - All lines must be empty or indented further than closing delimiter + * - Supports extended delimiters (e.g., '''', ''''') + */ + private def getDedentedStringLit(): Unit = { + // Count opening quotes (already consumed 3) + nextChar() + var quoteCount = 3 + while (ch == '\'') { + quoteCount += 1 + nextChar() + } + + // Must be followed by a newline + if (ch != LF && ch != CR) { + error(em"dedented string literal must start with newline after opening quotes") + token = ERROR + } else { + // Skip the initial newline (CR LF or just LF) + if (ch == CR) nextRawChar() + if (ch == LF) nextRawChar() + + // Collect all lines until we find the closing delimiter + val lines = scala.collection.mutable.ArrayBuffer[String]() + val lineIndents = scala.collection.mutable.ArrayBuffer[String]() + var currentLine = new StringBuilder + var currentIndent = new StringBuilder + var atLineStart = true + var closingIndent: String = null + var foundClosing = false + + while (!foundClosing && ch != SU) { + if (atLineStart) { + // Collect indentation + currentIndent.clear() + while (ch == ' ' || ch == '\t') { + currentIndent.append(ch) + nextRawChar() + } + + // Check if this might be the closing delimiter + if (ch == '\'') { + var endQuoteCount = 0 + val savedOffset = charOffset + while (ch == '\'' && endQuoteCount < quoteCount + 1) { + endQuoteCount += 1 + nextRawChar() + } + + if (endQuoteCount == quoteCount && (ch == SU || ch == CR || ch == LF || ch == ' ' || ch == '\t' || ch == ';')) { + // Found closing delimiter + foundClosing = true + closingIndent = currentIndent.toString + // Consume any trailing whitespace/newlines after closing quotes + while (ch == ' ' || ch == '\t') nextChar() + if (ch == CR || ch == LF) nextChar() + } else { + // False alarm, these quotes are part of the content + // We need to restore and add them to current line + currentLine.append(currentIndent) + for (_ <- 0 until endQuoteCount) currentLine.append('\'') + atLineStart = false + } + } else { + atLineStart = false + } + } + + if (!foundClosing && !atLineStart) { + // Regular content + if (ch == CR || ch == LF) { + // End of line + lineIndents += currentIndent.toString + lines += currentLine.toString + currentLine.clear() + currentIndent.clear() + + // Normalize newlines to \n + if (ch == CR) nextRawChar() + if (ch == LF) nextRawChar() + atLineStart = true + } else { + currentLine.append(ch) + nextRawChar() + } + } + } + + if (!foundClosing) { + incompleteInputError(em"unclosed dedented string literal") + } else if (closingIndent == null) { + error(em"internal error: closing indent not set") + token = ERROR + } else { + // Validate and dedent all lines + val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() + val closingIndentLen = closingIndent.length + var hasSpaces = false + var hasTabs = false + + for (indent <- closingIndent) { + if (indent == ' ') hasSpaces = true + if (indent == '\t') hasTabs = true + } + + var hasError = false + for (i <- 0 until lines.length if !hasError) { + val line = lines(i) + val indent = lineIndents(i) + + // Check for mixed tabs and spaces + var lineHasSpaces = false + var lineHasTabs = false + for (ch <- indent) { + if (ch == ' ') lineHasSpaces = true + if (ch == '\t') lineHasTabs = true + } + + if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { + error(em"dedented string literal cannot mix tabs and spaces in indentation") + token = ERROR + hasError = true + } else if (line.isEmpty) { + // Empty lines are allowed + dedentedLines += "" + } else { + // Non-empty lines must be indented at least as much as closing delimiter + if (!indent.startsWith(closingIndent)) { + error(em"line in dedented string literal must be indented at least as much as the closing delimiter") + token = ERROR + hasError = true + } else { + // Remove the closing indentation from this line + dedentedLines += indent.substring(closingIndentLen) + line + } + } + } + + if (!hasError) { + // Set the string value (join with \n) + strVal = dedentedLines.mkString("\n") + litBuf.clear() + token = STRINGLIT + } + } + } + } + private def getRawStringLit(): Unit = if (ch == '\"') { nextRawChar() diff --git a/project/project/build.properties b/project/project/build.properties new file mode 100644 index 000000000000..0b699c3052d7 --- /dev/null +++ b/project/project/build.properties @@ -0,0 +1 @@ +sbt.version=1.10.2 diff --git a/tests/neg/dedented-string-literals.check b/tests/neg/dedented-string-literals.check new file mode 100644 index 000000000000..cf32ebdf43ec --- /dev/null +++ b/tests/neg/dedented-string-literals.check @@ -0,0 +1,16 @@ +-- Error: tests/neg/dedented-string-literals.scala:5:36 +5 | val noNewlineAfterOpen = '''content on same line // error + | ^ + |dedented string literal must start with newline after opening quotes +-- Error: tests/neg/dedented-string-literals.scala:8:0 +8 |content + |^ + |line in dedented string literal must be indented at least as much as the closing delimiter +-- Error: tests/neg/dedented-string-literals.scala:14:0 +14 | space line + | ^ + |dedented string literal cannot mix tabs and spaces in indentation +-- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:19:0 +19 | // error: missing closing quotes + | ^^ + |unclosed dedented string literal diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala new file mode 100644 index 000000000000..9a4c852e5b3d --- /dev/null +++ b/tests/neg/dedented-string-literals.scala @@ -0,0 +1,22 @@ +// Test error cases for dedented string literals + +object DedentedStringErrors { + // Error: No newline after opening quotes + val noNewlineAfterOpen = '''content on same line // error + + // Error: Content not indented enough + val notIndented = ''' +content + ''' // error + + // Error: Mixed tabs and spaces + val mixedTabsSpaces = ''' + tab line + space line + ''' // error + + // Error: Unclosed literal + val unclosed = ''' +some content + // error: missing closing quotes +} diff --git a/tests/pos/dedented-string-literals.scala b/tests/pos/dedented-string-literals.scala new file mode 100644 index 000000000000..45cdb1431745 --- /dev/null +++ b/tests/pos/dedented-string-literals.scala @@ -0,0 +1,129 @@ +// Test dedented string literals as specified in SIP + +object DedentedStringLiterals { + // Basic usage + val basic = ''' +i am cow +hear me moo +''' + + // With indentation preserved + val withIndent = ''' + i am cow + hear me moo +''' + + // Empty string + val empty = ''' +''' + + // Single line of content + val singleLine = ''' +hello world +''' + + // Multiple blank lines + val blankLines = ''' +line 1 + +line 3 +''' + + // Deep indentation + val deepIndent = ''' + deeply + indented + content +''' + + // Mixed content indentation (more than closing) + val mixedIndent = ''' + first level + second level + third level +''' + + // Using extended delimiter to include ''' + val withTripleQuotes = '''' +''' +i am cow +hear me moo +''' +'''' + + // Extended delimiter with 5 quotes + val extended5 = ''''' +'''' +content with four quotes +'''' +''''' + + // Tabs for indentation + val withTabs = ''' + tab indented + content here +''' + + // Empty lines are allowed anywhere + val emptyLinesAnywhere = ''' + +content + +more content + +''' + + // Testing in different contexts + def inFunction = ''' + function content + more content +''' + + class InClass { + val inClass = ''' + class member + content +''' + } + + // In a list + val list = List( + ''' + first + ''', + ''' + second + ''', + ''' + third + ''' + ) + + // Nested in expressions + val nested = "prefix" + ''' + middle + ''' + "suffix" + + // With special characters + val specialChars = ''' +!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ +''' + + // Unicode content + val unicode = ''' +Hello 世界 +Καλημέρα κόσμε +''' + + // Zero-width closing indent + val zeroIndent = ''' +content +''' + + // Content with quotes + val withQuotes = ''' +"double quotes" +'single quote' +'' +''' +} diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check new file mode 100644 index 000000000000..971a52a35011 --- /dev/null +++ b/tests/run/dedented-string-literals.check @@ -0,0 +1,52 @@ +Basic: +i am cow +hear me moo + +With indent: +i am cow +hear me moo + +Empty: +[] + +Single line: +hello world + +Blank lines: +line 1 + +line 3 + +Deep indent: +deeply +indented +content + +Mixed indent: +first level + second level + third level + +With triple quotes: +''' +i am cow +''' + +Normalized newlines: +Has only LF: true + +Special chars: +!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ + +Unicode: +Hello 世界 + +Zero indent: +content + +Precise: +Length: 7 +Content: [ab +cd] +Chars: List(a, b, +, c, d) diff --git a/tests/run/dedented-string-literals.scala b/tests/run/dedented-string-literals.scala new file mode 100644 index 000000000000..c6b3ea56840d --- /dev/null +++ b/tests/run/dedented-string-literals.scala @@ -0,0 +1,121 @@ +// Test runtime behavior of dedented string literals + +object Test { + def main(args: Array[String]): Unit = { + // Test basic dedenting + val basic = ''' +i am cow +hear me moo +''' + println("Basic:") + println(basic) + println() + + // Test with indentation preserved + val withIndent = ''' + i am cow + hear me moo +''' + println("With indent:") + println(withIndent) + println() + + // Test empty string + val empty = ''' +''' + println("Empty:") + println(s"[${empty}]") + println() + + // Test single line + val singleLine = ''' +hello world +''' + println("Single line:") + println(singleLine) + println() + + // Test blank lines + val blankLines = ''' +line 1 + +line 3 +''' + println("Blank lines:") + println(blankLines) + println() + + // Test deep indentation removal + val deepIndent = ''' + deeply + indented + content +''' + println("Deep indent:") + println(deepIndent) + println() + + // Test mixed indentation levels (preserved) + val mixedIndent = ''' + first level + second level + third level +''' + println("Mixed indent:") + println(mixedIndent) + println() + + // Test extended delimiter with embedded ''' + val withTripleQuotes = '''' +''' +i am cow +''' +'''' + println("With triple quotes:") + println(withTripleQuotes) + println() + + // Test that newlines are normalized to \n + val normalized = ''' +line1 +line2 +''' + println("Normalized newlines:") + println(s"Has only LF: ${!normalized.contains('\r')}") + println() + + // Test special characters + val specialChars = ''' +!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ +''' + println("Special chars:") + println(specialChars) + println() + + // Test unicode + val unicode = ''' +Hello 世界 +''' + println("Unicode:") + println(unicode) + println() + + // Test zero-width closing indent + val zeroIndent = ''' +content +''' + println("Zero indent:") + println(zeroIndent) + println() + + // Test content length and character accuracy + val precise = ''' + ab + cd +''' + println("Precise:") + println(s"Length: ${precise.length}") + println(s"Content: [${precise}]") + println(s"Chars: ${precise.toList}") + } +} From 5109265099165105b7edb7d743f2749e63af9f0e Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 17:47:45 +0800 Subject: [PATCH 02/30] . --- .../dotty/tools/dotc/parsing/Scanners.scala | 7 ++--- tests/neg/dedented-string-literals.check | 22 ++++++++-------- tests/neg/dedented-string-literals.scala | 11 ++++---- tests/run/dedented-string-literals.check | 26 +++++++++---------- 4 files changed, 31 insertions(+), 35 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 6b0e0313b460..30375ef7cf82 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1322,13 +1322,10 @@ object Scanners { nextRawChar() } - if (endQuoteCount == quoteCount && (ch == SU || ch == CR || ch == LF || ch == ' ' || ch == '\t' || ch == ';')) { - // Found closing delimiter + if (endQuoteCount == quoteCount && ch != '\'') { + // Found closing delimiter (not followed by another quote) foundClosing = true closingIndent = currentIndent.toString - // Consume any trailing whitespace/newlines after closing quotes - while (ch == ' ' || ch == '\t') nextChar() - if (ch == CR || ch == LF) nextChar() } else { // False alarm, these quotes are part of the content // We need to restore and add them to current line diff --git a/tests/neg/dedented-string-literals.check b/tests/neg/dedented-string-literals.check index cf32ebdf43ec..fb135a44bfd8 100644 --- a/tests/neg/dedented-string-literals.check +++ b/tests/neg/dedented-string-literals.check @@ -1,16 +1,16 @@ --- Error: tests/neg/dedented-string-literals.scala:5:36 +-- Error: tests/neg/dedented-string-literals.scala:5:27 5 | val noNewlineAfterOpen = '''content on same line // error - | ^ + | ^ |dedented string literal must start with newline after opening quotes --- Error: tests/neg/dedented-string-literals.scala:8:0 -8 |content - |^ +-- Error: tests/neg/dedented-string-literals.scala:8:20 +8 | val notIndented = ''' + | ^ |line in dedented string literal must be indented at least as much as the closing delimiter --- Error: tests/neg/dedented-string-literals.scala:14:0 -14 | space line - | ^ +-- Error: tests/neg/dedented-string-literals.scala:13:24 +13 | val mixedTabsSpaces = ''' + | ^ |dedented string literal cannot mix tabs and spaces in indentation --- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:19:0 -19 | // error: missing closing quotes - | ^^ +-- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:19:17 +19 | val unclosed = ''' + | ^ |unclosed dedented string literal diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala index 9a4c852e5b3d..88058c5347e8 100644 --- a/tests/neg/dedented-string-literals.scala +++ b/tests/neg/dedented-string-literals.scala @@ -4,19 +4,18 @@ object DedentedStringErrors { // Error: No newline after opening quotes val noNewlineAfterOpen = '''content on same line // error - // Error: Content not indented enough + // Error: Content not indented enough // error val notIndented = ''' content - ''' // error + ''' - // Error: Mixed tabs and spaces + // Error: Mixed tabs and spaces // error val mixedTabsSpaces = ''' tab line space line - ''' // error + ''' - // Error: Unclosed literal + // Error: Unclosed literal // error val unclosed = ''' some content - // error: missing closing quotes } diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check index 971a52a35011..7838b833bfff 100644 --- a/tests/run/dedented-string-literals.check +++ b/tests/run/dedented-string-literals.check @@ -3,8 +3,8 @@ i am cow hear me moo With indent: -i am cow -hear me moo + i am cow + hear me moo Empty: [] @@ -18,14 +18,14 @@ line 1 line 3 Deep indent: -deeply -indented -content + deeply + indented + content Mixed indent: -first level - second level - third level + first level + second level + third level With triple quotes: ''' @@ -45,8 +45,8 @@ Zero indent: content Precise: -Length: 7 -Content: [ab -cd] -Chars: List(a, b, -, c, d) +Length: 9 +Content: [ ab + cd] +Chars: List( , , a, b, +, , , c, d) From ab9a589d02fffa6185d4d13fba4a5ca3f321bd68 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 17:56:43 +0800 Subject: [PATCH 03/30] . --- tests/neg/dedented-string-literals.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala index 88058c5347e8..7511ccc86cc7 100644 --- a/tests/neg/dedented-string-literals.scala +++ b/tests/neg/dedented-string-literals.scala @@ -1,21 +1,21 @@ // Test error cases for dedented string literals -object DedentedStringErrors { +object DedentedStringErrors { // nopos-error // nopos-error // nopos-error // Error: No newline after opening quotes val noNewlineAfterOpen = '''content on same line // error - // Error: Content not indented enough // error + // Error: Content not indented enough val notIndented = ''' content ''' - // Error: Mixed tabs and spaces // error + // Error: Mixed tabs and spaces val mixedTabsSpaces = ''' tab line space line ''' - // Error: Unclosed literal // error + // Error: Unclosed literal val unclosed = ''' some content } From 00f04b8b1a8287ecd809435ed651613d07063ace Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 19:06:07 +0800 Subject: [PATCH 04/30] . --- .../dotty/tools/dotc/parsing/Scanners.scala | 347 ++++++++++++------ project/project/build.properties | 1 - tests/neg/dedented-string-literals.check | 9 + tests/neg/dedented-string-literals.scala | 20 + tests/pos/dedented-string-literals.scala | 129 ------- tests/run/dedented-string-literals.check | 70 ++++ tests/run/dedented-string-literals.scala | 253 +++++++++++-- 7 files changed, 548 insertions(+), 281 deletions(-) delete mode 100644 project/project/build.properties delete mode 100644 tests/pos/dedented-string-literals.scala diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 30375ef7cf82..8e0076b2260a 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -374,6 +374,7 @@ object Scanners { case STRINGLIT => currentRegion match { case InString(_, outer) => currentRegion = outer + case InDedentedString(outer) => currentRegion = outer case _ => } case _ => @@ -385,6 +386,7 @@ object Scanners { lastOffset = lastCharOffset currentRegion match case InString(multiLine, _) if lastToken != STRINGPART => fetchStringPart(multiLine) + case InDedentedString(_) if lastToken != STRINGPART => fetchDedentedStringPart() case _ => fetchToken() if token == ERROR then adjustSepRegions(STRINGLIT) // make sure we exit enclosing string literal else @@ -888,6 +890,14 @@ object Scanners { getIdentRest() if (ch == '"' && token == IDENTIFIER) token = INTERPOLATIONID + else if (ch == '\'' && token == IDENTIFIER) + // Check for ''' to support dedented string interpolation + val la = lookaheadReader() + la.nextChar() + if (la.ch == '\'') + la.nextChar() + if (la.ch == '\'') + token = INTERPOLATIONID case '<' => // is XMLSTART? def fetchLT() = { val last = if (charOffset >= 2) buf(charOffset - 2) else ' ' @@ -976,6 +986,10 @@ object Scanners { } fetchDoubleQuote() case '\'' => + def dedentedStringPart() = { + getDedentedString(isInterpolated = true) + currentRegion = InDedentedString(currentRegion) + } def fetchSingleQuote(): Unit = { nextChar() // Check for triple single quote (dedented string literal) @@ -983,7 +997,14 @@ object Scanners { nextChar() if (ch == '\'') { // We have at least ''' - getDedentedStringLit() + // Check if this is an interpolated dedented string + if (token == INTERPOLATIONID) { + // For interpolation, handle as string part + nextRawChar() + dedentedStringPart() + } else { + getDedentedString(isInterpolated = false) + } } else { // We have '' followed by something else @@ -1276,147 +1297,236 @@ object Scanners { * - Strips indentation equal to closing delimiter indentation * - All lines must be empty or indented further than closing delimiter * - Supports extended delimiters (e.g., '''', ''''') + * @param isInterpolated If true, handles $ interpolation and returns STRINGPART tokens */ - private def getDedentedStringLit(): Unit = { - // Count opening quotes (already consumed 3) - nextChar() - var quoteCount = 3 - while (ch == '\'') { - quoteCount += 1 - nextChar() - } - - // Must be followed by a newline - if (ch != LF && ch != CR) { - error(em"dedented string literal must start with newline after opening quotes") - token = ERROR + private def getDedentedString(isInterpolated: Boolean): Unit = { + if (isInterpolated) { + // For interpolated strings: parse incrementally, handling $ expressions + getDedentedStringPartImpl() } else { - // Skip the initial newline (CR LF or just LF) - if (ch == CR) nextRawChar() - if (ch == LF) nextRawChar() - - // Collect all lines until we find the closing delimiter - val lines = scala.collection.mutable.ArrayBuffer[String]() - val lineIndents = scala.collection.mutable.ArrayBuffer[String]() - var currentLine = new StringBuilder - var currentIndent = new StringBuilder - var atLineStart = true - var closingIndent: String = null - var foundClosing = false - - while (!foundClosing && ch != SU) { - if (atLineStart) { - // Collect indentation - currentIndent.clear() - while (ch == ' ' || ch == '\t') { - currentIndent.append(ch) - nextRawChar() - } + // For non-interpolated strings: parse entire string and dedent + // Count opening quotes (already consumed 3) + nextChar() + var quoteCount = 3 + while (ch == '\'') { + quoteCount += 1 + nextChar() + } - // Check if this might be the closing delimiter - if (ch == '\'') { - var endQuoteCount = 0 - val savedOffset = charOffset - while (ch == '\'' && endQuoteCount < quoteCount + 1) { - endQuoteCount += 1 + // Must be followed by a newline + if (ch != LF && ch != CR) { + error(em"dedented string literal must start with newline after opening quotes") + token = ERROR + } else { + // Skip the initial newline (CR LF or just LF) + if (ch == CR) nextRawChar() + if (ch == LF) nextRawChar() + + // Collect all lines until we find the closing delimiter + val lines = scala.collection.mutable.ArrayBuffer[String]() + val lineIndents = scala.collection.mutable.ArrayBuffer[String]() + var currentLine = new StringBuilder + var currentIndent = new StringBuilder + var atLineStart = true + var closingIndent: String = null + var foundClosing = false + + while (!foundClosing && ch != SU) { + if (atLineStart) { + // Collect indentation + currentIndent.clear() + while (ch == ' ' || ch == '\t') { + currentIndent.append(ch) nextRawChar() } - if (endQuoteCount == quoteCount && ch != '\'') { - // Found closing delimiter (not followed by another quote) - foundClosing = true - closingIndent = currentIndent.toString + // Check if this might be the closing delimiter + if (ch == '\'') { + var endQuoteCount = 0 + while (ch == '\'' && endQuoteCount < quoteCount + 1) { + endQuoteCount += 1 + nextRawChar() + } + + if (endQuoteCount == quoteCount && ch != '\'') { + // Found closing delimiter (not followed by another quote) + foundClosing = true + closingIndent = currentIndent.toString + } else { + // False alarm, these quotes are part of the content + // We need to restore and add them to current line + currentLine.append(currentIndent) + for (_ <- 0 until endQuoteCount) currentLine.append('\'') + atLineStart = false + } } else { - // False alarm, these quotes are part of the content - // We need to restore and add them to current line - currentLine.append(currentIndent) - for (_ <- 0 until endQuoteCount) currentLine.append('\'') atLineStart = false } - } else { - atLineStart = false } - } - if (!foundClosing && !atLineStart) { - // Regular content - if (ch == CR || ch == LF) { - // End of line - lineIndents += currentIndent.toString - lines += currentLine.toString - currentLine.clear() - currentIndent.clear() - - // Normalize newlines to \n - if (ch == CR) nextRawChar() - if (ch == LF) nextRawChar() - atLineStart = true - } else { - currentLine.append(ch) - nextRawChar() + if (!foundClosing && !atLineStart) { + // Regular content + if (ch == CR || ch == LF) { + // End of line + lineIndents += currentIndent.toString + lines += currentLine.toString + currentLine.clear() + currentIndent.clear() + + // Normalize newlines to \n + if (ch == CR) nextRawChar() + if (ch == LF) nextRawChar() + atLineStart = true + } else { + currentLine.append(ch) + nextRawChar() + } } } - } - if (!foundClosing) { - incompleteInputError(em"unclosed dedented string literal") - } else if (closingIndent == null) { - error(em"internal error: closing indent not set") - token = ERROR - } else { - // Validate and dedent all lines - val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() - val closingIndentLen = closingIndent.length - var hasSpaces = false - var hasTabs = false - - for (indent <- closingIndent) { - if (indent == ' ') hasSpaces = true - if (indent == '\t') hasTabs = true - } - - var hasError = false - for (i <- 0 until lines.length if !hasError) { - val line = lines(i) - val indent = lineIndents(i) - - // Check for mixed tabs and spaces - var lineHasSpaces = false - var lineHasTabs = false - for (ch <- indent) { - if (ch == ' ') lineHasSpaces = true - if (ch == '\t') lineHasTabs = true + if (!foundClosing) { + incompleteInputError(em"unclosed dedented string literal") + } else if (closingIndent == null) { + error(em"internal error: closing indent not set") + token = ERROR + } else { + // Validate and dedent all lines + val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() + val closingIndentLen = closingIndent.length + var hasSpaces = false + var hasTabs = false + + for (indent <- closingIndent) { + if (indent == ' ') hasSpaces = true + if (indent == '\t') hasTabs = true } - if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { - error(em"dedented string literal cannot mix tabs and spaces in indentation") - token = ERROR - hasError = true - } else if (line.isEmpty) { - // Empty lines are allowed - dedentedLines += "" - } else { - // Non-empty lines must be indented at least as much as closing delimiter - if (!indent.startsWith(closingIndent)) { - error(em"line in dedented string literal must be indented at least as much as the closing delimiter") + var hasError = false + for (i <- 0 until lines.length if !hasError) { + val line = lines(i) + val indent = lineIndents(i) + + // Check for mixed tabs and spaces + var lineHasSpaces = false + var lineHasTabs = false + for (ch <- indent) { + if (ch == ' ') lineHasSpaces = true + if (ch == '\t') lineHasTabs = true + } + + if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { + error(em"dedented string literal cannot mix tabs and spaces in indentation") token = ERROR hasError = true + } else if (line.isEmpty) { + // Empty lines are allowed + dedentedLines += "" } else { - // Remove the closing indentation from this line - dedentedLines += indent.substring(closingIndentLen) + line + // Non-empty lines must be indented at least as much as closing delimiter + if (!indent.startsWith(closingIndent)) { + error(em"line in dedented string literal must be indented at least as much as the closing delimiter") + token = ERROR + hasError = true + } else { + // Remove the closing indentation from this line + dedentedLines += indent.substring(closingIndentLen) + line + } } } + + if (!hasError) { + // Set the string value (join with \n) + strVal = dedentedLines.mkString("\n") + litBuf.clear() + token = STRINGLIT + } } + } + } + } - if (!hasError) { - // Set the string value (join with \n) - strVal = dedentedLines.mkString("\n") - litBuf.clear() + /** For interpolated dedented strings - parse string content until ''' or $ */ + @tailrec private def getDedentedStringPartImpl(): Unit = + // Check for closing ''' delimiter + if (ch == '\'') { + nextRawChar() + if (ch == '\'') { + nextRawChar() + if (ch == '\'') { + // Found closing ''' + nextChar() + // For now, set the string value without dedenting + // TODO: implement proper dedenting for interpolated strings + setStrVal() token = STRINGLIT } + else { + // Two quotes followed by something else, add them to content + putChar('\'') + putChar('\'') + getDedentedStringPartImpl() + } + } + else { + // Single quote followed by something else, add it to content + putChar('\'') + getDedentedStringPartImpl() } } - } + else if (ch == '$') { + // Handle interpolation + def getInterpolatedIdentRest(hasSupplement: Boolean): Unit = + @tailrec def loopRest(): Unit = + if ch != SU && isUnicodeIdentifierPart(ch) then + putChar(ch) ; nextRawChar() + loopRest() + else if atSupplementary(ch, isUnicodeIdentifierPart) then + putChar(ch) ; nextRawChar() + putChar(ch) ; nextRawChar() + loopRest() + else + finishNamedToken(IDENTIFIER, target = next) + end loopRest + setStrVal() + token = STRINGPART + next.lastOffset = charOffset - 1 + next.offset = charOffset - 1 + putChar(ch) ; nextRawChar() + if hasSupplement then + putChar(ch) ; nextRawChar() + loopRest() + end getInterpolatedIdentRest + + nextRawChar() + if (ch == '$' || ch == '\'') { + putChar(ch) + nextRawChar() + getDedentedStringPartImpl() + } + else if (ch == '{') { + setStrVal() + token = STRINGPART + } + else if isUnicodeIdentifierStart(ch) || ch == '_' then + getInterpolatedIdentRest(hasSupplement = false) + else if atSupplementary(ch, isUnicodeIdentifierStart) then + getInterpolatedIdentRest(hasSupplement = true) + else + error("invalid string interpolation: `$$`, `$'`, `$`ident or `$`BlockExpr expected".toMessage, off = charOffset - 2) + putChar('$') + getDedentedStringPartImpl() + } + else { + val isUnclosedLiteral = !isUnicodeEscape && ch == SU + if (isUnclosedLiteral) + incompleteInputError(em"unclosed dedented string literal") + else { + putChar(ch) + nextRawChar() + getDedentedStringPartImpl() + } + } + end getDedentedStringPartImpl private def getRawStringLit(): Unit = if (ch == '\"') { @@ -1523,6 +1633,11 @@ object Scanners { getStringPart(multiLine) } + private def fetchDedentedStringPart() = { + offset = charOffset - 1 + getDedentedString(isInterpolated = true) + } + private def isTripleQuote(): Boolean = if (ch == '"') { nextRawChar() @@ -1825,6 +1940,7 @@ object Scanners { private def delimiter = this match case _: InString => "}(in string)" + case _: InDedentedString => "}(in dedented string)" case InParens(LPAREN, _) => ")" case InParens(LBRACKET, _) => "]" case _: InBraces => "}" @@ -1840,6 +1956,7 @@ object Scanners { end Region case class InString(multiLine: Boolean, outer: Region) extends Region(RBRACE) + case class InDedentedString(outer: Region) extends Region(RBRACE) case class InParens(prefix: Token, outer: Region) extends Region(prefix + 1) case class InBraces(outer: Region) extends Region(RBRACE) case class InCase(outer: Region) extends Region(OUTDENT) diff --git a/project/project/build.properties b/project/project/build.properties deleted file mode 100644 index 0b699c3052d7..000000000000 --- a/project/project/build.properties +++ /dev/null @@ -1 +0,0 @@ -sbt.version=1.10.2 diff --git a/tests/neg/dedented-string-literals.check b/tests/neg/dedented-string-literals.check index fb135a44bfd8..fcf9b7583ccb 100644 --- a/tests/neg/dedented-string-literals.check +++ b/tests/neg/dedented-string-literals.check @@ -14,3 +14,12 @@ 19 | val unclosed = ''' | ^ |unclosed dedented string literal +-- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:23:35 +23 | val nonWhitespaceBeforeClosing = ''' + | ^ + |unclosed dedented string literal +-- Error: tests/neg/dedented-string-literals.scala:39:4 +39 | onlyAtCompileTime // error + | ^^^^^^^^^^^^^^^^^ + |This method should only be used at compile time + |Do not call at runtime diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala index 7511ccc86cc7..7517ed11bee3 100644 --- a/tests/neg/dedented-string-literals.scala +++ b/tests/neg/dedented-string-literals.scala @@ -18,4 +18,24 @@ content // Error: Unclosed literal val unclosed = ''' some content + + // Error: Non-whitespace before closing delimiter + val nonWhitespaceBeforeClosing = ''' + content here + text''' +} + +// Test @compileTimeOnly with dedented string +object CompileTimeOnlyTest { + import scala.annotation.compileTimeOnly + + @compileTimeOnly(''' + This method should only be used at compile time + Do not call at runtime + ''') + def onlyAtCompileTime: Unit = () + + def test(): Unit = { + onlyAtCompileTime // error + } } diff --git a/tests/pos/dedented-string-literals.scala b/tests/pos/dedented-string-literals.scala deleted file mode 100644 index 45cdb1431745..000000000000 --- a/tests/pos/dedented-string-literals.scala +++ /dev/null @@ -1,129 +0,0 @@ -// Test dedented string literals as specified in SIP - -object DedentedStringLiterals { - // Basic usage - val basic = ''' -i am cow -hear me moo -''' - - // With indentation preserved - val withIndent = ''' - i am cow - hear me moo -''' - - // Empty string - val empty = ''' -''' - - // Single line of content - val singleLine = ''' -hello world -''' - - // Multiple blank lines - val blankLines = ''' -line 1 - -line 3 -''' - - // Deep indentation - val deepIndent = ''' - deeply - indented - content -''' - - // Mixed content indentation (more than closing) - val mixedIndent = ''' - first level - second level - third level -''' - - // Using extended delimiter to include ''' - val withTripleQuotes = '''' -''' -i am cow -hear me moo -''' -'''' - - // Extended delimiter with 5 quotes - val extended5 = ''''' -'''' -content with four quotes -'''' -''''' - - // Tabs for indentation - val withTabs = ''' - tab indented - content here -''' - - // Empty lines are allowed anywhere - val emptyLinesAnywhere = ''' - -content - -more content - -''' - - // Testing in different contexts - def inFunction = ''' - function content - more content -''' - - class InClass { - val inClass = ''' - class member - content -''' - } - - // In a list - val list = List( - ''' - first - ''', - ''' - second - ''', - ''' - third - ''' - ) - - // Nested in expressions - val nested = "prefix" + ''' - middle - ''' + "suffix" - - // With special characters - val specialChars = ''' -!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ -''' - - // Unicode content - val unicode = ''' -Hello 世界 -Καλημέρα κόσμε -''' - - // Zero-width closing indent - val zeroIndent = ''' -content -''' - - // Content with quotes - val withQuotes = ''' -"double quotes" -'single quote' -'' -''' -} diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check index 7838b833bfff..1a8c6bd9cd86 100644 --- a/tests/run/dedented-string-literals.check +++ b/tests/run/dedented-string-literals.check @@ -32,6 +32,11 @@ With triple quotes: i am cow ''' +Extended 5 quotes: +'''' +content with four quotes +'''' + Normalized newlines: Has only LF: true @@ -41,6 +46,23 @@ Special chars: Unicode: Hello 世界 +With tabs: + tab indented + content here + +Empty lines anywhere: +[ +content + +more content + +] + +With quotes: +"double quotes" +'single quote' +'' + Zero indent: content @@ -50,3 +72,51 @@ Content: [ ab cd] Chars: List( , , a, b, , , , c, d) + +Interpolated: +Hello Alice +You are 30 years old + +Formatted: +Value: 00042 +Done + +Pattern matching: +Pattern result: matched basic + +Interpolated pattern: +Interpolated pattern result: matched greeting + +Pattern matching (two lines): +Two line pattern result: matched two lines + +Interpolated pattern (two lines): +Two line interpolated result: matched two line greeting + +In function: +function content +more content + +In class: +class member +content + +In list: +Item: [first] +Item: [second] +Item: [third] + +Nested in expressions: +prefixmiddlesuffix + +Type ascription: +Value: [ first line + indented line + third line] +Type matches: true + +valueOf test: +Value: [ alpha + beta + gamma] +Type matches: true diff --git a/tests/run/dedented-string-literals.scala b/tests/run/dedented-string-literals.scala index c6b3ea56840d..75a6d58a9bed 100644 --- a/tests/run/dedented-string-literals.scala +++ b/tests/run/dedented-string-literals.scala @@ -4,118 +4,299 @@ object Test { def main(args: Array[String]): Unit = { // Test basic dedenting val basic = ''' -i am cow -hear me moo -''' + i am cow + hear me moo + ''' println("Basic:") println(basic) println() // Test with indentation preserved val withIndent = ''' - i am cow - hear me moo -''' + i am cow + hear me moo + ''' println("With indent:") println(withIndent) println() // Test empty string val empty = ''' -''' + ''' println("Empty:") println(s"[${empty}]") println() // Test single line val singleLine = ''' -hello world -''' + hello world + ''' println("Single line:") println(singleLine) println() // Test blank lines val blankLines = ''' -line 1 + line 1 -line 3 -''' + line 3 + ''' println("Blank lines:") println(blankLines) println() // Test deep indentation removal val deepIndent = ''' - deeply - indented - content -''' + deeply + indented + content + ''' println("Deep indent:") println(deepIndent) println() // Test mixed indentation levels (preserved) val mixedIndent = ''' - first level - second level - third level -''' + first level + second level + third level + ''' println("Mixed indent:") println(mixedIndent) println() // Test extended delimiter with embedded ''' val withTripleQuotes = '''' -''' -i am cow -''' -'''' + ''' + i am cow + ''' + '''' println("With triple quotes:") println(withTripleQuotes) println() + // Test extended delimiter with 5 quotes + val extended5 = ''''' + '''' + content with four quotes + '''' + ''''' + println("Extended 5 quotes:") + println(extended5) + println() + // Test that newlines are normalized to \n val normalized = ''' -line1 -line2 -''' + line1 + line2 + ''' println("Normalized newlines:") println(s"Has only LF: ${!normalized.contains('\r')}") println() // Test special characters val specialChars = ''' -!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ -''' + !"#$%&()*+,-./:;<=>?@[\]^_`{|}~ + ''' println("Special chars:") println(specialChars) println() // Test unicode val unicode = ''' -Hello 世界 -''' + Hello 世界 + ''' println("Unicode:") println(unicode) println() + // Test tabs for indentation + val withTabs = ''' + tab indented + content here + ''' + println("With tabs:") + println(withTabs) + println() + + // Test empty lines anywhere + val emptyLinesAnywhere = ''' + + content + + more content + + ''' + println("Empty lines anywhere:") + println(s"[${emptyLinesAnywhere}]") + println() + + // Test content with quotes + val withQuotes = ''' + "double quotes" + 'single quote' + '' + ''' + println("With quotes:") + println(withQuotes) + println() + // Test zero-width closing indent val zeroIndent = ''' -content -''' + content + ''' println("Zero indent:") println(zeroIndent) println() // Test content length and character accuracy val precise = ''' - ab - cd -''' + ab + cd + ''' println("Precise:") println(s"Length: ${precise.length}") println(s"Content: [${precise}]") println(s"Chars: ${precise.toList}") + println() + + // Test with string interpolator + val name = "Alice" + val age = 30 + val interpolated = s''' + Hello $name + You are $age years old + ''' + println("Interpolated:") + println(interpolated) + println() + + // Test with f interpolator + val value = 42 + val formatted = f''' + Value: $value%05d + Done + ''' + println("Formatted:") + println(formatted) + println() + + // Test as pattern + def testPattern(s: String): String = s match { + case ''' + test + ''' => "matched basic" + case ''' + other + ''' => "matched other" + case _ => "no match" + } + println("Pattern matching:") + println(s"Pattern result: ${testPattern("test")}") + println() + + // Test as pattern with interpolator + def testInterpolatedPattern(s: String): String = s match { + case s''' + Hello $_ + ''' => "matched greeting" + case _ => "no match" + } + println("Interpolated pattern:") + println(s"Interpolated pattern result: ${testInterpolatedPattern("Hello World")}") + println() + + // Test as pattern with two lines + def testPatternTwoLines(s: String): String = s match { + case ''' + line one + line two + ''' => "matched two lines" + case _ => "no match" + } + println("Pattern matching (two lines):") + println(s"Two line pattern result: ${testPatternTwoLines("line one\nline two")}") + println() + + // Test as pattern with interpolator and two lines + def testInterpolatedPatternTwoLines(s: String): String = s match { + case s''' + First: $_ + Second: $_ + ''' => "matched two line greeting" + case _ => "no match" + } + println("Interpolated pattern (two lines):") + println(s"Two line interpolated result: ${testInterpolatedPatternTwoLines("First: Alice\nSecond: Bob")}") + println() + + // Test in function context + def inFunction = ''' + function content + more content + ''' + println("In function:") + println(inFunction) + println() + + // Test in class context + class InClass { + val inClass = ''' + class member + content + ''' + } + val classInstance = new InClass + println("In class:") + println(classInstance.inClass) + println() + + // Test in a list + val list = List( + ''' + first + ''', + ''' + second + ''', + ''' + third + ''' + ) + println("In list:") + list.foreach { item => + println(s"Item: [$item]") + } + println() + + // Test nested in expressions + val nested = "prefix" + ''' + middle + ''' + "suffix" + println("Nested in expressions:") + println(nested) + println() + + // Test as type ascription (singleton literal type) + val typedVal: ''' + first line + indented line + third line + ''' = " first line\n indented line\n third line" + println("Type ascription:") + println(s"Value: [$typedVal]") + println(s"Type matches: ${typedVal == " first line\n indented line\n third line"}") + println() + + // Test as type parameter to valueOf + import scala.compiletime.valueOf + val valueOfResult = valueOf[''' + alpha + beta + gamma + '''] + println("valueOf test:") + println(s"Value: [$valueOfResult]") + println(s"Type matches: ${valueOfResult == " alpha\n beta\n gamma"}") } } From 40f397f377ad8000c34aedbe3dbebb9ed60fe771 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 19:20:52 +0800 Subject: [PATCH 05/30] . --- .../dotty/tools/dotc/parsing/Scanners.scala | 302 +++++++++--------- 1 file changed, 152 insertions(+), 150 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 8e0076b2260a..bb515f2c598c 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -986,10 +986,6 @@ object Scanners { } fetchDoubleQuote() case '\'' => - def dedentedStringPart() = { - getDedentedString(isInterpolated = true) - currentRegion = InDedentedString(currentRegion) - } def fetchSingleQuote(): Unit = { nextChar() // Check for triple single quote (dedented string literal) @@ -1001,7 +997,8 @@ object Scanners { if (token == INTERPOLATIONID) { // For interpolation, handle as string part nextRawChar() - dedentedStringPart() + getDedentedString(isInterpolated = true) + currentRegion = InDedentedString(currentRegion) } else { getDedentedString(isInterpolated = false) } @@ -1300,177 +1297,182 @@ object Scanners { * @param isInterpolated If true, handles $ interpolation and returns STRINGPART tokens */ private def getDedentedString(isInterpolated: Boolean): Unit = { + // For interpolated strings, we're already at the first character after ''' + // For non-interpolated, we need to consume the first character + if (!isInterpolated) nextChar() + + // Count opening quotes (already consumed 3) + var quoteCount = 3 + while (ch == '\'') { + quoteCount += 1 + if (isInterpolated) nextRawChar() else nextChar() + } + + // Must be followed by a newline + if (ch != LF && ch != CR) { + error(em"dedented string literal must start with newline after opening quotes") + token = ERROR + return + } + + // Skip the initial newline (CR LF or just LF) + if (ch == CR) nextRawChar() + if (ch == LF) nextRawChar() + + // For interpolated strings, check if we need to handle $ interpolation first if (isInterpolated) { - // For interpolated strings: parse incrementally, handling $ expressions - getDedentedStringPartImpl() + getDedentedStringPartWithDelimiter(quoteCount) } else { - // For non-interpolated strings: parse entire string and dedent - // Count opening quotes (already consumed 3) - nextChar() - var quoteCount = 3 - while (ch == '\'') { - quoteCount += 1 - nextChar() - } + // Collect all lines until we find the closing delimiter + val lines = scala.collection.mutable.ArrayBuffer[String]() + val lineIndents = scala.collection.mutable.ArrayBuffer[String]() + var currentLine = new StringBuilder + var currentIndent = new StringBuilder + var atLineStart = true + var closingIndent: String = null + var foundClosing = false + + while (!foundClosing && ch != SU) { + if (atLineStart) { + // Collect indentation + currentIndent.clear() + while (ch == ' ' || ch == '\t') { + currentIndent.append(ch) + nextRawChar() + } - // Must be followed by a newline - if (ch != LF && ch != CR) { - error(em"dedented string literal must start with newline after opening quotes") - token = ERROR - } else { - // Skip the initial newline (CR LF or just LF) - if (ch == CR) nextRawChar() - if (ch == LF) nextRawChar() - - // Collect all lines until we find the closing delimiter - val lines = scala.collection.mutable.ArrayBuffer[String]() - val lineIndents = scala.collection.mutable.ArrayBuffer[String]() - var currentLine = new StringBuilder - var currentIndent = new StringBuilder - var atLineStart = true - var closingIndent: String = null - var foundClosing = false - - while (!foundClosing && ch != SU) { - if (atLineStart) { - // Collect indentation - currentIndent.clear() - while (ch == ' ' || ch == '\t') { - currentIndent.append(ch) + // Check if this might be the closing delimiter + if (ch == '\'') { + var endQuoteCount = 0 + while (ch == '\'' && endQuoteCount < quoteCount + 1) { + endQuoteCount += 1 nextRawChar() } - // Check if this might be the closing delimiter - if (ch == '\'') { - var endQuoteCount = 0 - while (ch == '\'' && endQuoteCount < quoteCount + 1) { - endQuoteCount += 1 - nextRawChar() - } - - if (endQuoteCount == quoteCount && ch != '\'') { - // Found closing delimiter (not followed by another quote) - foundClosing = true - closingIndent = currentIndent.toString - } else { - // False alarm, these quotes are part of the content - // We need to restore and add them to current line - currentLine.append(currentIndent) - for (_ <- 0 until endQuoteCount) currentLine.append('\'') - atLineStart = false - } + if (endQuoteCount == quoteCount && ch != '\'') { + // Found closing delimiter (not followed by another quote) + foundClosing = true + closingIndent = currentIndent.toString } else { + // False alarm, these quotes are part of the content + // We need to restore and add them to current line + currentLine.append(currentIndent) + for (_ <- 0 until endQuoteCount) currentLine.append('\'') atLineStart = false } + } else { + atLineStart = false } + } - if (!foundClosing && !atLineStart) { - // Regular content - if (ch == CR || ch == LF) { - // End of line - lineIndents += currentIndent.toString - lines += currentLine.toString - currentLine.clear() - currentIndent.clear() - - // Normalize newlines to \n - if (ch == CR) nextRawChar() - if (ch == LF) nextRawChar() - atLineStart = true - } else { - currentLine.append(ch) - nextRawChar() - } + if (!foundClosing && !atLineStart) { + // Regular content + if (ch == CR || ch == LF) { + // End of line + lineIndents += currentIndent.toString + lines += currentLine.toString + currentLine.clear() + currentIndent.clear() + + // Normalize newlines to \n + if (ch == CR) nextRawChar() + if (ch == LF) nextRawChar() + atLineStart = true + } else { + currentLine.append(ch) + nextRawChar() } } + } - if (!foundClosing) { - incompleteInputError(em"unclosed dedented string literal") - } else if (closingIndent == null) { - error(em"internal error: closing indent not set") - token = ERROR - } else { - // Validate and dedent all lines - val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() - val closingIndentLen = closingIndent.length - var hasSpaces = false - var hasTabs = false - - for (indent <- closingIndent) { - if (indent == ' ') hasSpaces = true - if (indent == '\t') hasTabs = true - } + if (!foundClosing) { + incompleteInputError(em"unclosed dedented string literal") + } else if (closingIndent == null) { + error(em"internal error: closing indent not set") + token = ERROR + } else { + // Validate and dedent all lines + val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() + val closingIndentLen = closingIndent.length + var hasSpaces = false + var hasTabs = false + + for (indent <- closingIndent) { + if (indent == ' ') hasSpaces = true + if (indent == '\t') hasTabs = true + } - var hasError = false - for (i <- 0 until lines.length if !hasError) { - val line = lines(i) - val indent = lineIndents(i) - - // Check for mixed tabs and spaces - var lineHasSpaces = false - var lineHasTabs = false - for (ch <- indent) { - if (ch == ' ') lineHasSpaces = true - if (ch == '\t') lineHasTabs = true - } + var hasError = false + for (i <- 0 until lines.length if !hasError) { + val line = lines(i) + val indent = lineIndents(i) + + // Check for mixed tabs and spaces + var lineHasSpaces = false + var lineHasTabs = false + for (ch <- indent) { + if (ch == ' ') lineHasSpaces = true + if (ch == '\t') lineHasTabs = true + } - if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { - error(em"dedented string literal cannot mix tabs and spaces in indentation") + if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { + error(em"dedented string literal cannot mix tabs and spaces in indentation") + token = ERROR + hasError = true + } else if (line.isEmpty) { + // Empty lines are allowed + dedentedLines += "" + } else { + // Non-empty lines must be indented at least as much as closing delimiter + if (!indent.startsWith(closingIndent)) { + error(em"line in dedented string literal must be indented at least as much as the closing delimiter") token = ERROR hasError = true - } else if (line.isEmpty) { - // Empty lines are allowed - dedentedLines += "" } else { - // Non-empty lines must be indented at least as much as closing delimiter - if (!indent.startsWith(closingIndent)) { - error(em"line in dedented string literal must be indented at least as much as the closing delimiter") - token = ERROR - hasError = true - } else { - // Remove the closing indentation from this line - dedentedLines += indent.substring(closingIndentLen) + line - } + // Remove the closing indentation from this line + dedentedLines += indent.substring(closingIndentLen) + line } } + } - if (!hasError) { - // Set the string value (join with \n) - strVal = dedentedLines.mkString("\n") - litBuf.clear() - token = STRINGLIT - } + if (!hasError) { + // Set the string value (join with \n) + strVal = dedentedLines.mkString("\n") + litBuf.clear() + token = STRINGLIT } } } } - /** For interpolated dedented strings - parse string content until ''' or $ */ - @tailrec private def getDedentedStringPartImpl(): Unit = - // Check for closing ''' delimiter + /** Parse interpolated dedented string content, handling $ expressions. + * This collects content until hitting $ or closing delimiter. + * Respects the quote count for extended delimiters. + * + * Note: This parses with the same format requirements as non-interpolated dedented strings + * (newline after opening, extended delimiters, etc.) but does NOT dedent the content during + * parsing. Dedenting for interpolated strings must be handled at runtime after all parts + * are assembled, similar to how the string interpolator combines the parts. + */ + @tailrec private def getDedentedStringPartWithDelimiter(quoteCount: Int): Unit = + // Check for closing delimiter with correct quote count if (ch == '\'') { - nextRawChar() - if (ch == '\'') { + // Count the quotes we encounter + var foundQuotes = 0 + while (ch == '\'' && foundQuotes < quoteCount + 1) { + foundQuotes += 1 nextRawChar() - if (ch == '\'') { - // Found closing ''' - nextChar() - // For now, set the string value without dedenting - // TODO: implement proper dedenting for interpolated strings - setStrVal() - token = STRINGLIT - } - else { - // Two quotes followed by something else, add them to content - putChar('\'') - putChar('\'') - getDedentedStringPartImpl() - } } - else { - // Single quote followed by something else, add it to content - putChar('\'') - getDedentedStringPartImpl() + + if (foundQuotes == quoteCount && ch != '\'') { + // Found closing delimiter - exact match and not followed by another quote + nextChar() + setStrVal() + token = STRINGLIT + } else { + // Not the closing delimiter, add the quotes we found to content + for (_ <- 0 until foundQuotes) putChar('\'') + getDedentedStringPartWithDelimiter(quoteCount) } } else if (ch == '$') { @@ -1501,7 +1503,7 @@ object Scanners { if (ch == '$' || ch == '\'') { putChar(ch) nextRawChar() - getDedentedStringPartImpl() + getDedentedStringPartWithDelimiter(quoteCount) } else if (ch == '{') { setStrVal() @@ -1514,7 +1516,7 @@ object Scanners { else error("invalid string interpolation: `$$`, `$'`, `$`ident or `$`BlockExpr expected".toMessage, off = charOffset - 2) putChar('$') - getDedentedStringPartImpl() + getDedentedStringPartWithDelimiter(quoteCount) } else { val isUnclosedLiteral = !isUnicodeEscape && ch == SU @@ -1523,10 +1525,10 @@ object Scanners { else { putChar(ch) nextRawChar() - getDedentedStringPartImpl() + getDedentedStringPartWithDelimiter(quoteCount) } } - end getDedentedStringPartImpl + end getDedentedStringPartWithDelimiter private def getRawStringLit(): Unit = if (ch == '\"') { From 48680cc5fe3643418811b075fcda57622d4793f4 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 19:35:23 +0800 Subject: [PATCH 06/30] . --- .../dotty/tools/dotc/parsing/Scanners.scala | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index bb515f2c598c..de052230ecad 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -374,7 +374,7 @@ object Scanners { case STRINGLIT => currentRegion match { case InString(_, outer) => currentRegion = outer - case InDedentedString(outer) => currentRegion = outer + case InDedentedString(_, outer) => currentRegion = outer case _ => } case _ => @@ -386,7 +386,9 @@ object Scanners { lastOffset = lastCharOffset currentRegion match case InString(multiLine, _) if lastToken != STRINGPART => fetchStringPart(multiLine) - case InDedentedString(_) if lastToken != STRINGPART => fetchDedentedStringPart() + case InDedentedString(quoteCount, _) if lastToken != STRINGPART => + offset = charOffset - 1 + getDedentedStringPartWithDelimiter(quoteCount) case _ => fetchToken() if token == ERROR then adjustSepRegions(STRINGLIT) // make sure we exit enclosing string literal else @@ -997,10 +999,11 @@ object Scanners { if (token == INTERPOLATIONID) { // For interpolation, handle as string part nextRawChar() - getDedentedString(isInterpolated = true) - currentRegion = InDedentedString(currentRegion) + val quoteCount = getDedentedString(isInterpolated = true) + currentRegion = InDedentedString(quoteCount, currentRegion) } else { getDedentedString(isInterpolated = false) + // No need to store quoteCount for non-interpolated strings } } else { @@ -1295,8 +1298,9 @@ object Scanners { * - All lines must be empty or indented further than closing delimiter * - Supports extended delimiters (e.g., '''', ''''') * @param isInterpolated If true, handles $ interpolation and returns STRINGPART tokens + * @return The quote count (number of quotes in the delimiter) for storing in the region */ - private def getDedentedString(isInterpolated: Boolean): Unit = { + private def getDedentedString(isInterpolated: Boolean): Int = { // For interpolated strings, we're already at the first character after ''' // For non-interpolated, we need to consume the first character if (!isInterpolated) nextChar() @@ -1312,7 +1316,7 @@ object Scanners { if (ch != LF && ch != CR) { error(em"dedented string literal must start with newline after opening quotes") token = ERROR - return + return 0 } // Skip the initial newline (CR LF or just LF) @@ -1322,6 +1326,7 @@ object Scanners { // For interpolated strings, check if we need to handle $ interpolation first if (isInterpolated) { getDedentedStringPartWithDelimiter(quoteCount) + quoteCount } else { // Collect all lines until we find the closing delimiter val lines = scala.collection.mutable.ArrayBuffer[String]() @@ -1442,6 +1447,8 @@ object Scanners { token = STRINGLIT } } + + quoteCount } } @@ -1635,10 +1642,6 @@ object Scanners { getStringPart(multiLine) } - private def fetchDedentedStringPart() = { - offset = charOffset - 1 - getDedentedString(isInterpolated = true) - } private def isTripleQuote(): Boolean = if (ch == '"') { @@ -1958,7 +1961,7 @@ object Scanners { end Region case class InString(multiLine: Boolean, outer: Region) extends Region(RBRACE) - case class InDedentedString(outer: Region) extends Region(RBRACE) + case class InDedentedString(quoteCount: Int, outer: Region) extends Region(RBRACE) case class InParens(prefix: Token, outer: Region) extends Region(prefix + 1) case class InBraces(outer: Region) extends Region(RBRACE) case class InCase(outer: Region) extends Region(OUTDENT) From 3a36a0f899e4bdb4ff09bbfe7940c906df50c157 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 19:39:59 +0800 Subject: [PATCH 07/30] . --- .../dotty/tools/dotc/parsing/Scanners.scala | 255 ++++++++---------- 1 file changed, 117 insertions(+), 138 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index de052230ecad..8cbdfe925b8c 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -388,7 +388,7 @@ object Scanners { case InString(multiLine, _) if lastToken != STRINGPART => fetchStringPart(multiLine) case InDedentedString(quoteCount, _) if lastToken != STRINGPART => offset = charOffset - 1 - getDedentedStringPartWithDelimiter(quoteCount) + getDedentedStringPartWithDelimiter(quoteCount, isInterpolated = true) case _ => fetchToken() if token == ERROR then adjustSepRegions(STRINGLIT) // make sure we exit enclosing string literal else @@ -1323,145 +1323,29 @@ object Scanners { if (ch == CR) nextRawChar() if (ch == LF) nextRawChar() - // For interpolated strings, check if we need to handle $ interpolation first - if (isInterpolated) { - getDedentedStringPartWithDelimiter(quoteCount) - quoteCount - } else { - // Collect all lines until we find the closing delimiter - val lines = scala.collection.mutable.ArrayBuffer[String]() - val lineIndents = scala.collection.mutable.ArrayBuffer[String]() - var currentLine = new StringBuilder - var currentIndent = new StringBuilder - var atLineStart = true - var closingIndent: String = null - var foundClosing = false - - while (!foundClosing && ch != SU) { - if (atLineStart) { - // Collect indentation - currentIndent.clear() - while (ch == ' ' || ch == '\t') { - currentIndent.append(ch) - nextRawChar() - } - - // Check if this might be the closing delimiter - if (ch == '\'') { - var endQuoteCount = 0 - while (ch == '\'' && endQuoteCount < quoteCount + 1) { - endQuoteCount += 1 - nextRawChar() - } - - if (endQuoteCount == quoteCount && ch != '\'') { - // Found closing delimiter (not followed by another quote) - foundClosing = true - closingIndent = currentIndent.toString - } else { - // False alarm, these quotes are part of the content - // We need to restore and add them to current line - currentLine.append(currentIndent) - for (_ <- 0 until endQuoteCount) currentLine.append('\'') - atLineStart = false - } - } else { - atLineStart = false - } - } - - if (!foundClosing && !atLineStart) { - // Regular content - if (ch == CR || ch == LF) { - // End of line - lineIndents += currentIndent.toString - lines += currentLine.toString - currentLine.clear() - currentIndent.clear() - - // Normalize newlines to \n - if (ch == CR) nextRawChar() - if (ch == LF) nextRawChar() - atLineStart = true - } else { - currentLine.append(ch) - nextRawChar() - } - } - } - - if (!foundClosing) { - incompleteInputError(em"unclosed dedented string literal") - } else if (closingIndent == null) { - error(em"internal error: closing indent not set") - token = ERROR - } else { - // Validate and dedent all lines - val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() - val closingIndentLen = closingIndent.length - var hasSpaces = false - var hasTabs = false - - for (indent <- closingIndent) { - if (indent == ' ') hasSpaces = true - if (indent == '\t') hasTabs = true - } + // Collect all content using the string part parser + getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) - var hasError = false - for (i <- 0 until lines.length if !hasError) { - val line = lines(i) - val indent = lineIndents(i) - - // Check for mixed tabs and spaces - var lineHasSpaces = false - var lineHasTabs = false - for (ch <- indent) { - if (ch == ' ') lineHasSpaces = true - if (ch == '\t') lineHasTabs = true - } - - if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { - error(em"dedented string literal cannot mix tabs and spaces in indentation") - token = ERROR - hasError = true - } else if (line.isEmpty) { - // Empty lines are allowed - dedentedLines += "" - } else { - // Non-empty lines must be indented at least as much as closing delimiter - if (!indent.startsWith(closingIndent)) { - error(em"line in dedented string literal must be indented at least as much as the closing delimiter") - token = ERROR - hasError = true - } else { - // Remove the closing indentation from this line - dedentedLines += indent.substring(closingIndentLen) + line - } - } - } - - if (!hasError) { - // Set the string value (join with \n) - strVal = dedentedLines.mkString("\n") - litBuf.clear() - token = STRINGLIT - } - } - - quoteCount + // For non-interpolated strings, we need to dedent the collected content + if (!isInterpolated && token == STRINGLIT) { + dedentCollectedString() } + + quoteCount } - /** Parse interpolated dedented string content, handling $ expressions. - * This collects content until hitting $ or closing delimiter. + /** Parse dedented string content, with optional $ interpolation handling. + * This collects content until hitting $ (if interpolated) or closing delimiter. * Respects the quote count for extended delimiters. * - * Note: This parses with the same format requirements as non-interpolated dedented strings - * (newline after opening, extended delimiters, etc.) but does NOT dedent the content during - * parsing. Dedenting for interpolated strings must be handled at runtime after all parts - * are assembled, similar to how the string interpolator combines the parts. + * @param quoteCount The number of quotes in the delimiter (3 for ''', 4 for '''', etc.) + * @param isInterpolated If true, handles $ expressions and returns STRINGPART tokens. + * If false, treats $ as regular content and returns STRINGLIT. + * + * Note: Interpolated strings do NOT dedent during parsing - dedenting must be handled + * at runtime after all parts are assembled. Non-interpolated strings dedent after collection. */ - @tailrec private def getDedentedStringPartWithDelimiter(quoteCount: Int): Unit = + @tailrec private def getDedentedStringPartWithDelimiter(quoteCount: Int, isInterpolated: Boolean): Unit = // Check for closing delimiter with correct quote count if (ch == '\'') { // Count the quotes we encounter @@ -1479,10 +1363,10 @@ object Scanners { } else { // Not the closing delimiter, add the quotes we found to content for (_ <- 0 until foundQuotes) putChar('\'') - getDedentedStringPartWithDelimiter(quoteCount) + getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) } } - else if (ch == '$') { + else if (isInterpolated && ch == '$') { // Handle interpolation def getInterpolatedIdentRest(hasSupplement: Boolean): Unit = @tailrec def loopRest(): Unit = @@ -1510,7 +1394,7 @@ object Scanners { if (ch == '$' || ch == '\'') { putChar(ch) nextRawChar() - getDedentedStringPartWithDelimiter(quoteCount) + getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) } else if (ch == '{') { setStrVal() @@ -1523,7 +1407,7 @@ object Scanners { else error("invalid string interpolation: `$$`, `$'`, `$`ident or `$`BlockExpr expected".toMessage, off = charOffset - 2) putChar('$') - getDedentedStringPartWithDelimiter(quoteCount) + getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) } else { val isUnclosedLiteral = !isUnicodeEscape && ch == SU @@ -1532,11 +1416,106 @@ object Scanners { else { putChar(ch) nextRawChar() - getDedentedStringPartWithDelimiter(quoteCount) + getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) } } end getDedentedStringPartWithDelimiter + /** Dedent a collected string by analyzing line structure and removing common indentation. + * This processes the content in `strVal`, validating indentation rules and removing + * the minimum common indentation from all non-empty lines. + */ + private def dedentCollectedString(): Unit = { + val content = strVal + if (content.isEmpty) return + + val lines = scala.collection.mutable.ArrayBuffer[String]() + val lineIndents = scala.collection.mutable.ArrayBuffer[String]() + + // Parse content into lines with their indentation + var i = 0 + while (i < content.length) { + // Collect indentation for this line + val indentStart = i + while (i < content.length && (content(i) == ' ' || content(i) == '\t')) { + i += 1 + } + val indent = content.substring(indentStart, i) + + // Collect rest of line + val lineStart = i + while (i < content.length && content(i) != '\n') { + i += 1 + } + val line = content.substring(lineStart, i) + + lines += line + lineIndents += indent + + // Skip the newline + if (i < content.length && content(i) == '\n') { + i += 1 + } + } + + // The last line's indentation is the closing indentation + if (lines.isEmpty) { + strVal = "" + return + } + + val closingIndent = lineIndents.last + val closingIndentLen = closingIndent.length + + // Check for mixed tabs/spaces in closing indent + var hasSpaces = false + var hasTabs = false + for (ch <- closingIndent) { + if (ch == ' ') hasSpaces = true + if (ch == '\t') hasTabs = true + } + + // Validate and dedent all lines + val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() + var hasError = false + + for (i <- 0 until lines.length - 1 if !hasError) { // Skip last line (it's empty after closing delimiter) + val line = lines(i) + val indent = lineIndents(i) + + // Check for mixed tabs and spaces + var lineHasSpaces = false + var lineHasTabs = false + for (ch <- indent) { + if (ch == ' ') lineHasSpaces = true + if (ch == '\t') lineHasTabs = true + } + + if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { + error(em"dedented string literal cannot mix tabs and spaces in indentation") + token = ERROR + hasError = true + } else if (line.isEmpty) { + // Empty lines are allowed + dedentedLines += "" + } else { + // Non-empty lines must be indented at least as much as closing delimiter + if (!indent.startsWith(closingIndent)) { + error(em"line in dedented string literal must be indented at least as much as the closing delimiter") + token = ERROR + hasError = true + } else { + // Remove the closing indentation from this line + dedentedLines += indent.substring(closingIndentLen) + line + } + } + } + + if (!hasError) { + strVal = dedentedLines.mkString("\n") + } + } + private def getRawStringLit(): Unit = if (ch == '\"') { nextRawChar() From b181814e55f7fa8f76e437f5a3dc70f4d034f6c4 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 20:50:36 +0800 Subject: [PATCH 08/30] . --- .../dotty/tools/dotc/parsing/Scanners.scala | 101 ------------------ 1 file changed, 101 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 8cbdfe925b8c..023176e18fff 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1326,11 +1326,6 @@ object Scanners { // Collect all content using the string part parser getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) - // For non-interpolated strings, we need to dedent the collected content - if (!isInterpolated && token == STRINGLIT) { - dedentCollectedString() - } - quoteCount } @@ -1357,7 +1352,6 @@ object Scanners { if (foundQuotes == quoteCount && ch != '\'') { // Found closing delimiter - exact match and not followed by another quote - nextChar() setStrVal() token = STRINGLIT } else { @@ -1421,101 +1415,6 @@ object Scanners { } end getDedentedStringPartWithDelimiter - /** Dedent a collected string by analyzing line structure and removing common indentation. - * This processes the content in `strVal`, validating indentation rules and removing - * the minimum common indentation from all non-empty lines. - */ - private def dedentCollectedString(): Unit = { - val content = strVal - if (content.isEmpty) return - - val lines = scala.collection.mutable.ArrayBuffer[String]() - val lineIndents = scala.collection.mutable.ArrayBuffer[String]() - - // Parse content into lines with their indentation - var i = 0 - while (i < content.length) { - // Collect indentation for this line - val indentStart = i - while (i < content.length && (content(i) == ' ' || content(i) == '\t')) { - i += 1 - } - val indent = content.substring(indentStart, i) - - // Collect rest of line - val lineStart = i - while (i < content.length && content(i) != '\n') { - i += 1 - } - val line = content.substring(lineStart, i) - - lines += line - lineIndents += indent - - // Skip the newline - if (i < content.length && content(i) == '\n') { - i += 1 - } - } - - // The last line's indentation is the closing indentation - if (lines.isEmpty) { - strVal = "" - return - } - - val closingIndent = lineIndents.last - val closingIndentLen = closingIndent.length - - // Check for mixed tabs/spaces in closing indent - var hasSpaces = false - var hasTabs = false - for (ch <- closingIndent) { - if (ch == ' ') hasSpaces = true - if (ch == '\t') hasTabs = true - } - - // Validate and dedent all lines - val dedentedLines = scala.collection.mutable.ArrayBuffer[String]() - var hasError = false - - for (i <- 0 until lines.length - 1 if !hasError) { // Skip last line (it's empty after closing delimiter) - val line = lines(i) - val indent = lineIndents(i) - - // Check for mixed tabs and spaces - var lineHasSpaces = false - var lineHasTabs = false - for (ch <- indent) { - if (ch == ' ') lineHasSpaces = true - if (ch == '\t') lineHasTabs = true - } - - if ((hasSpaces && lineHasTabs) || (hasTabs && lineHasSpaces)) { - error(em"dedented string literal cannot mix tabs and spaces in indentation") - token = ERROR - hasError = true - } else if (line.isEmpty) { - // Empty lines are allowed - dedentedLines += "" - } else { - // Non-empty lines must be indented at least as much as closing delimiter - if (!indent.startsWith(closingIndent)) { - error(em"line in dedented string literal must be indented at least as much as the closing delimiter") - token = ERROR - hasError = true - } else { - // Remove the closing indentation from this line - dedentedLines += indent.substring(closingIndentLen) + line - } - } - } - - if (!hasError) { - strVal = dedentedLines.mkString("\n") - } - } - private def getRawStringLit(): Unit = if (ch == '\"') { nextRawChar() From 7e8e5a7efacce51f7b844ca2e1f03bff87e641f4 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 21:44:15 +0800 Subject: [PATCH 09/30] . --- .../dotty/tools/dotc/parsing/Parsers.scala | 222 +++++++++++++++--- .../dotty/tools/dotc/parsing/Scanners.scala | 2 + 2 files changed, 195 insertions(+), 29 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 64eb442c239a..2a5ea1d48eca 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1349,6 +1349,46 @@ object Parsers { else literal(inTypeOrSingleton = true) + /** Dedent a string literal by removing common leading whitespace. + * The amount of whitespace to remove is determined by the indentation + * of the last line (which should contain only whitespace before the + * closing delimiter). + */ + private def dedentString(str: String): String = { + if (str.isEmpty) return str + + // Find the last line (should be just whitespace before closing delimiter) + val lastNewlineIdx = str.lastIndexOf('\n') + if (lastNewlineIdx < 0) { + // No newlines, return as-is (shouldn't happen for valid dedented strings) + return str + } + + // Extract the indentation from the last line + val closingIndent = str.substring(lastNewlineIdx + 1) + + // Split into lines + val lines = str.split("\n", -1) // -1 to keep trailing empty strings + + // Process all lines except the last (which is just the closing indentation) + val dedented = lines.dropRight(1).map { line => + if (line.startsWith(closingIndent)) { + line.substring(closingIndent.length) + } else if (line.trim.isEmpty) { + // Empty or whitespace-only lines + "" + } else { + // Line doesn't start with the closing indentation, keep as-is + line + } + } + + // Drop the first line if it's empty (the newline after opening delimiter) + val result = if (dedented.headOption.contains("")) dedented.drop(1) else dedented + + result.mkString("\n") + } + /** Literal ::= SimpleLiteral * | processedStringLiteral * | symbolLiteral @@ -1377,7 +1417,15 @@ object Parsers { case FLOATLIT => floatFromDigits(digits) case DOUBLELIT | DECILIT | EXPOLIT => doubleFromDigits(digits) case CHARLIT => in.strVal.head - case STRINGLIT | STRINGPART => in.strVal + case STRINGLIT | STRINGPART => + // Check if this is a dedented string (non-interpolated) + // For non-interpolated dedented strings, check if the token starts with ''' + val str = in.strVal + if (token == STRINGLIT && !inStringInterpolation && isDedentedStringLiteral(negOffset)) { + dedentString(str) + } else { + str + } case TRUE => true case FALSE => false case NULL => null @@ -1391,6 +1439,15 @@ object Parsers { Literal(Constant(value)) } + /** Check if a string literal at the given offset is a dedented string */ + def isDedentedStringLiteral(offset: Int): Boolean = { + val buf = in.buf + offset + 2 < buf.length && + buf(offset) == '\'' && + buf(offset + 1) == '\'' && + buf(offset + 2) == '\'' + } + if (inStringInterpolation) { val t = in.token match { case STRINGLIT | STRINGPART => @@ -1447,40 +1504,147 @@ object Parsers { in.charOffset + 1 < in.buf.length && in.buf(in.charOffset) == '"' && in.buf(in.charOffset + 1) == '"' + val isDedented = + in.charOffset + 2 < in.buf.length && + in.buf(in.charOffset) == '\'' && + in.buf(in.charOffset + 1) == '\'' && + in.buf(in.charOffset + 2) == '\'' + in.nextToken() - def nextSegment(literalOffset: Offset) = - segmentBuf += Thicket( - literal(literalOffset, inPattern = inPattern, inStringInterpolation = true), - atSpan(in.offset) { - if (in.token == IDENTIFIER) - termIdent() - else if (in.token == USCORE && inPattern) { - in.nextToken() - Ident(nme.WILDCARD) - } - else if (in.token == THIS) { - in.nextToken() - This(EmptyTypeIdent) - } - else if (in.token == LBRACE) - if (inPattern) Block(Nil, inBraces(pattern())) - else expr() - else { - report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) - EmptyTree - } - }) - var offsetCorrection = if isTripleQuoted then 3 else 1 - while (in.token == STRINGPART) - nextSegment(in.offset + offsetCorrection) - offsetCorrection = 0 - if (in.token == STRINGLIT) - segmentBuf += literal(inPattern = inPattern, negOffset = in.offset + offsetCorrection, inStringInterpolation = true) + // For dedented strings, we need to collect all string parts first, + // then dedent them all based on the closing indentation + if (isDedented) { + // Collect all string parts and their offsets + val stringParts = new ListBuffer[(String, Offset)] + val interpolatedExprs = new ListBuffer[Tree] + + var offsetCorrection = 3 // triple single quotes + while (in.token == STRINGPART) { + val literalOffset = in.offset + offsetCorrection + stringParts += ((in.strVal, literalOffset)) + offsetCorrection = 0 + in.nextToken() + + // Collect the interpolated expression + interpolatedExprs += atSpan(in.offset) { + if (in.token == IDENTIFIER) + termIdent() + else if (in.token == USCORE && inPattern) { + in.nextToken() + Ident(nme.WILDCARD) + } + else if (in.token == THIS) { + in.nextToken() + This(EmptyTypeIdent) + } + else if (in.token == LBRACE) + if (inPattern) Block(Nil, inBraces(pattern())) + else expr() + else { + report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) + EmptyTree + } + } + } + + // Get the final STRINGLIT + val finalLiteral = if (in.token == STRINGLIT) { + val s = in.strVal + val off = in.offset + offsetCorrection + stringParts += ((s, off)) + in.nextToken() + true + } else false + + // Now dedent all string parts based on the last one's closing indentation + if (stringParts.nonEmpty) { + val lastPart = stringParts.last._1 + val closingIndent = extractClosingIndent(lastPart) + + // Dedent all parts + val dedentedParts = stringParts.map { case (str, offset) => + (dedentStringPart(str, closingIndent), offset) + } + + // Build the segments with dedented strings + for (i <- 0 until dedentedParts.size - 1) { + val (dedentedStr, offset) = dedentedParts(i) + segmentBuf += Thicket( + atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) }, + interpolatedExprs(i) + ) + } + + // Add the final literal if present + if (finalLiteral) { + val (dedentedStr, offset) = dedentedParts.last + segmentBuf += atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) } + } + } + } else { + // Non-dedented string: use original logic + def nextSegment(literalOffset: Offset) = + segmentBuf += Thicket( + literal(literalOffset, inPattern = inPattern, inStringInterpolation = true), + atSpan(in.offset) { + if (in.token == IDENTIFIER) + termIdent() + else if (in.token == USCORE && inPattern) { + in.nextToken() + Ident(nme.WILDCARD) + } + else if (in.token == THIS) { + in.nextToken() + This(EmptyTypeIdent) + } + else if (in.token == LBRACE) + if (inPattern) Block(Nil, inBraces(pattern())) + else expr() + else { + report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) + EmptyTree + } + }) + + var offsetCorrection = if isTripleQuoted then 3 else 1 + while (in.token == STRINGPART) + nextSegment(in.offset + offsetCorrection) + offsetCorrection = 0 + if (in.token == STRINGLIT) + segmentBuf += literal(inPattern = inPattern, negOffset = in.offset + offsetCorrection, inStringInterpolation = true) + } InterpolatedString(interpolator, segmentBuf.toList) } + /** Extract the closing indentation from the last line of a string */ + private def extractClosingIndent(str: String): String = { + val lastNewlineIdx = str.lastIndexOf('\n') + if (lastNewlineIdx < 0) "" else str.substring(lastNewlineIdx + 1) + } + + /** Dedent a string part by removing the specified indentation from each line */ + private def dedentStringPart(str: String, closingIndent: String): String = { + if (str.isEmpty || closingIndent.isEmpty) return str + + val lines = str.split("\n", -1) // -1 to keep trailing empty strings + + val dedented = lines.map { line => + if (line.startsWith(closingIndent)) { + line.substring(closingIndent.length) + } else if (line.trim.isEmpty) { + // Empty or whitespace-only lines + "" + } else { + // Line doesn't start with the closing indentation, keep as-is + line + } + } + + dedented.mkString("\n") + } + /* ------------- NEW LINES ------------------------------------------------- */ def newLineOpt(): Unit = diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 023176e18fff..b969ad855577 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1349,10 +1349,12 @@ object Scanners { foundQuotes += 1 nextRawChar() } + charOffset -= 1 if (foundQuotes == quoteCount && ch != '\'') { // Found closing delimiter - exact match and not followed by another quote setStrVal() + nextChar() // Switch from raw mode to normal mode token = STRINGLIT } else { // Not the closing delimiter, add the quotes we found to content From c9fbf70806e16ca1a0d324d64136d56911ebd275 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 21:58:26 +0800 Subject: [PATCH 10/30] . --- .../dotty/tools/dotc/parsing/Parsers.scala | 32 ++++++++----------- .../dotty/tools/dotc/parsing/Scanners.scala | 5 +-- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 2a5ea1d48eca..dc285b3d38cd 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1359,28 +1359,21 @@ object Parsers { // Find the last line (should be just whitespace before closing delimiter) val lastNewlineIdx = str.lastIndexOf('\n') - if (lastNewlineIdx < 0) { - // No newlines, return as-is (shouldn't happen for valid dedented strings) - return str - } + assert( + lastNewlineIdx >= 0, + "Dedented string literal must contain at least two newlines" + ) - // Extract the indentation from the last line val closingIndent = str.substring(lastNewlineIdx + 1) // Split into lines - val lines = str.split("\n", -1) // -1 to keep trailing empty strings + val lines = str.linesIterator.toSeq // Process all lines except the last (which is just the closing indentation) val dedented = lines.dropRight(1).map { line => - if (line.startsWith(closingIndent)) { - line.substring(closingIndent.length) - } else if (line.trim.isEmpty) { - // Empty or whitespace-only lines - "" - } else { - // Line doesn't start with the closing indentation, keep as-is - line - } + if (line.startsWith(closingIndent)) line.substring(closingIndent.length) + else if (line.trim.isEmpty) "" // Empty or whitespace-only lines + else ??? // should never happen } // Drop the first line if it's empty (the newline after opening delimiter) @@ -1397,7 +1390,10 @@ object Parsers { * @param negOffset The offset of a preceding `-' sign, if any. * If the literal is not negated, negOffset == in.offset. */ - def literal(negOffset: Int = in.offset, inPattern: Boolean = false, inTypeOrSingleton: Boolean = false, inStringInterpolation: Boolean = false): Tree = { + def literal(negOffset: Int = in.offset, + inPattern: Boolean = false, + inTypeOrSingleton: Boolean = false, + inStringInterpolation: Boolean = false): Tree = { def literalOf(token: Token): Tree = { val isNegated = negOffset < in.offset def digits0 = in.removeNumberSeparators(in.strVal) @@ -1423,9 +1419,7 @@ object Parsers { val str = in.strVal if (token == STRINGLIT && !inStringInterpolation && isDedentedStringLiteral(negOffset)) { dedentString(str) - } else { - str - } + } else str case TRUE => true case FALSE => false case NULL => null diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index b969ad855577..92865c6e4617 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1349,6 +1349,8 @@ object Scanners { foundQuotes += 1 nextRawChar() } + // The while-loop above steps forward to the first non-`'` character, + // so we need to backtrack 1 char to avoid consuming it charOffset -= 1 if (foundQuotes == quoteCount && ch != '\'') { @@ -1407,8 +1409,7 @@ object Scanners { } else { val isUnclosedLiteral = !isUnicodeEscape && ch == SU - if (isUnclosedLiteral) - incompleteInputError(em"unclosed dedented string literal") + if (isUnclosedLiteral) incompleteInputError(em"unclosed dedented string literal") else { putChar(ch) nextRawChar() From aa18b7e26528c918fcbefbd360e63775027b4002 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 21:59:41 +0800 Subject: [PATCH 11/30] . --- compiler/src/dotty/tools/dotc/parsing/Parsers.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index dc285b3d38cd..253a7626cad1 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1365,7 +1365,10 @@ object Parsers { ) val closingIndent = str.substring(lastNewlineIdx + 1) - + assert( + closingIndent.forall(_.isWhitespace), + "Last line of a dedented string literal must contain only whitespace followed by the closing delimiter" + ) // Split into lines val lines = str.linesIterator.toSeq From 17205d986285d194f6c52dd39481cb13e15c10f3 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 22:00:15 +0800 Subject: [PATCH 12/30] . --- compiler/src/dotty/tools/dotc/parsing/Parsers.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 253a7626cad1..523eca5a4741 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1376,7 +1376,10 @@ object Parsers { val dedented = lines.dropRight(1).map { line => if (line.startsWith(closingIndent)) line.substring(closingIndent.length) else if (line.trim.isEmpty) "" // Empty or whitespace-only lines - else ??? // should never happen + else assert( + false, + s"line \"$line\" in dedented string must be either empty or be further indented than the closing delimiter" + ) } // Drop the first line if it's empty (the newline after opening delimiter) From 300f30031d646c0f4cf9c723804bc110846c4934 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 22:26:29 +0800 Subject: [PATCH 13/30] . --- .../dotty/tools/dotc/parsing/Parsers.scala | 72 ++++++++++++++----- tests/neg/dedented-string-literals.check | 18 ++--- tests/neg/dedented-string-literals.scala | 21 +++--- 3 files changed, 76 insertions(+), 35 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 523eca5a4741..87b9be4623ca 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1353,33 +1353,73 @@ object Parsers { * The amount of whitespace to remove is determined by the indentation * of the last line (which should contain only whitespace before the * closing delimiter). + * + * @param str The string content to dedent + * @param offset The source offset where the string literal begins + * @return The dedented string, or str if errors were reported */ - private def dedentString(str: String): String = { + private def dedentString(str: String, offset: Offset): String = { if (str.isEmpty) return str // Find the last line (should be just whitespace before closing delimiter) val lastNewlineIdx = str.lastIndexOf('\n') - assert( - lastNewlineIdx >= 0, - "Dedented string literal must contain at least two newlines" - ) + if (lastNewlineIdx < 0) { + syntaxError( + em"dedented string literal must start with newline after opening quotes", + offset + ) + return str + } val closingIndent = str.substring(lastNewlineIdx + 1) - assert( - closingIndent.forall(_.isWhitespace), - "Last line of a dedented string literal must contain only whitespace followed by the closing delimiter" - ) + if (!closingIndent.forall(_.isWhitespace)) { + syntaxError( + em"last line of dedented string literal must contain only whitespace before closing delimiter", + offset + ) + return str + } + + // Check for mixed tabs and spaces in closing indent + val hasTabs = closingIndent.contains('\t') + val hasSpaces = closingIndent.contains(' ') + if (hasTabs && hasSpaces) { + syntaxError( + em"dedented string literal cannot mix tabs and spaces in indentation", + offset + ) + return str + } + // Split into lines val lines = str.linesIterator.toSeq // Process all lines except the last (which is just the closing indentation) + var lineOffset = offset val dedented = lines.dropRight(1).map { line => - if (line.startsWith(closingIndent)) line.substring(closingIndent.length) - else if (line.trim.isEmpty) "" // Empty or whitespace-only lines - else assert( - false, - s"line \"$line\" in dedented string must be either empty or be further indented than the closing delimiter" - ) + val result = + if (line.startsWith(closingIndent)) line.substring(closingIndent.length) + else if (line.trim.isEmpty) "" // Empty or whitespace-only lines + else { + // Check if this line has mixed tabs/spaces that don't match closing indent + val lineIndent = line.takeWhile(_.isWhitespace) + val lineHasTabs = lineIndent.contains('\t') + val lineHasSpaces = lineIndent.contains(' ') + if ((hasTabs && lineHasSpaces && !lineHasTabs) || (hasSpaces && lineHasTabs && !lineHasSpaces)) { + syntaxError( + em"dedented string literal cannot mix tabs and spaces in indentation", + offset + ) + } else { + syntaxError( + em"line in dedented string literal must be indented at least as much as the closing delimiter", + lineOffset + ) + } + line + } + lineOffset += line.length + 1 // +1 for the newline + result } // Drop the first line if it's empty (the newline after opening delimiter) @@ -1424,7 +1464,7 @@ object Parsers { // For non-interpolated dedented strings, check if the token starts with ''' val str = in.strVal if (token == STRINGLIT && !inStringInterpolation && isDedentedStringLiteral(negOffset)) { - dedentString(str) + dedentString(str, negOffset) } else str case TRUE => true case FALSE => false diff --git a/tests/neg/dedented-string-literals.check b/tests/neg/dedented-string-literals.check index fcf9b7583ccb..f68092266a9f 100644 --- a/tests/neg/dedented-string-literals.check +++ b/tests/neg/dedented-string-literals.check @@ -10,16 +10,16 @@ 13 | val mixedTabsSpaces = ''' | ^ |dedented string literal cannot mix tabs and spaces in indentation --- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:19:17 -19 | val unclosed = ''' - | ^ - |unclosed dedented string literal --- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:23:35 -23 | val nonWhitespaceBeforeClosing = ''' +-- Error: tests/neg/dedented-string-literals.scala:19:35 +19 | val nonWhitespaceBeforeClosing = ''' | ^ - |unclosed dedented string literal --- Error: tests/neg/dedented-string-literals.scala:39:4 -39 | onlyAtCompileTime // error + |last line of dedented string literal must contain only whitespace before closing delimiter +-- Error: tests/neg/dedented-string-literals.scala:35:4 +35 | onlyAtCompileTime // error | ^^^^^^^^^^^^^^^^^ |This method should only be used at compile time |Do not call at runtime +-- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:41:17 +41 | val unclosed = ''' + | ^ + |unclosed dedented string literal diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala index 7517ed11bee3..0fbe429cf0a5 100644 --- a/tests/neg/dedented-string-literals.scala +++ b/tests/neg/dedented-string-literals.scala @@ -1,28 +1,24 @@ // Test error cases for dedented string literals -object DedentedStringErrors { // nopos-error // nopos-error // nopos-error +object DedentedStringErrors { // Error: No newline after opening quotes - val noNewlineAfterOpen = '''content on same line // error + val noNewlineAfterOpen = '''content on same line // error: dedented string literal must start with a newline // Error: Content not indented enough val notIndented = ''' -content +content // error: line in dedented string literal is indented less than the closing delimiter ''' - // Error: Mixed tabs and spaces + // Error: Mixed tabs and spaces - first line has tab, but closing delimiter has spaces val mixedTabsSpaces = ''' - tab line + tab line // error: line in dedented string literal is indented less than the closing delimiter space line ''' - // Error: Unclosed literal - val unclosed = ''' -some content - // Error: Non-whitespace before closing delimiter val nonWhitespaceBeforeClosing = ''' content here - text''' + text''' // error: last line of dedented string literal must contain only whitespace before closing delimiter } // Test @compileTimeOnly with dedented string @@ -39,3 +35,8 @@ object CompileTimeOnlyTest { onlyAtCompileTime // error } } + +// Error: Unclosed literal - must be last since it breaks parsing +object UnclosedTest { + val unclosed = ''' // error: unclosed dedented string literal +some content From 5c8c89209efbcefad299c336161d95e6b95f4a2f Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 15 Oct 2025 23:26:08 +0800 Subject: [PATCH 14/30] . --- .../src/dotty/tools/dotc/parsing/Parsers.scala | 15 +++++++-------- .../src/dotty/tools/dotc/parsing/Scanners.scala | 10 +++------- tests/neg/dedented-string-literals.check | 8 ++++---- tests/neg/dedented-string-literals.scala | 4 ++-- tests/run/dedented-string-literals.check | 17 ++++++++--------- tests/run/dedented-string-literals.scala | 7 +++---- 6 files changed, 27 insertions(+), 34 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 87b9be4623ca..95e5037d0657 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1363,6 +1363,7 @@ object Parsers { // Find the last line (should be just whitespace before closing delimiter) val lastNewlineIdx = str.lastIndexOf('\n') + if (lastNewlineIdx < 0) { syntaxError( em"dedented string literal must start with newline after opening quotes", @@ -1392,11 +1393,12 @@ object Parsers { } // Split into lines - val lines = str.linesIterator.toSeq + val linesAndWithSeps = (str.linesIterator.zip(str.linesWithSeparators)).toSeq - // Process all lines except the last (which is just the closing indentation) + // Process all lines except the first (which is empty before the first newline) + // and the last (which is just the closing indentation) var lineOffset = offset - val dedented = lines.dropRight(1).map { line => + val dedented = linesAndWithSeps.drop(1).dropRight(1).map { case (line, lineWithSep) => val result = if (line.startsWith(closingIndent)) line.substring(closingIndent.length) else if (line.trim.isEmpty) "" // Empty or whitespace-only lines @@ -1418,14 +1420,11 @@ object Parsers { } line } - lineOffset += line.length + 1 // +1 for the newline + lineOffset += lineWithSep.length // Make sure to include any \n, \r, \r\n, or \n\r result } - // Drop the first line if it's empty (the newline after opening delimiter) - val result = if (dedented.headOption.contains("")) dedented.drop(1) else dedented - - result.mkString("\n") + dedented.mkString("\n") } /** Literal ::= SimpleLiteral diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 92865c6e4617..8aaf81225e45 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1319,10 +1319,6 @@ object Scanners { return 0 } - // Skip the initial newline (CR LF or just LF) - if (ch == CR) nextRawChar() - if (ch == LF) nextRawChar() - // Collect all content using the string part parser getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) @@ -1349,11 +1345,11 @@ object Scanners { foundQuotes += 1 nextRawChar() } - // The while-loop above steps forward to the first non-`'` character, - // so we need to backtrack 1 char to avoid consuming it - charOffset -= 1 if (foundQuotes == quoteCount && ch != '\'') { + // The while-loop above steps forward to the first non-`'` character, + // so we need to backtrack 1 char to avoid consuming it + charOffset -= 1 // Found closing delimiter - exact match and not followed by another quote setStrVal() nextChar() // Switch from raw mode to normal mode diff --git a/tests/neg/dedented-string-literals.check b/tests/neg/dedented-string-literals.check index f68092266a9f..7c0a359f33dd 100644 --- a/tests/neg/dedented-string-literals.check +++ b/tests/neg/dedented-string-literals.check @@ -14,12 +14,12 @@ 19 | val nonWhitespaceBeforeClosing = ''' | ^ |last line of dedented string literal must contain only whitespace before closing delimiter +-- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:41:17 +41 | val unclosed = ''' + | ^ + |unclosed dedented string literal -- Error: tests/neg/dedented-string-literals.scala:35:4 35 | onlyAtCompileTime // error | ^^^^^^^^^^^^^^^^^ |This method should only be used at compile time |Do not call at runtime --- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:41:17 -41 | val unclosed = ''' - | ^ - |unclosed dedented string literal diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala index 0fbe429cf0a5..9e39d967b6bf 100644 --- a/tests/neg/dedented-string-literals.scala +++ b/tests/neg/dedented-string-literals.scala @@ -38,5 +38,5 @@ object CompileTimeOnlyTest { // Error: Unclosed literal - must be last since it breaks parsing object UnclosedTest { - val unclosed = ''' // error: unclosed dedented string literal -some content + val unclosed = ''' + some content // error: unclosed dedented string literal diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check index 1a8c6bd9cd86..f4531e82ef7b 100644 --- a/tests/run/dedented-string-literals.check +++ b/tests/run/dedented-string-literals.check @@ -55,7 +55,6 @@ Empty lines anywhere: content more content - ] With quotes: @@ -94,20 +93,20 @@ Interpolated pattern (two lines): Two line interpolated result: matched two line greeting In function: -function content -more content + function content + more content In class: -class member -content + class member + content In list: -Item: [first] -Item: [second] -Item: [third] +Item: [ first] +Item: [ second] +Item: [ third] Nested in expressions: -prefixmiddlesuffix +prefix middlesuffix Type ascription: Value: [ first line diff --git a/tests/run/dedented-string-literals.scala b/tests/run/dedented-string-literals.scala index 75a6d58a9bed..03fc6d597428 100644 --- a/tests/run/dedented-string-literals.scala +++ b/tests/run/dedented-string-literals.scala @@ -112,8 +112,8 @@ object Test { // Test tabs for indentation val withTabs = ''' - tab indented - content here + tab indented + content here ''' println("With tabs:") println(withTabs) @@ -289,8 +289,7 @@ object Test { println() // Test as type parameter to valueOf - import scala.compiletime.valueOf - val valueOfResult = valueOf[''' + val valueOfResult = scala.compiletime.constValue[''' alpha beta gamma From b687fccdb29d1dbbd7ad9d5d65738d2cda471eee Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 16 Oct 2025 00:02:32 +0800 Subject: [PATCH 15/30] . --- .../dotty/tools/dotc/parsing/Parsers.scala | 103 ++++++++---------- 1 file changed, 45 insertions(+), 58 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 95e5037d0657..97de05e45655 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1358,28 +1358,11 @@ object Parsers { * @param offset The source offset where the string literal begins * @return The dedented string, or str if errors were reported */ - private def dedentString(str: String, offset: Offset): String = { - if (str.isEmpty) return str - - // Find the last line (should be just whitespace before closing delimiter) - val lastNewlineIdx = str.lastIndexOf('\n') - - if (lastNewlineIdx < 0) { - syntaxError( - em"dedented string literal must start with newline after opening quotes", - offset - ) - return str - } - - val closingIndent = str.substring(lastNewlineIdx + 1) - if (!closingIndent.forall(_.isWhitespace)) { - syntaxError( - em"last line of dedented string literal must contain only whitespace before closing delimiter", - offset - ) - return str - } + private def dedentString(str: String, + offset: Offset, + closingIndent: String, + isFirstPart: Boolean, + isLastPart: Boolean): String = { // Check for mixed tabs and spaces in closing indent val hasTabs = closingIndent.contains('\t') @@ -1395,10 +1378,8 @@ object Parsers { // Split into lines val linesAndWithSeps = (str.linesIterator.zip(str.linesWithSeparators)).toSeq - // Process all lines except the first (which is empty before the first newline) - // and the last (which is just the closing indentation) var lineOffset = offset - val dedented = linesAndWithSeps.drop(1).dropRight(1).map { case (line, lineWithSep) => + def dedentLine(line: String, lineWithSep: String) = { val result = if (line.startsWith(closingIndent)) line.substring(closingIndent.length) else if (line.trim.isEmpty) "" // Empty or whitespace-only lines @@ -1424,7 +1405,30 @@ object Parsers { result } - dedented.mkString("\n") + // If this is the first part of a string, then the first line is the empty string following + // the opening `'''` delimiter, so we skip it. If not, then the first line is immediately + // following an interpolated value, and should be used raw without indenting + val firstLine = + if (isFirstPart) Nil + else { + val (line, lineWithSep) = linesAndWithSeps.head + lineOffset += lineWithSep.length + Seq(line) + } + + // Process all lines except the first and last, which require special handling + val dedented = linesAndWithSeps.drop(1).dropRight(1).map { case (line, lineWithSep) => + dedentLine(line, lineWithSep) + } + + // If this is the last part of the string, then the last line is the indentation-only + // line preceding the closing delimiter, and should be ignored. If not, then the last line + // also needs to be de-dented + val lastLine = + if (isLastPart) Nil + else Seq(dedentLine(linesAndWithSeps.last._1, linesAndWithSeps.last._2)) + + (firstLine ++ dedented ++ lastLine).mkString("\n") } /** Literal ::= SimpleLiteral @@ -1463,7 +1467,7 @@ object Parsers { // For non-interpolated dedented strings, check if the token starts with ''' val str = in.strVal if (token == STRINGLIT && !inStringInterpolation && isDedentedStringLiteral(negOffset)) { - dedentString(str, negOffset) + dedentString(str, negOffset, extractClosingIndent(str, negOffset), true, true) } else str case TRUE => true case FALSE => false @@ -1545,14 +1549,11 @@ object Parsers { in.buf(in.charOffset + 1) == '"' val isDedented = in.charOffset + 2 < in.buf.length && + in.buf(in.charOffset - 1) == '\'' && in.buf(in.charOffset) == '\'' && - in.buf(in.charOffset + 1) == '\'' && - in.buf(in.charOffset + 2) == '\'' - + in.buf(in.charOffset + 1) == '\'' in.nextToken() - // For dedented strings, we need to collect all string parts first, - // then dedent them all based on the closing indentation if (isDedented) { // Collect all string parts and their offsets val stringParts = new ListBuffer[(String, Offset)] @@ -1599,11 +1600,11 @@ object Parsers { // Now dedent all string parts based on the last one's closing indentation if (stringParts.nonEmpty) { val lastPart = stringParts.last._1 - val closingIndent = extractClosingIndent(lastPart) + val closingIndent = extractClosingIndent(lastPart, in.offset) // Dedent all parts - val dedentedParts = stringParts.map { case (str, offset) => - (dedentStringPart(str, closingIndent), offset) + val dedentedParts = stringParts.zipWithIndex.map { case ((str, offset), index) => + (dedentString(str, in.offset, closingIndent, index == 0, index == stringParts.length-1), offset) } // Build the segments with dedented strings @@ -1658,30 +1659,16 @@ object Parsers { } /** Extract the closing indentation from the last line of a string */ - private def extractClosingIndent(str: String): String = { - val lastNewlineIdx = str.lastIndexOf('\n') - if (lastNewlineIdx < 0) "" else str.substring(lastNewlineIdx + 1) - } - - /** Dedent a string part by removing the specified indentation from each line */ - private def dedentStringPart(str: String, closingIndent: String): String = { - if (str.isEmpty || closingIndent.isEmpty) return str - - val lines = str.split("\n", -1) // -1 to keep trailing empty strings - - val dedented = lines.map { line => - if (line.startsWith(closingIndent)) { - line.substring(closingIndent.length) - } else if (line.trim.isEmpty) { - // Empty or whitespace-only lines - "" - } else { - // Line doesn't start with the closing indentation, keep as-is - line - } + private def extractClosingIndent(str: String, offset: Offset): String = { + val closingIndent = str.linesIterator.toSeq.last + if (!closingIndent.forall(_.isWhitespace)) { + syntaxError( + em"last line of dedented string literal must contain only whitespace before closing delimiter", + offset + ) + return str } - - dedented.mkString("\n") + closingIndent } /* ------------- NEW LINES ------------------------------------------------- */ From 3ea3e7e359b26ca02d3fec836aa14c1376d54ad5 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 16 Oct 2025 00:20:13 +0800 Subject: [PATCH 16/30] wip --- .../dotty/tools/dotc/parsing/Parsers.scala | 144 +++++++----------- 1 file changed, 55 insertions(+), 89 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 97de05e45655..55263cebe79c 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1554,105 +1554,71 @@ object Parsers { in.buf(in.charOffset + 1) == '\'' in.nextToken() - if (isDedented) { - // Collect all string parts and their offsets - val stringParts = new ListBuffer[(String, Offset)] - val interpolatedExprs = new ListBuffer[Tree] - - var offsetCorrection = 3 // triple single quotes - while (in.token == STRINGPART) { - val literalOffset = in.offset + offsetCorrection - stringParts += ((in.strVal, literalOffset)) - offsetCorrection = 0 - in.nextToken() + // Collect all string parts and their offsets + val stringParts = new ListBuffer[(String, Offset)] + val interpolatedExprs = new ListBuffer[Tree] + + var offsetCorrection = if (isDedented) 3 else if (isTripleQuoted) 3 else 1 + while (in.token == STRINGPART) { + val literalOffset = in.offset + offsetCorrection + stringParts += ((in.strVal, literalOffset)) + offsetCorrection = 0 + in.nextToken() - // Collect the interpolated expression - interpolatedExprs += atSpan(in.offset) { - if (in.token == IDENTIFIER) - termIdent() - else if (in.token == USCORE && inPattern) { - in.nextToken() - Ident(nme.WILDCARD) - } - else if (in.token == THIS) { - in.nextToken() - This(EmptyTypeIdent) - } - else if (in.token == LBRACE) - if (inPattern) Block(Nil, inBraces(pattern())) - else expr() - else { - report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) - EmptyTree - } + // Collect the interpolated expression + interpolatedExprs += atSpan(in.offset) { + if (in.token == IDENTIFIER) + termIdent() + else if (in.token == USCORE && inPattern) { + in.nextToken() + Ident(nme.WILDCARD) + } + else if (in.token == THIS) { + in.nextToken() + This(EmptyTypeIdent) + } + else if (in.token == LBRACE) + if (inPattern) Block(Nil, inBraces(pattern())) + else expr() + else { + report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) + EmptyTree } } + } - // Get the final STRINGLIT - val finalLiteral = if (in.token == STRINGLIT) { - val s = in.strVal - val off = in.offset + offsetCorrection - stringParts += ((s, off)) - in.nextToken() - true - } else false + // Get the final STRINGLIT + val finalLiteral = if (in.token == STRINGLIT) { + val s = in.strVal + val off = in.offset + offsetCorrection + stringParts += ((s, off)) + in.nextToken() + true + } else false - // Now dedent all string parts based on the last one's closing indentation - if (stringParts.nonEmpty) { + val dedentedParts = + if (!isDedented) stringParts + else { val lastPart = stringParts.last._1 val closingIndent = extractClosingIndent(lastPart, in.offset) - - // Dedent all parts - val dedentedParts = stringParts.zipWithIndex.map { case ((str, offset), index) => - (dedentString(str, in.offset, closingIndent, index == 0, index == stringParts.length-1), offset) + stringParts.zipWithIndex.map { case ((str, offset), index) => + (dedentString(str, in.offset, closingIndent, index == 0, index == stringParts.length - 1), offset) } + } - // Build the segments with dedented strings - for (i <- 0 until dedentedParts.size - 1) { - val (dedentedStr, offset) = dedentedParts(i) - segmentBuf += Thicket( - atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) }, - interpolatedExprs(i) - ) - } + // Build the segments with dedented strings + for (i <- 0 until dedentedParts.size - 1) { + val (dedentedStr, offset) = dedentedParts(i) + segmentBuf += Thicket( + atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) }, + interpolatedExprs(i) + ) + } - // Add the final literal if present - if (finalLiteral) { - val (dedentedStr, offset) = dedentedParts.last - segmentBuf += atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) } - } - } - } else { - // Non-dedented string: use original logic - def nextSegment(literalOffset: Offset) = - segmentBuf += Thicket( - literal(literalOffset, inPattern = inPattern, inStringInterpolation = true), - atSpan(in.offset) { - if (in.token == IDENTIFIER) - termIdent() - else if (in.token == USCORE && inPattern) { - in.nextToken() - Ident(nme.WILDCARD) - } - else if (in.token == THIS) { - in.nextToken() - This(EmptyTypeIdent) - } - else if (in.token == LBRACE) - if (inPattern) Block(Nil, inBraces(pattern())) - else expr() - else { - report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) - EmptyTree - } - }) - - var offsetCorrection = if isTripleQuoted then 3 else 1 - while (in.token == STRINGPART) - nextSegment(in.offset + offsetCorrection) - offsetCorrection = 0 - if (in.token == STRINGLIT) - segmentBuf += literal(inPattern = inPattern, negOffset = in.offset + offsetCorrection, inStringInterpolation = true) + // Add the final literal if present + if (finalLiteral) { + val (dedentedStr, offset) = dedentedParts.last + segmentBuf += atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) } } InterpolatedString(interpolator, segmentBuf.toList) From b1613c71fa44e93742bbce849538e997498a6b74 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 16 Oct 2025 00:41:42 +0800 Subject: [PATCH 17/30] . --- .../dotty/tools/dotc/parsing/Parsers.scala | 146 ++++++++++-------- tests/neg/dedented-string-literals.check | 23 ++- tests/run/dedented-string-literals.check | 5 + tests/run/dedented-string-literals.scala | 41 ++--- 4 files changed, 106 insertions(+), 109 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 55263cebe79c..0044296b477c 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1364,71 +1364,76 @@ object Parsers { isFirstPart: Boolean, isLastPart: Boolean): String = { - // Check for mixed tabs and spaces in closing indent - val hasTabs = closingIndent.contains('\t') - val hasSpaces = closingIndent.contains(' ') - if (hasTabs && hasSpaces) { - syntaxError( - em"dedented string literal cannot mix tabs and spaces in indentation", - offset - ) - return str - } + if (closingIndent == "") str + else { + // Check for mixed tabs and spaces in closing indent + + val hasTabs = closingIndent.contains('\t') + val hasSpaces = closingIndent.contains(' ') + if (hasTabs && hasSpaces) { + syntaxError( + em"dedented string literal cannot mix tabs and spaces in indentation", + offset + ) + return str + } - // Split into lines - val linesAndWithSeps = (str.linesIterator.zip(str.linesWithSeparators)).toSeq + // Split into lines + val linesAndWithSeps = (str.linesIterator.zip(str.linesWithSeparators)).toSeq - var lineOffset = offset - def dedentLine(line: String, lineWithSep: String) = { - val result = - if (line.startsWith(closingIndent)) line.substring(closingIndent.length) - else if (line.trim.isEmpty) "" // Empty or whitespace-only lines - else { - // Check if this line has mixed tabs/spaces that don't match closing indent - val lineIndent = line.takeWhile(_.isWhitespace) - val lineHasTabs = lineIndent.contains('\t') - val lineHasSpaces = lineIndent.contains(' ') - if ((hasTabs && lineHasSpaces && !lineHasTabs) || (hasSpaces && lineHasTabs && !lineHasSpaces)) { - syntaxError( - em"dedented string literal cannot mix tabs and spaces in indentation", - offset - ) - } else { - syntaxError( - em"line in dedented string literal must be indented at least as much as the closing delimiter", - lineOffset - ) + var lineOffset = offset + + def dedentLine(line: String, lineWithSep: String) = { + val result = + if (line.startsWith(closingIndent)) line.substring(closingIndent.length) + else if (line.trim.isEmpty) "" // Empty or whitespace-only lines + else { + // Check if this line has mixed tabs/spaces that don't match closing indent + val lineIndent = line.takeWhile(_.isWhitespace) + val lineHasTabs = lineIndent.contains('\t') + val lineHasSpaces = lineIndent.contains(' ') + if ((hasTabs && lineHasSpaces && !lineHasTabs) || (hasSpaces && lineHasTabs && !lineHasSpaces)) { + syntaxError( + em"dedented string literal cannot mix tabs and spaces in indentation", + offset + ) + } else { + syntaxError( + em"line in dedented string literal must be indented at least as much as the closing delimiter", + lineOffset + ) + } + line } - line + lineOffset += lineWithSep.length // Make sure to include any \n, \r, \r\n, or \n\r + result + } + + // If this is the first part of a string, then the first line is the empty string following + // the opening `'''` delimiter, so we skip it. If not, then the first line is immediately + // following an interpolated value, and should be used raw without indenting + val firstLine = + if (isFirstPart) Nil + else { + val (line, lineWithSep) = linesAndWithSeps.head + lineOffset += lineWithSep.length + Seq(line) } - lineOffset += lineWithSep.length // Make sure to include any \n, \r, \r\n, or \n\r - result - } - // If this is the first part of a string, then the first line is the empty string following - // the opening `'''` delimiter, so we skip it. If not, then the first line is immediately - // following an interpolated value, and should be used raw without indenting - val firstLine = - if (isFirstPart) Nil - else { - val (line, lineWithSep) = linesAndWithSeps.head - lineOffset += lineWithSep.length - Seq(line) + // Process all lines except the first and last, which require special handling + val dedented = linesAndWithSeps.drop(1).dropRight(1).map { case (line, lineWithSep) => + dedentLine(line, lineWithSep) } - // Process all lines except the first and last, which require special handling - val dedented = linesAndWithSeps.drop(1).dropRight(1).map { case (line, lineWithSep) => - dedentLine(line, lineWithSep) - } + // If this is the last part of the string, then the last line is the indentation-only + // line preceding the closing delimiter, and should be ignored. If not, then the last line + // also needs to be de-dented + val lastLine = + if (isLastPart) Nil + else Seq(dedentLine(linesAndWithSeps.last._1, linesAndWithSeps.last._2)) - // If this is the last part of the string, then the last line is the indentation-only - // line preceding the closing delimiter, and should be ignored. If not, then the last line - // also needs to be de-dented - val lastLine = - if (isLastPart) Nil - else Seq(dedentLine(linesAndWithSeps.last._1, linesAndWithSeps.last._2)) - - (firstLine ++ dedented ++ lastLine).mkString("\n") + (firstLine ++ dedented ++ lastLine).mkString("\n") + } } /** Literal ::= SimpleLiteral @@ -1597,21 +1602,22 @@ object Parsers { } else false val dedentedParts = - if (!isDedented) stringParts + if (!isDedented || stringParts.isEmpty) stringParts else { val lastPart = stringParts.last._1 val closingIndent = extractClosingIndent(lastPart, in.offset) stringParts.zipWithIndex.map { case ((str, offset), index) => - (dedentString(str, in.offset, closingIndent, index == 0, index == stringParts.length - 1), offset) + val dedented = dedentString(str, in.offset, closingIndent, index == 0, index == stringParts.length - 1) + (dedented, offset) } } // Build the segments with dedented strings - for (i <- 0 until dedentedParts.size - 1) { - val (dedentedStr, offset) = dedentedParts(i) + for ((str, expr) <- dedentedParts.zip(interpolatedExprs)) { + val (dedentedStr, offset) = str segmentBuf += Thicket( atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) }, - interpolatedExprs(i) + expr ) } @@ -1626,14 +1632,26 @@ object Parsers { /** Extract the closing indentation from the last line of a string */ private def extractClosingIndent(str: String, offset: Offset): String = { - val closingIndent = str.linesIterator.toSeq.last - if (!closingIndent.forall(_.isWhitespace)) { + // If the last line is empty, `linesIterator` and `linesWithSeparators` skips + // the empty string, so we must recognize that case and explicitly default to "" + // otherwise things will blow up + val closingIndent = str + .linesIterator + .zip(str.linesWithSeparators) + .toSeq + .lastOption + .filter((line, lineWithSep) => line == lineWithSep) + .map(_._1) + .getOrElse("") + + if (closingIndent.exists(!_.isWhitespace)) { syntaxError( em"last line of dedented string literal must contain only whitespace before closing delimiter", offset ) return str } + closingIndent } diff --git a/tests/neg/dedented-string-literals.check b/tests/neg/dedented-string-literals.check index 7c0a359f33dd..7c61c5e8031e 100644 --- a/tests/neg/dedented-string-literals.check +++ b/tests/neg/dedented-string-literals.check @@ -1,25 +1,20 @@ --- Error: tests/neg/dedented-string-literals.scala:5:27 -5 | val noNewlineAfterOpen = '''content on same line // error +-- Error: ---------------------------------------------------------------------- +5 | val noNewlineAfterOpen = '''content on same line // error: dedented string literal must start with a newline | ^ - |dedented string literal must start with newline after opening quotes --- Error: tests/neg/dedented-string-literals.scala:8:20 + | dedented string literal must start with newline after opening quotes +-- Error: ---------------------------------------------------------------------- 8 | val notIndented = ''' | ^ |line in dedented string literal must be indented at least as much as the closing delimiter --- Error: tests/neg/dedented-string-literals.scala:13:24 +-- Error: ---------------------------------------------------------------------- 13 | val mixedTabsSpaces = ''' | ^ - |dedented string literal cannot mix tabs and spaces in indentation --- Error: tests/neg/dedented-string-literals.scala:19:35 + | dedented string literal cannot mix tabs and spaces in indentation +-- Error: ---------------------------------------------------------------------- 19 | val nonWhitespaceBeforeClosing = ''' | ^ |last line of dedented string literal must contain only whitespace before closing delimiter --- [E040] Syntax Error: tests/neg/dedented-string-literals.scala:41:17 +-- Error: ---------------------------------------------------------------------- 41 | val unclosed = ''' | ^ - |unclosed dedented string literal --- Error: tests/neg/dedented-string-literals.scala:35:4 -35 | onlyAtCompileTime // error - | ^^^^^^^^^^^^^^^^^ - |This method should only be used at compile time - |Do not call at runtime + | unclosed dedented string literal \ No newline at end of file diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check index f4531e82ef7b..bf7210bf0da1 100644 --- a/tests/run/dedented-string-literals.check +++ b/tests/run/dedented-string-literals.check @@ -2,6 +2,11 @@ Basic: i am cow hear me moo +No Indent: + +i am cow +hear me moo + With indent: i am cow hear me moo diff --git a/tests/run/dedented-string-literals.scala b/tests/run/dedented-string-literals.scala index 03fc6d597428..e592796cda4d 100644 --- a/tests/run/dedented-string-literals.scala +++ b/tests/run/dedented-string-literals.scala @@ -2,7 +2,6 @@ object Test { def main(args: Array[String]): Unit = { - // Test basic dedenting val basic = ''' i am cow hear me moo @@ -11,23 +10,28 @@ object Test { println(basic) println() - // Test with indentation preserved - val withIndent = ''' + val noIndent = ''' +i am cow +hear me moo +''' + println("No Indent:") + println(noIndent) + println() + + val withIndentPreserved = ''' i am cow hear me moo ''' println("With indent:") - println(withIndent) + println(withIndentPreserved) println() - // Test empty string val empty = ''' ''' println("Empty:") println(s"[${empty}]") println() - // Test single line val singleLine = ''' hello world ''' @@ -35,7 +39,6 @@ object Test { println(singleLine) println() - // Test blank lines val blankLines = ''' line 1 @@ -45,7 +48,6 @@ object Test { println(blankLines) println() - // Test deep indentation removal val deepIndent = ''' deeply indented @@ -55,7 +57,6 @@ object Test { println(deepIndent) println() - // Test mixed indentation levels (preserved) val mixedIndent = ''' first level second level @@ -65,7 +66,6 @@ object Test { println(mixedIndent) println() - // Test extended delimiter with embedded ''' val withTripleQuotes = '''' ''' i am cow @@ -75,7 +75,6 @@ object Test { println(withTripleQuotes) println() - // Test extended delimiter with 5 quotes val extended5 = ''''' '''' content with four quotes @@ -85,7 +84,6 @@ object Test { println(extended5) println() - // Test that newlines are normalized to \n val normalized = ''' line1 line2 @@ -94,7 +92,6 @@ object Test { println(s"Has only LF: ${!normalized.contains('\r')}") println() - // Test special characters val specialChars = ''' !"#$%&()*+,-./:;<=>?@[\]^_`{|}~ ''' @@ -102,7 +99,6 @@ object Test { println(specialChars) println() - // Test unicode val unicode = ''' Hello 世界 ''' @@ -110,7 +106,6 @@ object Test { println(unicode) println() - // Test tabs for indentation val withTabs = ''' tab indented content here @@ -119,7 +114,6 @@ object Test { println(withTabs) println() - // Test empty lines anywhere val emptyLinesAnywhere = ''' content @@ -131,7 +125,6 @@ object Test { println(s"[${emptyLinesAnywhere}]") println() - // Test content with quotes val withQuotes = ''' "double quotes" 'single quote' @@ -141,7 +134,6 @@ object Test { println(withQuotes) println() - // Test zero-width closing indent val zeroIndent = ''' content ''' @@ -149,7 +141,6 @@ object Test { println(zeroIndent) println() - // Test content length and character accuracy val precise = ''' ab cd @@ -160,7 +151,6 @@ object Test { println(s"Chars: ${precise.toList}") println() - // Test with string interpolator val name = "Alice" val age = 30 val interpolated = s''' @@ -171,7 +161,6 @@ object Test { println(interpolated) println() - // Test with f interpolator val value = 42 val formatted = f''' Value: $value%05d @@ -181,7 +170,6 @@ object Test { println(formatted) println() - // Test as pattern def testPattern(s: String): String = s match { case ''' test @@ -195,7 +183,6 @@ object Test { println(s"Pattern result: ${testPattern("test")}") println() - // Test as pattern with interpolator def testInterpolatedPattern(s: String): String = s match { case s''' Hello $_ @@ -206,7 +193,6 @@ object Test { println(s"Interpolated pattern result: ${testInterpolatedPattern("Hello World")}") println() - // Test as pattern with two lines def testPatternTwoLines(s: String): String = s match { case ''' line one @@ -218,7 +204,6 @@ object Test { println(s"Two line pattern result: ${testPatternTwoLines("line one\nline two")}") println() - // Test as pattern with interpolator and two lines def testInterpolatedPatternTwoLines(s: String): String = s match { case s''' First: $_ @@ -230,7 +215,6 @@ object Test { println(s"Two line interpolated result: ${testInterpolatedPatternTwoLines("First: Alice\nSecond: Bob")}") println() - // Test in function context def inFunction = ''' function content more content @@ -239,7 +223,6 @@ object Test { println(inFunction) println() - // Test in class context class InClass { val inClass = ''' class member @@ -251,7 +234,6 @@ object Test { println(classInstance.inClass) println() - // Test in a list val list = List( ''' first @@ -269,7 +251,6 @@ object Test { } println() - // Test nested in expressions val nested = "prefix" + ''' middle ''' + "suffix" @@ -277,7 +258,6 @@ object Test { println(nested) println() - // Test as type ascription (singleton literal type) val typedVal: ''' first line indented line @@ -288,7 +268,6 @@ object Test { println(s"Type matches: ${typedVal == " first line\n indented line\n third line"}") println() - // Test as type parameter to valueOf val valueOfResult = scala.compiletime.constValue[''' alpha beta From 2fd9e0edd294fc38ef4c73e54dbe1d4359505c54 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 16 Oct 2025 00:48:12 +0800 Subject: [PATCH 18/30] . --- tests/run/dedented-string-literals.check | 4 ++++ tests/run/dedented-string-literals.scala | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check index bf7210bf0da1..9812f78b1f04 100644 --- a/tests/run/dedented-string-literals.check +++ b/tests/run/dedented-string-literals.check @@ -81,6 +81,10 @@ Interpolated: Hello Alice You are 30 years old +Escaped Interpolated: +Hello $name +You are $age years old + Formatted: Value: 00042 Done diff --git a/tests/run/dedented-string-literals.scala b/tests/run/dedented-string-literals.scala index e592796cda4d..5dc2adcacffc 100644 --- a/tests/run/dedented-string-literals.scala +++ b/tests/run/dedented-string-literals.scala @@ -161,6 +161,14 @@ hear me moo println(interpolated) println() + val escapedInterpolated = s''' + Hello $$name + You are $$age years old + ''' + println("Escaped Interpolated:") + println(escapedInterpolated) + println() + val value = 42 val formatted = f''' Value: $value%05d From f83defef0cf10a9395c480f85d3a9aee2cb4aaf8 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 16 Oct 2025 01:10:51 +0800 Subject: [PATCH 19/30] . --- compiler/src/dotty/tools/dotc/parsing/Parsers.scala | 4 +++- compiler/src/dotty/tools/dotc/parsing/Scanners.scala | 12 ------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 0044296b477c..49d5fe411223 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1363,7 +1363,9 @@ object Parsers { closingIndent: String, isFirstPart: Boolean, isLastPart: Boolean): String = { - + // Just explicitly do nothing when the `closingIndent` is empty. This is easier than trying + // to ensure that handling of the various `linesIterator`/`linesWithSeparators`/etc. + // APIs behaves predictably in the presence of empty leading/trailing lines if (closingIndent == "") str else { // Check for mixed tabs and spaces in closing indent diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 8aaf81225e45..2a8f279c3ea7 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1288,18 +1288,6 @@ object Scanners { else error(em"unclosed string literal") } - /** Parse a dedented string literal (triple single quotes) - * Requirements: - * - Must start with ''' followed by newline - * - Must end with newline + whitespace + ''' - * - Removes first newline after opening delimiter - * - Removes final newline and preceding whitespace before closing delimiter - * - Strips indentation equal to closing delimiter indentation - * - All lines must be empty or indented further than closing delimiter - * - Supports extended delimiters (e.g., '''', ''''') - * @param isInterpolated If true, handles $ interpolation and returns STRINGPART tokens - * @return The quote count (number of quotes in the delimiter) for storing in the region - */ private def getDedentedString(isInterpolated: Boolean): Int = { // For interpolated strings, we're already at the first character after ''' // For non-interpolated, we need to consume the first character From ac4c475da7e6c60c4e313f0e20ef548f1f1175d6 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 16 Oct 2025 01:11:02 +0800 Subject: [PATCH 20/30] . --- compiler/src/dotty/tools/dotc/parsing/Scanners.scala | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 2a8f279c3ea7..76932a245d87 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1313,17 +1313,6 @@ object Scanners { quoteCount } - /** Parse dedented string content, with optional $ interpolation handling. - * This collects content until hitting $ (if interpolated) or closing delimiter. - * Respects the quote count for extended delimiters. - * - * @param quoteCount The number of quotes in the delimiter (3 for ''', 4 for '''', etc.) - * @param isInterpolated If true, handles $ expressions and returns STRINGPART tokens. - * If false, treats $ as regular content and returns STRINGLIT. - * - * Note: Interpolated strings do NOT dedent during parsing - dedenting must be handled - * at runtime after all parts are assembled. Non-interpolated strings dedent after collection. - */ @tailrec private def getDedentedStringPartWithDelimiter(quoteCount: Int, isInterpolated: Boolean): Unit = // Check for closing delimiter with correct quote count if (ch == '\'') { From 4f2c7f468ce347eb1f7613055ffee5413c1f0c94 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Thu, 16 Oct 2025 01:11:59 +0800 Subject: [PATCH 21/30] wip --- compiler/src/dotty/tools/dotc/parsing/Parsers.scala | 5 +---- compiler/src/dotty/tools/dotc/parsing/Scanners.scala | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 49d5fe411223..83565bb45450 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1446,10 +1446,7 @@ object Parsers { * @param negOffset The offset of a preceding `-' sign, if any. * If the literal is not negated, negOffset == in.offset. */ - def literal(negOffset: Int = in.offset, - inPattern: Boolean = false, - inTypeOrSingleton: Boolean = false, - inStringInterpolation: Boolean = false): Tree = { + def literal(negOffset: Int = in.offset, inPattern: Boolean = false, inTypeOrSingleton: Boolean = false, inStringInterpolation: Boolean = false): Tree = { def literalOf(token: Token): Tree = { val isNegated = negOffset < in.offset def digits0 = in.removeNumberSeparators(in.strVal) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 76932a245d87..34e4a7fbcb18 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1496,7 +1496,6 @@ object Scanners { getStringPart(multiLine) } - private def isTripleQuote(): Boolean = if (ch == '"') { nextRawChar() From 02f9cf2bf2e1978456fd8977ca249347c012227b Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 08:58:54 +0800 Subject: [PATCH 22/30] consolidate interpolator parsing --- .../dotty/tools/dotc/parsing/Scanners.scala | 130 +++++++----------- 1 file changed, 53 insertions(+), 77 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 34e4a7fbcb18..207070799215 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1338,47 +1338,9 @@ object Scanners { } } else if (isInterpolated && ch == '$') { - // Handle interpolation - def getInterpolatedIdentRest(hasSupplement: Boolean): Unit = - @tailrec def loopRest(): Unit = - if ch != SU && isUnicodeIdentifierPart(ch) then - putChar(ch) ; nextRawChar() - loopRest() - else if atSupplementary(ch, isUnicodeIdentifierPart) then - putChar(ch) ; nextRawChar() - putChar(ch) ; nextRawChar() - loopRest() - else - finishNamedToken(IDENTIFIER, target = next) - end loopRest - setStrVal() - token = STRINGPART - next.lastOffset = charOffset - 1 - next.offset = charOffset - 1 - putChar(ch) ; nextRawChar() - if hasSupplement then - putChar(ch) ; nextRawChar() - loopRest() - end getInterpolatedIdentRest - - nextRawChar() - if (ch == '$' || ch == '\'') { - putChar(ch) - nextRawChar() + if (handleStringInterpolation('\'')) { getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) } - else if (ch == '{') { - setStrVal() - token = STRINGPART - } - else if isUnicodeIdentifierStart(ch) || ch == '_' then - getInterpolatedIdentRest(hasSupplement = false) - else if atSupplementary(ch, isUnicodeIdentifierStart) then - getInterpolatedIdentRest(hasSupplement = true) - else - error("invalid string interpolation: `$$`, `$'`, `$`ident or `$`BlockExpr expected".toMessage, off = charOffset - 2) - putChar('$') - getDedentedStringPartWithDelimiter(quoteCount, isInterpolated) } else { val isUnclosedLiteral = !isUnicodeEscape && ch == SU @@ -1391,6 +1353,57 @@ object Scanners { } end getDedentedStringPartWithDelimiter + /** Handle `$` in string interpolations. This is shared between regular strings and dedented strings. + * @param escapeChar The character that can be escaped with `$` (either `"` or `'`) + * @return true if the caller should continue parsing the rest of the string, false otherwise + */ + private def handleStringInterpolation(escapeChar: Char): Boolean = { + def getInterpolatedIdentRest(hasSupplement: Boolean): Unit = + @tailrec def loopRest(): Unit = + if ch != SU && isUnicodeIdentifierPart(ch) then + putChar(ch) ; nextRawChar() + loopRest() + else if atSupplementary(ch, isUnicodeIdentifierPart) then + putChar(ch) ; nextRawChar() + putChar(ch) ; nextRawChar() + loopRest() + else + finishNamedToken(IDENTIFIER, target = next) + end loopRest + setStrVal() + token = STRINGPART + next.lastOffset = charOffset - 1 + next.offset = charOffset - 1 + putChar(ch) ; nextRawChar() + if hasSupplement then + putChar(ch) ; nextRawChar() + loopRest() + end getInterpolatedIdentRest + + nextRawChar() + if (ch == '$' || ch == escapeChar) { + putChar(ch) + nextRawChar() + true // continue parsing + } + else if (ch == '{') { + setStrVal() + token = STRINGPART + false // don't continue, we're done with this string part + } + else if isUnicodeIdentifierStart(ch) || ch == '_' then + getInterpolatedIdentRest(hasSupplement = false) + false // don't continue, identifier rest handles it + else if atSupplementary(ch, isUnicodeIdentifierStart) then + getInterpolatedIdentRest(hasSupplement = true) + false // don't continue, identifier rest handles it + else + val escapeDesc = if escapeChar == '"' then "`$\"\"`, " else "`$'`, " + error(s"invalid string interpolation: `$$$$`, $escapeDesc`$$`ident or `$$`BlockExpr expected".toMessage, off = charOffset - 2) + putChar('$') + true // continue parsing after error + } + private def getRawStringLit(): Unit = if (ch == '\"') { nextRawChar() @@ -1435,46 +1448,9 @@ object Scanners { getStringPart(multiLine) } else if (ch == '$') { - def getInterpolatedIdentRest(hasSupplement: Boolean): Unit = - @tailrec def loopRest(): Unit = - if ch != SU && isUnicodeIdentifierPart(ch) then - putChar(ch) ; nextRawChar() - loopRest() - else if atSupplementary(ch, isUnicodeIdentifierPart) then - putChar(ch) ; nextRawChar() - putChar(ch) ; nextRawChar() - loopRest() - else - finishNamedToken(IDENTIFIER, target = next) - end loopRest - setStrVal() - token = STRINGPART - next.lastOffset = charOffset - 1 - next.offset = charOffset - 1 - putChar(ch) ; nextRawChar() - if hasSupplement then - putChar(ch) ; nextRawChar() - loopRest() - end getInterpolatedIdentRest - - nextRawChar() - if (ch == '$' || ch == '"') { - putChar(ch) - nextRawChar() + if (handleStringInterpolation('"')) { getStringPart(multiLine) } - else if (ch == '{') { - setStrVal() - token = STRINGPART - } - else if isUnicodeIdentifierStart(ch) || ch == '_' then - getInterpolatedIdentRest(hasSupplement = false) - else if atSupplementary(ch, isUnicodeIdentifierStart) then - getInterpolatedIdentRest(hasSupplement = true) - else - error("invalid string interpolation: `$$`, `$\"`, `$`ident or `$`BlockExpr expected".toMessage, off = charOffset - 2) - putChar('$') - getStringPart(multiLine) } else { val isUnclosedLiteral = !isUnicodeEscape && (ch == SU || (!multiLine && (ch == CR || ch == LF))) From 88627649377279f602ddd98930d0adc3b09b72fb Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 08:59:30 +0800 Subject: [PATCH 23/30] . --- compiler/src/dotty/tools/dotc/parsing/Scanners.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 207070799215..cc5abad1a5fa 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1353,10 +1353,6 @@ object Scanners { } end getDedentedStringPartWithDelimiter - /** Handle `$` in string interpolations. This is shared between regular strings and dedented strings. - * @param escapeChar The character that can be escaped with `$` (either `"` or `'`) - * @return true if the caller should continue parsing the rest of the string, false otherwise - */ private def handleStringInterpolation(escapeChar: Char): Boolean = { def getInterpolatedIdentRest(hasSupplement: Boolean): Unit = @tailrec def loopRest(): Unit = From 3aefb59ca5cf0e0ee5ecfe382d01898a3c49b176 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 09:07:44 +0800 Subject: [PATCH 24/30] cleanup --- .../dotty/tools/dotc/parsing/Parsers.scala | 36 +++++-------------- .../dotty/tools/dotc/parsing/Scanners.scala | 17 +++------ 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 83565bb45450..0002ac6c9329 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1368,11 +1368,7 @@ object Parsers { // APIs behaves predictably in the presence of empty leading/trailing lines if (closingIndent == "") str else { - // Check for mixed tabs and spaces in closing indent - - val hasTabs = closingIndent.contains('\t') - val hasSpaces = closingIndent.contains(' ') - if (hasTabs && hasSpaces) { + if (closingIndent.contains('\t') && closingIndent.contains(' ')) { syntaxError( em"dedented string literal cannot mix tabs and spaces in indentation", offset @@ -1380,9 +1376,7 @@ object Parsers { return str } - // Split into lines val linesAndWithSeps = (str.linesIterator.zip(str.linesWithSeparators)).toSeq - var lineOffset = offset def dedentLine(line: String, lineWithSep: String) = { @@ -1390,21 +1384,10 @@ object Parsers { if (line.startsWith(closingIndent)) line.substring(closingIndent.length) else if (line.trim.isEmpty) "" // Empty or whitespace-only lines else { - // Check if this line has mixed tabs/spaces that don't match closing indent - val lineIndent = line.takeWhile(_.isWhitespace) - val lineHasTabs = lineIndent.contains('\t') - val lineHasSpaces = lineIndent.contains(' ') - if ((hasTabs && lineHasSpaces && !lineHasTabs) || (hasSpaces && lineHasTabs && !lineHasSpaces)) { - syntaxError( - em"dedented string literal cannot mix tabs and spaces in indentation", - offset - ) - } else { - syntaxError( - em"line in dedented string literal must be indented at least as much as the closing delimiter", - lineOffset - ) - } + syntaxError( + em"line in dedented string literal must be indented at least as much as the closing delimiter with an identical prefix", + lineOffset + ) line } lineOffset += lineWithSep.length // Make sure to include any \n, \r, \r\n, or \n\r @@ -1551,14 +1534,15 @@ object Parsers { in.charOffset + 1 < in.buf.length && in.buf(in.charOffset) == '"' && in.buf(in.charOffset + 1) == '"' + val isDedented = in.charOffset + 2 < in.buf.length && in.buf(in.charOffset - 1) == '\'' && in.buf(in.charOffset) == '\'' && in.buf(in.charOffset + 1) == '\'' + in.nextToken() - // Collect all string parts and their offsets val stringParts = new ListBuffer[(String, Offset)] val interpolatedExprs = new ListBuffer[Tree] @@ -1569,7 +1553,6 @@ object Parsers { offsetCorrection = 0 in.nextToken() - // Collect the interpolated expression interpolatedExprs += atSpan(in.offset) { if (in.token == IDENTIFIER) termIdent() @@ -1591,7 +1574,6 @@ object Parsers { } } - // Get the final STRINGLIT val finalLiteral = if (in.token == STRINGLIT) { val s = in.strVal val off = in.offset + offsetCorrection @@ -1611,7 +1593,6 @@ object Parsers { } } - // Build the segments with dedented strings for ((str, expr) <- dedentedParts.zip(interpolatedExprs)) { val (dedentedStr, offset) = str segmentBuf += Thicket( @@ -1620,8 +1601,7 @@ object Parsers { ) } - // Add the final literal if present - if (finalLiteral) { + if (finalLiteral) { // Add the final literal if present val (dedentedStr, offset) = dedentedParts.last segmentBuf += atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) } } diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index cc5abad1a5fa..d4185ca2529d 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -990,26 +990,17 @@ object Scanners { case '\'' => def fetchSingleQuote(): Unit = { nextChar() - // Check for triple single quote (dedented string literal) - if (ch == '\'') { + if (ch == '\'') { // Check for triple single quote (dedented string literal) nextChar() if (ch == '\'') { - // We have at least ''' - // Check if this is an interpolated dedented string + // We have at least ''' check if this is an interpolated dedented string if (token == INTERPOLATIONID) { - // For interpolation, handle as string part nextRawChar() val quoteCount = getDedentedString(isInterpolated = true) currentRegion = InDedentedString(quoteCount, currentRegion) - } else { - getDedentedString(isInterpolated = false) - // No need to store quoteCount for non-interpolated strings - } - } - else { - // We have '' followed by something else - error(em"empty character literal") + } else getDedentedString(isInterpolated = false) } + else error(em"empty character literal") // We have '' followed by something else } else if isIdentifierStart(ch) then charLitOr { getIdentRest(); QUOTEID } From c288c38ccb93b1fc637bc755e4ac74a999a71028 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 09:58:35 +0800 Subject: [PATCH 25/30] cleanup --- compiler/src/dotty/tools/dotc/parsing/Scanners.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index d4185ca2529d..6574471a15f7 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -1385,7 +1385,7 @@ object Scanners { getInterpolatedIdentRest(hasSupplement = true) false // don't continue, identifier rest handles it else - val escapeDesc = if escapeChar == '"' then "`$\"\"`, " else "`$'`, " + val escapeDesc = if escapeChar == '"' then "`$\"`, " else "`$'`, " error(s"invalid string interpolation: `$$$$`, $escapeDesc`$$`ident or `$$`BlockExpr expected".toMessage, off = charOffset - 2) putChar('$') true // continue parsing after error From 68e1742d79aab8c0f2f4b45aa0bff8c574e00636 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 11:06:36 +0800 Subject: [PATCH 26/30] . --- tests/run/dedented-string-literals.check | 62 ++++++++++++------------ tests/run/dedented-string-literals.scala | 60 +++++++++++------------ 2 files changed, 61 insertions(+), 61 deletions(-) diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check index 9812f78b1f04..7c9b0172b533 100644 --- a/tests/run/dedented-string-literals.check +++ b/tests/run/dedented-string-literals.check @@ -1,128 +1,128 @@ Basic: i am cow hear me moo - +---- No Indent: - +---- i am cow hear me moo - +---- With indent: i am cow hear me moo - +---- Empty: [] - +---- Single line: hello world - +---- Blank lines: line 1 line 3 - +---- Deep indent: deeply indented content - +---- Mixed indent: first level second level third level - +---- With triple quotes: ''' i am cow ''' - +---- Extended 5 quotes: '''' content with four quotes '''' - +---- Normalized newlines: Has only LF: true - +---- Special chars: !"#$%&()*+,-./:;<=>?@[\]^_`{|}~ - +---- Unicode: Hello 世界 - +---- With tabs: tab indented content here - +---- Empty lines anywhere: [ content more content ] - +---- With quotes: "double quotes" 'single quote' '' - +---- Zero indent: content - +---- Precise: Length: 9 Content: [ ab cd] Chars: List( , , a, b, , , , c, d) - +---- Interpolated: Hello Alice You are 30 years old - +---- Escaped Interpolated: Hello $name You are $age years old - +---- Formatted: Value: 00042 Done - +---- Pattern matching: Pattern result: matched basic - +---- Interpolated pattern: Interpolated pattern result: matched greeting - +---- Pattern matching (two lines): Two line pattern result: matched two lines - +---- Interpolated pattern (two lines): Two line interpolated result: matched two line greeting - +---- In function: function content more content - +---- In class: class member content - +---- In list: Item: [ first] Item: [ second] Item: [ third] - +---- Nested in expressions: prefix middlesuffix - +---- Type ascription: Value: [ first line indented line third line] Type matches: true - +---- valueOf test: Value: [ alpha beta diff --git a/tests/run/dedented-string-literals.scala b/tests/run/dedented-string-literals.scala index 5dc2adcacffc..643c83293a30 100644 --- a/tests/run/dedented-string-literals.scala +++ b/tests/run/dedented-string-literals.scala @@ -8,7 +8,7 @@ object Test { ''' println("Basic:") println(basic) - println() + println("----") val noIndent = ''' i am cow @@ -16,7 +16,7 @@ hear me moo ''' println("No Indent:") println(noIndent) - println() + println("----") val withIndentPreserved = ''' i am cow @@ -24,20 +24,20 @@ hear me moo ''' println("With indent:") println(withIndentPreserved) - println() + println("----") val empty = ''' ''' println("Empty:") println(s"[${empty}]") - println() + println("----") val singleLine = ''' hello world ''' println("Single line:") println(singleLine) - println() + println("----") val blankLines = ''' line 1 @@ -46,7 +46,7 @@ hear me moo ''' println("Blank lines:") println(blankLines) - println() + println("----") val deepIndent = ''' deeply @@ -55,7 +55,7 @@ hear me moo ''' println("Deep indent:") println(deepIndent) - println() + println("----") val mixedIndent = ''' first level @@ -64,7 +64,7 @@ hear me moo ''' println("Mixed indent:") println(mixedIndent) - println() + println("----") val withTripleQuotes = '''' ''' @@ -73,7 +73,7 @@ hear me moo '''' println("With triple quotes:") println(withTripleQuotes) - println() + println("----") val extended5 = ''''' '''' @@ -82,7 +82,7 @@ hear me moo ''''' println("Extended 5 quotes:") println(extended5) - println() + println("----") val normalized = ''' line1 @@ -90,21 +90,21 @@ hear me moo ''' println("Normalized newlines:") println(s"Has only LF: ${!normalized.contains('\r')}") - println() + println("----") val specialChars = ''' !"#$%&()*+,-./:;<=>?@[\]^_`{|}~ ''' println("Special chars:") println(specialChars) - println() + println("----") val unicode = ''' Hello 世界 ''' println("Unicode:") println(unicode) - println() + println("----") val withTabs = ''' tab indented @@ -112,7 +112,7 @@ hear me moo ''' println("With tabs:") println(withTabs) - println() + println("----") val emptyLinesAnywhere = ''' @@ -123,7 +123,7 @@ hear me moo ''' println("Empty lines anywhere:") println(s"[${emptyLinesAnywhere}]") - println() + println("----") val withQuotes = ''' "double quotes" @@ -132,14 +132,14 @@ hear me moo ''' println("With quotes:") println(withQuotes) - println() + println("----") val zeroIndent = ''' content ''' println("Zero indent:") println(zeroIndent) - println() + println("----") val precise = ''' ab @@ -149,7 +149,7 @@ hear me moo println(s"Length: ${precise.length}") println(s"Content: [${precise}]") println(s"Chars: ${precise.toList}") - println() + println("----") val name = "Alice" val age = 30 @@ -159,7 +159,7 @@ hear me moo ''' println("Interpolated:") println(interpolated) - println() + println("----") val escapedInterpolated = s''' Hello $$name @@ -167,7 +167,7 @@ hear me moo ''' println("Escaped Interpolated:") println(escapedInterpolated) - println() + println("----") val value = 42 val formatted = f''' @@ -176,7 +176,7 @@ hear me moo ''' println("Formatted:") println(formatted) - println() + println("----") def testPattern(s: String): String = s match { case ''' @@ -189,7 +189,7 @@ hear me moo } println("Pattern matching:") println(s"Pattern result: ${testPattern("test")}") - println() + println("----") def testInterpolatedPattern(s: String): String = s match { case s''' @@ -199,7 +199,7 @@ hear me moo } println("Interpolated pattern:") println(s"Interpolated pattern result: ${testInterpolatedPattern("Hello World")}") - println() + println("----") def testPatternTwoLines(s: String): String = s match { case ''' @@ -210,7 +210,7 @@ hear me moo } println("Pattern matching (two lines):") println(s"Two line pattern result: ${testPatternTwoLines("line one\nline two")}") - println() + println("----") def testInterpolatedPatternTwoLines(s: String): String = s match { case s''' @@ -221,7 +221,7 @@ hear me moo } println("Interpolated pattern (two lines):") println(s"Two line interpolated result: ${testInterpolatedPatternTwoLines("First: Alice\nSecond: Bob")}") - println() + println("----") def inFunction = ''' function content @@ -229,7 +229,7 @@ hear me moo ''' println("In function:") println(inFunction) - println() + println("----") class InClass { val inClass = ''' @@ -240,7 +240,7 @@ hear me moo val classInstance = new InClass println("In class:") println(classInstance.inClass) - println() + println("----") val list = List( ''' @@ -257,14 +257,14 @@ hear me moo list.foreach { item => println(s"Item: [$item]") } - println() + println("----") val nested = "prefix" + ''' middle ''' + "suffix" println("Nested in expressions:") println(nested) - println() + println("----") val typedVal: ''' first line @@ -274,7 +274,7 @@ hear me moo println("Type ascription:") println(s"Value: [$typedVal]") println(s"Type matches: ${typedVal == " first line\n indented line\n third line"}") - println() + println("----") val valueOfResult = scala.compiletime.constValue[''' alpha From 062b3ef432643df57b0ac422946fd29d46c1294e Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 11:34:47 +0800 Subject: [PATCH 27/30] wip --- tests/run/dedented-string-literals.check | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/run/dedented-string-literals.check b/tests/run/dedented-string-literals.check index 7c9b0172b533..06604141dea8 100644 --- a/tests/run/dedented-string-literals.check +++ b/tests/run/dedented-string-literals.check @@ -3,9 +3,10 @@ i am cow hear me moo ---- No Indent: ----- + i am cow hear me moo + ---- With indent: i am cow @@ -74,7 +75,7 @@ Precise: Length: 9 Content: [ ab cd] -Chars: List( , , a, b, +Chars: List( , , a, b, , , , c, d) ---- Interpolated: From 3ea641600e75bf2fa9aab0dde3f4af0b2ccebfdb Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 13:43:35 +0800 Subject: [PATCH 28/30] Update dedented-string-literals.scala --- tests/neg/dedented-string-literals.scala | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala index 9e39d967b6bf..b9866512fa16 100644 --- a/tests/neg/dedented-string-literals.scala +++ b/tests/neg/dedented-string-literals.scala @@ -21,20 +21,20 @@ content // error: line in dedented string literal is indented less than the clos text''' // error: last line of dedented string literal must contain only whitespace before closing delimiter } -// Test @compileTimeOnly with dedented string -object CompileTimeOnlyTest { - import scala.annotation.compileTimeOnly - - @compileTimeOnly(''' - This method should only be used at compile time - Do not call at runtime - ''') - def onlyAtCompileTime: Unit = () - - def test(): Unit = { - onlyAtCompileTime // error - } -} + + + + + + + + + + + + + + // Error: Unclosed literal - must be last since it breaks parsing object UnclosedTest { From 049b6cb80a09f89f4e85b34f4b991a3c8167549a Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 17:01:48 +0800 Subject: [PATCH 29/30] . --- .../dotty/tools/dotc/parsing/Parsers.scala | 33 ++++++++++-------- tests/neg/dedented-string-literals.check | 34 +++++++++---------- tests/neg/dedented-string-literals.scala | 34 ++++--------------- 3 files changed, 43 insertions(+), 58 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 0002ac6c9329..63b9e88df1f6 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1379,6 +1379,9 @@ object Parsers { val linesAndWithSeps = (str.linesIterator.zip(str.linesWithSeparators)).toSeq var lineOffset = offset + + while(in.buf(lineOffset) == '\'') lineOffset += 1 + def dedentLine(line: String, lineWithSep: String) = { val result = if (line.startsWith(closingIndent)) line.substring(closingIndent.length) @@ -1397,13 +1400,11 @@ object Parsers { // If this is the first part of a string, then the first line is the empty string following // the opening `'''` delimiter, so we skip it. If not, then the first line is immediately // following an interpolated value, and should be used raw without indenting - val firstLine = - if (isFirstPart) Nil - else { - val (line, lineWithSep) = linesAndWithSeps.head - lineOffset += lineWithSep.length - Seq(line) - } + val firstLine = { + val (line, lineWithSep) = linesAndWithSeps.head + lineOffset += lineWithSep.length + if (isFirstPart) Nil else Seq(line) + } // Process all lines except the first and last, which require special handling val dedented = linesAndWithSeps.drop(1).dropRight(1).map { case (line, lineWithSep) => @@ -1454,7 +1455,9 @@ object Parsers { // For non-interpolated dedented strings, check if the token starts with ''' val str = in.strVal if (token == STRINGLIT && !inStringInterpolation && isDedentedStringLiteral(negOffset)) { - dedentString(str, negOffset, extractClosingIndent(str, negOffset), true, true) + val (closingIdent, succeeded) = extractClosingIndent(str, negOffset) + if (succeeded) dedentString(str, negOffset, closingIdent, true, true) + else str } else str case TRUE => true case FALSE => false @@ -1586,7 +1589,7 @@ object Parsers { if (!isDedented || stringParts.isEmpty) stringParts else { val lastPart = stringParts.last._1 - val closingIndent = extractClosingIndent(lastPart, in.offset) + val (closingIndent, succeeded) = extractClosingIndent(lastPart, in.offset) stringParts.zipWithIndex.map { case ((str, offset), index) => val dedented = dedentString(str, in.offset, closingIndent, index == 0, index == stringParts.length - 1) (dedented, offset) @@ -1610,7 +1613,7 @@ object Parsers { } /** Extract the closing indentation from the last line of a string */ - private def extractClosingIndent(str: String, offset: Offset): String = { + private def extractClosingIndent(str: String, offset: Offset): (String, Boolean) = { // If the last line is empty, `linesIterator` and `linesWithSeparators` skips // the empty string, so we must recognize that case and explicitly default to "" // otherwise things will blow up @@ -1624,14 +1627,16 @@ object Parsers { .getOrElse("") if (closingIndent.exists(!_.isWhitespace)) { + var lineOffset = offset + while(in.buf(lineOffset) == '\'') lineOffset += 1 syntaxError( em"last line of dedented string literal must contain only whitespace before closing delimiter", - offset + lineOffset + str.linesWithSeparators.toList.dropRight(1).map(_.size).sum ) - return str + (str, false) + } else { + (closingIndent, true) } - - closingIndent } /* ------------- NEW LINES ------------------------------------------------- */ diff --git a/tests/neg/dedented-string-literals.check b/tests/neg/dedented-string-literals.check index 7c61c5e8031e..e760a5f4f1d7 100644 --- a/tests/neg/dedented-string-literals.check +++ b/tests/neg/dedented-string-literals.check @@ -1,20 +1,20 @@ --- Error: ---------------------------------------------------------------------- -5 | val noNewlineAfterOpen = '''content on same line // error: dedented string literal must start with a newline +-- Error: tests/neg/dedented-string-literals.scala:4:27 ---------------------------------------------------------------- +4 | val noNewlineAfterOpen = '''content on same line // error: dedented string literal must start with newline after opening quotes | ^ - | dedented string literal must start with newline after opening quotes --- Error: ---------------------------------------------------------------------- -8 | val notIndented = ''' - | ^ - |line in dedented string literal must be indented at least as much as the closing delimiter --- Error: ---------------------------------------------------------------------- -13 | val mixedTabsSpaces = ''' - | ^ - | dedented string literal cannot mix tabs and spaces in indentation --- Error: ---------------------------------------------------------------------- -19 | val nonWhitespaceBeforeClosing = ''' - | ^ + | dedented string literal must start with newline after opening quotes +-- Error: tests/neg/dedented-string-literals.scala:7:0 ----------------------------------------------------------------- +7 |content // error: line in dedented string literal must be indented at least as much as the closing delimiter with an identical prefix + |^ + |line in dedented string literal must be indented at least as much as the closing delimiter with an identical prefix +-- Error: tests/neg/dedented-string-literals.scala:11:0 ---------------------------------------------------------------- +11 | tab line // error: line in dedented string literal must be indented at least as much as the closing delimiter with an identical prefix + |^ + |line in dedented string literal must be indented at least as much as the closing delimiter with an identical prefix +-- Error: tests/neg/dedented-string-literals.scala:17:0 ---------------------------------------------------------------- +17 | text''' // error: last line of dedented string literal must contain only whitespace before closing delimiter + |^ |last line of dedented string literal must contain only whitespace before closing delimiter --- Error: ---------------------------------------------------------------------- -41 | val unclosed = ''' +-- Error: tests/neg/dedented-string-literals.scala:21:17 --------------------------------------------------------------- +21 | val unclosed = ''' // error: unclosed dedented string literal | ^ - | unclosed dedented string literal \ No newline at end of file + | dedented string literal must start with newline after opening quotes diff --git a/tests/neg/dedented-string-literals.scala b/tests/neg/dedented-string-literals.scala index b9866512fa16..f04e4aeda8b9 100644 --- a/tests/neg/dedented-string-literals.scala +++ b/tests/neg/dedented-string-literals.scala @@ -1,42 +1,22 @@ // Test error cases for dedented string literals object DedentedStringErrors { - // Error: No newline after opening quotes - val noNewlineAfterOpen = '''content on same line // error: dedented string literal must start with a newline + val noNewlineAfterOpen = '''content on same line // error: dedented string literal must start with newline after opening quotes - // Error: Content not indented enough - val notIndented = ''' -content // error: line in dedented string literal is indented less than the closing delimiter + val notIndentedEnough = ''' +content // error: line in dedented string literal must be indented at least as much as the closing delimiter with an identical prefix ''' - // Error: Mixed tabs and spaces - first line has tab, but closing delimiter has spaces val mixedTabsSpaces = ''' - tab line // error: line in dedented string literal is indented less than the closing delimiter + tab line // error: line in dedented string literal must be indented at least as much as the closing delimiter with an identical prefix space line - ''' + ''' - // Error: Non-whitespace before closing delimiter val nonWhitespaceBeforeClosing = ''' content here text''' // error: last line of dedented string literal must contain only whitespace before closing delimiter } - - - - - - - - - - - - - - - -// Error: Unclosed literal - must be last since it breaks parsing object UnclosedTest { - val unclosed = ''' - some content // error: unclosed dedented string literal + val unclosed = ''' // error: unclosed dedented string literal + some content From f4a507fdbfbc4bb534e7a8fc38c29e60d3ca9085 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sat, 25 Oct 2025 17:02:38 +0800 Subject: [PATCH 30/30] . --- compiler/src/dotty/tools/dotc/parsing/Parsers.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 63b9e88df1f6..3fe28dd79192 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1378,8 +1378,7 @@ object Parsers { val linesAndWithSeps = (str.linesIterator.zip(str.linesWithSeparators)).toSeq var lineOffset = offset - - + // start counting error location offsets only after opening delimiter while(in.buf(lineOffset) == '\'') lineOffset += 1 def dedentLine(line: String, lineWithSep: String) = { @@ -1628,6 +1627,7 @@ object Parsers { if (closingIndent.exists(!_.isWhitespace)) { var lineOffset = offset + // start counting error location offsets only after opening delimiter while(in.buf(lineOffset) == '\'') lineOffset += 1 syntaxError( em"last line of dedented string literal must contain only whitespace before closing delimiter",