From 7c607bc2ff95684a7d14b665ae460b558b5d5677 Mon Sep 17 00:00:00 2001
From: David Strawn
Date: Sun, 20 Dec 2020 10:15:03 -0700
Subject: [PATCH] Remove The Unicode Escape \u000E In Scaladoc Code Comment Parsing

The regular expressions for `CodeBlockStartRegex` and `CodeBlockEndRegex` both contain two instances of the Unicode escape `\u000E`. This is the "Shift Out" character. I expect that it was inserted as part of a copy/paste error.

Unicode escapes in triple quote strings are deprecated as of 2.13.2 (https://github.com/scala/scala/pull/8282). Further, this character actually makes the regular expression invalid if it is interpreted. This isn't a big deal right now, as it appears to be ignored on Scala 2.12.x, but on Scala 2.13.x this will cause the regular expressions to fail for Scaladoc using the `<pre>` tag. For example,

```scala
import scala.util.matching._

object Main {

  val doc0: String =
    """
    | /** A foo is a bar, for example.
    |   *
    |   * {{{
    |   * val foo: String = "bar"
    |   * }}}
    |   *
    |   * <pre>
    |   * val bar: String = "baz
    |   * </pre>
    |   */""".stripMargin

  val CodeBlockStartRegex =
    new Regex("""(.*?)((?:\{\{\{)|(?:\u000E<pre(?: [^>]*)?>\u000E))(.*)""")

  val CodeBlockStartRegex0 =
    new Regex("""(.*?)((?:\{\{\{)|(?:<pre(?: [^>]*)?>))(.*)""")

  def matchInfo(regex: Regex, value: CharSequence): Unit = {
    println(s"\nTarget: ${value}")
    println(s"Regex: ${regex}")
    val matches: List[Regex.Match] = regex.findAllMatchIn(value).toList
    println(s"Match Count: ${matches.size}")
    println(s"Matches: ${matches}")
  }

  def main(args: Array[String]): Unit = {
    matchInfo(CodeBlockStartRegex, doc0)
    matchInfo(CodeBlockStartRegex0, doc0)
  }
}
```

When run with 2.13.4, this yields the following result,

```shell
warning: 1 deprecation (since 2.13.2); re-run with -deprecation for details
1 warning
Picked up JAVA_TOOL_OPTIONS: -Dsbt.supershell=false

Target:
 /** A foo is a bar, for example.
   *
   * {{{
   * val foo: String = "bar"
   * }}}
   *
   * <pre>
   * val bar: String = "baz
   * </pre>
   */
Regex: (.*?)((?:\{\{\{)|(?:<pre(?: [^>]*)?>))(.*)
Match Count: 1
Matches: List(   * {{{)

Target:
 /** A foo is a bar, for example.
   *
   * {{{
   * val foo: String = "bar"
   * }}}
   *
   * <pre>
   * val bar: String = "baz
   * </pre>
   */
Regex: (.*?)((?:\{\{\{)|(?:<pre(?: [^>]*)?>))(.*)
Match Count: 2
Matches: List(   * {{{,    * <pre>)
```

Note how the first output only found one match, the `{{{` based one, but the second one found both.

Finally, a small test was added to ensure that the change does not break comment parsing.
---
 .../internal/docstrings/ScaladocParser.scala  |  4 +--
 .../src/test/scala/tests/ScaladocSuite.scala  | 32 +++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 tests/unit/src/test/scala/tests/ScaladocSuite.scala

diff --git a/mtags/src/main/scala-2/scala/meta/internal/docstrings/ScaladocParser.scala b/mtags/src/main/scala-2/scala/meta/internal/docstrings/ScaladocParser.scala
index 764fad547bc..d18a22abc02 100644
--- a/mtags/src/main/scala-2/scala/meta/internal/docstrings/ScaladocParser.scala
+++ b/mtags/src/main/scala-2/scala/meta/internal/docstrings/ScaladocParser.scala
@@ -234,13 +234,13 @@ object ScaladocParser {
    * The start of a Scaladoc code block
    */
   private val CodeBlockStartRegex =
-    new Regex("""(.*?)((?:\{\{\{)|(?:\u000E<pre(?: [^>]*)?>\u000E))(.*)""")
+    new Regex("""(.*?)((?:\{\{\{)|(?:<pre(?: [^>]*)?>))(.*)""")
 
   /**
    * The end of a Scaladoc code block
    */
   private val CodeBlockEndRegex =
-    new Regex("""(.*?)((?:\}\}\})|(?:\u000E</pre>\u000E))(.*)""")
+    new Regex("""(.*?)((?:\}\}\})|(?:</pre>))(.*)""")
 
   /**
    * A key used for a tag map. The key is built from the name of the tag and
diff --git a/tests/unit/src/test/scala/tests/ScaladocSuite.scala b/tests/unit/src/test/scala/tests/ScaladocSuite.scala
new file mode 100644
index 00000000000..345224e27f1
--- /dev/null
+++ b/tests/unit/src/test/scala/tests/ScaladocSuite.scala
@@ -0,0 +1,32 @@
+package tests
+
+import scala.meta.internal.docstrings._
+
+import munit.Location
+
+final class ScaladocSuite extends BaseSuite {
+
+  /**
+   * Comment does not directly declare a meaningful equality definition, thus
+   * this check compares [[Comment.body]] instead.
+   */
+  def checkCommentBody(name: String, original: String, expected: Body)(implicit
+      loc: Location
+  ): Unit =
+    test(name) {
+      val obtained: Comment = ScaladocParser.parseComment(original)
+      assertEquals(obtained.body, expected)
+    }
+
+  checkCommentBody(
+    "Brace ({{{) style code comment",
+    """/**{{{val foo: Int = 1}}} */""",
+    Body(List(Code("val foo: Int = 1")))
+  )
+
+  checkCommentBody(
+    "HTML <pre> style code comment",
+    """/**<pre>val foo: Int = 1</pre>*/""",
+    Body(List(Code("val foo: Int = 1")))
+  )
+}