Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unicode escapes are ordinary escape sequences #8480

Merged
merged 2 commits into from
Apr 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ abstract class CharArrayReader { self =>
protected def startFrom: Int = 0

/** Switch whether unicode should be decoded */
protected def decodeUni: Boolean = true
protected def decodeUni: Boolean = false

/** An error routine to call on bad unicode escapes \\uxxxx. */
protected def error(msg: String, offset: Int): Unit
Expand Down
2 changes: 2 additions & 0 deletions compiler/src/dotty/tools/dotc/parsing/JavaScanners.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ object JavaScanners {

class JavaScanner(source: SourceFile, override val startFrom: Offset = 0)(implicit ctx: Context) extends ScannerCommon(source)(ctx) {

override def decodeUni: Boolean = true

def toToken(name: SimpleName): Token = {
val idx = name.start
if (idx >= 0 && idx <= lastKeywordStart) kwArray(idx) else IDENTIFIER
Expand Down
23 changes: 23 additions & 0 deletions compiler/src/dotty/tools/dotc/parsing/Scanners.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,26 @@ object Scanners {
* and advance to next character.
*/
protected def getLitChar(): Unit =
/** Reports a malformed unicode escape at the offset just before the current
 *  `charOffset`, then hands the offending character `ch` to `putChar`
 *  without advancing the reader.
 */
def invalidUnicodeEscape() = {
error("invalid character in unicode escape sequence", charOffset - 1)
putChar(ch)
}
/** Scans the remainder of a unicode escape, starting at its `u` marker.
 *
 *  Skips every leading `u`/`U`, then reads exactly four hexadecimal digits
 *  and passes the resulting character to `putChar`. On the first character
 *  that is not a hex digit, scanning stops and `invalidUnicodeEscape()` is
 *  called instead; nothing is emitted for the digits read so far.
 */
def putUnicode(): Unit = {
  // Any number of 'u'/'U' markers may precede the four digits.
  while ch == 'u' || ch == 'U' do nextChar()
  var remaining = 4
  var codePoint = 0
  var wellFormed = true
  while (wellFormed && remaining > 0) {
    val nibble = digit2int(ch, 16)
    if (nibble < 0) wellFormed = false
    else {
      // Most significant digit comes first: shift what we have and append.
      codePoint = (codePoint << 4) + nibble
      nextChar()
      remaining -= 1
    }
  }
  if (wellFormed) putChar(codePoint.toChar)
  else invalidUnicodeEscape()
}
if (ch == '\\') {
nextChar()
if ('0' <= ch && ch <= '7') {
Expand All @@ -1153,6 +1173,9 @@ object Scanners {
}
putChar(oct.toChar)
}
else if (ch == 'u' || ch == 'U') {
putUnicode()
}
else {
ch match {
case 'b' => putChar('\b')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ class StringInterpolatorOpt extends MiniPhase {
}
}

// Extracts the index of the offending escape from the exceptions thrown by
// StringContext.processEscapes.
// InvalidUnicodeEscapeException is inaccessible for bincompat reasons, so for
// that case the index is parsed out of the exception message instead.
// TODO: remove once there is less restrictive bincompat
private object InvalidEscapePosition {
  def unapply(t: Throwable): Option[Int] = t match {
    case iee: StringContext.InvalidEscapeException => Some(iee.index)
    case il: IllegalArgumentException =>
      // getMessage() may be null (e.g. `new IllegalArgumentException()`); the
      // s-interpolator extractor dereferences its input, so guard against it
      // rather than throwing a NullPointerException from inside a catch clause.
      Option(il.getMessage()).flatMap {
        case s"""invalid unicode escape at index $index of $_""" => index.toIntOption
        case _ => None
      }
    case _ => None
  }
}

/**
* Match trees that resemble s and raw string interpolations. In the case of the s
* interpolator, escapes the string constants. Exposes the string constants as well as
Expand All @@ -74,14 +88,22 @@ class StringInterpolatorOpt extends MiniPhase {
case SOrRawInterpolator(strs, elems) =>
if (tree.symbol == defn.StringContext_raw) Some(strs, elems)
else { // tree.symbol == defn.StringContextS
import dotty.tools.dotc.util.SourcePosition
var stringPosition: SourcePosition = null
try {
val escapedStrs = strs.map { str =>
val escapedValue = StringContext.processEscapes(str.const.stringValue)
cpy.Literal(str)(Constant(escapedValue))
}
val escapedStrs = strs.map(str => {
stringPosition = str.sourcePos
val escaped = StringContext.processEscapes(str.const.stringValue)
cpy.Literal(str)(Constant(escaped))
})
Some(escapedStrs, elems)
} catch {
case _: StringContext.InvalidEscapeException => None
case t @ InvalidEscapePosition(p) => {
val errorSpan = stringPosition.span.startPos.shift(p)
val errorPosition = stringPosition.withSpan(errorSpan)
ctx.error(t.getMessage() + "\n", errorPosition)
None
}
}
}
case _ => None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package dotty.tools
package dotc
package parsing

import ast.untpd._
import org.junit.Test

/** Checks that the span recorded for each string literal inside an
 *  interpolated string selects exactly that literal's text in the source.
 */
class StringInterpolationPositionTest extends ParserTest {

  val tq = "\"\"\""
  val program = s"""
    |class A {
    | val expr = 42
    | val s0 = s"string1"
    | val s1 = s"string1$${expr}string2"
    | val s2 = s"string1$${expr}string2$${expr}string3"
    | val s0m = s${tq}string1${tq}
    | val s1m = s${tq}string1$${expr}string2${tq}
    | val s2m = s${tq}string1$${expr}string2$${expr}string3${tq}
    |}""".stripMargin

  @Test
  def interpolationLiteralPosition: Unit =
    parseText(program) match {
      case PackageDef(_, List(TypeDef(_, Template(_, _, _, statements: List[Tree])))) =>
        // One list of segments per `val` whose right-hand side is an interpolation.
        val segmentLists = statements.collect {
          case ValDef(_, _, InterpolatedString(_, segments)) => segments
        }
        // Literal segments appear either directly or wrapped in a Thicket.
        val literals = segmentLists.flatten.flatMap {
          case single @ Literal(_) => List(single)
          case Thicket(parts) => parts.collect { case nested @ Literal(_) => nested }
        }
        literals.foreach { lit =>
          val Literal(constant) = lit
          constant.value match {
            case text: String =>
              // The text sliced out by the span must equal the literal's value.
              val fromPos = program.substring(lit.span.start, lit.span.end)
              assert(fromPos == text, s"$fromPos == $text")
            case _ => ()
          }
        }
    }
}
5 changes: 1 addition & 4 deletions tests/neg/firstError.scala
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@
. // error: expected class or object definition

\u890u3084eu // error: error in unicode escape // error: illegal character '\uffff'

. // error: expected class or object definition
48 changes: 48 additions & 0 deletions tests/neg/unicodeEscapes-interpolations.check
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- Error: tests/neg/unicodeEscapes-interpolations.scala:2:27 -----------------------------------------------------------
2 | val badInters1 = s"foo \unope that's wrong" // error
| ^
| invalid unicode escape at index 6 of foo \unope that's wrong
-- Error: tests/neg/unicodeEscapes-interpolations.scala:3:32 -----------------------------------------------------------
3 | val badIntersEnd1 = s"foo \u12" // error
| ^
| invalid unicode escape at index 8 of foo \u12
-- Error: tests/neg/unicodeEscapes-interpolations.scala:4:29 -----------------------------------------------------------
4 | val badInters3 = s"""foo \unope that's wrong""" // error
| ^
| invalid unicode escape at index 6 of foo \unope that's wrong
-- Error: tests/neg/unicodeEscapes-interpolations.scala:5:28 -----------------------------------------------------------
5 | val caretPos1 = s"foo \u12x3 pos @ x" // error
| ^
| invalid unicode escape at index 8 of foo \u12x3 pos @ x
-- Error: tests/neg/unicodeEscapes-interpolations.scala:6:34 -----------------------------------------------------------
6 | val caretPos2 = s"foo \uuuuuuu12x3 pos @ x" // error
| ^
| invalid unicode escape at index 14 of foo \uuuuuuu12x3 pos @ x
-- Error: tests/neg/unicodeEscapes-interpolations.scala:7:30 -----------------------------------------------------------
7 | val caretPos3 = s"""foo \u12x3 pos @ x""" // error
| ^
| invalid unicode escape at index 8 of foo \u12x3 pos @ x
-- Error: tests/neg/unicodeEscapes-interpolations.scala:8:36 -----------------------------------------------------------
8 | val caretPos4 = s"""foo \uuuuuuu12x3 pos @ x""" // error
| ^
| invalid unicode escape at index 14 of foo \uuuuuuu12x3 pos @ x
-- Error: tests/neg/unicodeEscapes-interpolations.scala:10:53 ----------------------------------------------------------
10 | val badIntersmultiAfter = s"foo $placeholder bar \unope that's wrong" // error
| ^
| invalid unicode escape at index 7 of bar \unope that's wrong
-- Error: tests/neg/unicodeEscapes-interpolations.scala:11:37 ----------------------------------------------------------
11 | val badIntersmultiBefore = s"foo \unope $placeholder that's wrong" // error
| ^
| invalid unicode escape at index 6 of foo \unope
-- Error: tests/neg/unicodeEscapes-interpolations.scala:12:56 ----------------------------------------------------------
12 | val badInterstmultiAfter = s"""foo $placeholder bar \unope that's wrong""" // error
| ^
| invalid unicode escape at index 7 of bar \unope that's wrong
-- Error: tests/neg/unicodeEscapes-interpolations.scala:13:40 ----------------------------------------------------------
13 | val badInterstmultiBefore = s"""foo \unope $placeholder that's wrong""" // error
| ^
| invalid unicode escape at index 6 of foo \unope
-- Error: tests/neg/unicodeEscapes-interpolations.scala:14:29 ----------------------------------------------------------
14 | val badInterother = s"this \p ain't legal either" // error
| ^
|invalid escape '\p' not one of [\b, \t, \n, \f, \r, \\, \", \', \uxxxx] at index 5 in "this \p ain't legal either". Use \\ for literal \.
15 changes: 15 additions & 0 deletions tests/neg/unicodeEscapes-interpolations.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
object Example {
val badInters1 = s"foo \unope that's wrong" // error
val badIntersEnd1 = s"foo \u12" // error
val badInters3 = s"""foo \unope that's wrong""" // error
val caretPos1 = s"foo \u12x3 pos @ x" // error
val caretPos2 = s"foo \uuuuuuu12x3 pos @ x" // error
val caretPos3 = s"""foo \u12x3 pos @ x""" // error
val caretPos4 = s"""foo \uuuuuuu12x3 pos @ x""" // error
val placeholder = "place"
val badIntersmultiAfter = s"foo $placeholder bar \unope that's wrong" // error
val badIntersmultiBefore = s"foo \unope $placeholder that's wrong" // error
val badInterstmultiAfter = s"""foo $placeholder bar \unope that's wrong""" // error
val badInterstmultiBefore = s"""foo \unope $placeholder that's wrong""" // error
val badInterother = s"this \p ain't legal either" // error
}
28 changes: 28 additions & 0 deletions tests/neg/unicodeEscapes.check
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- Error: tests/neg/unicodeEscapes.scala:3:25 --------------------------------------------------------------------------
3 | val badsingle = "foo \unope that's wrong" // error
| ^
| invalid character in unicode escape sequence
-- Error: tests/neg/unicodeEscapes.scala:4:26 --------------------------------------------------------------------------
4 | val caretPos = "foo \u12x3 pos @ x" // error
| ^
| invalid character in unicode escape sequence
-- Error: tests/neg/unicodeEscapes.scala:5:33 --------------------------------------------------------------------------
5 | val caretPos2 = "foo \uuuuuuu12x3 pos @ x" // error
| ^
| invalid character in unicode escape sequence
-- Error: tests/neg/unicodeEscapes.scala:6:29 --------------------------------------------------------------------------
6 | val carPosTerm = "foo \u123" // error
| ^
| invalid character in unicode escape sequence
-- Error: tests/neg/unicodeEscapes.scala:7:30 --------------------------------------------------------------------------
7 | val halfAnEscape = "foo \u12" // error
| ^
| invalid character in unicode escape sequence
-- Error: tests/neg/unicodeEscapes.scala:8:30 --------------------------------------------------------------------------
8 | val halfAnEscapeChar = '\u45' // error
| ^
| invalid character in unicode escape sequence
-- Error: tests/neg/unicodeEscapes.scala:9:29 --------------------------------------------------------------------------
9 | val `half An Identifier\u45` = "nope" // error
| ^
| invalid character in unicode escape sequence
10 changes: 10 additions & 0 deletions tests/neg/unicodeEscapes.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

object Example {
val badsingle = "foo \unope that's wrong" // error
val caretPos = "foo \u12x3 pos @ x" // error
val caretPos2 = "foo \uuuuuuu12x3 pos @ x" // error
val carPosTerm = "foo \u123" // error
val halfAnEscape = "foo \u12" // error
val halfAnEscapeChar = '\u45' // error
val `half An Identifier\u45` = "nope" // error
}
43 changes: 22 additions & 21 deletions tests/run/literals.scala
Original file line number Diff line number Diff line change
@@ -1,32 +1,38 @@
// scalac: -deprecation
//
//############################################################################
// Literals
//############################################################################

import scala.util.{Failure, Success, Try}
//############################################################################

object Test {

/* I add a couple of Unicode identifier tests here "temporarily" */

def \u03b1\u03c1\u03b5\u03c4\u03b7 = "alpha rho epsilon tau eta"

case class GGG(i: Int) {
def \u03b1\u03b1(that: GGG) = i + that.i
def check_success[A](name: String, closure: => A, expected: A): Unit = {
val res: Option[String] =
try {
val actual: A = closure
if (actual == expected) None //print(" was successful")
else Some(s" failed: expected $expected, found $actual")
} catch {
case exception: Throwable => Some(s" raised exception $exception")
}
for (e <- res) println(s"test $name $e")
Copy link
Contributor

@som-snytt som-snytt Apr 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was following up that the unicodes on line 11 were edited out, when I noticed that this (edit: partially) reverts my last change.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test has to assert because there is no longer a check file -- vulpix only checks the output if there is a check file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll see if I can restore those changes. Does vulpix detect a test failing an assertion? I guess I'll jiggle with it a bit and let you know.

As for the identifier on line 11, the variant that is still supported is in tests/run/unicodeEscapes.scala line 13-14, which makes good on the promise of temporarily.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NBD, I PR'd it, thanks. I was about to futz with more parsing. It's amazing how quickly I forgot like how do I even run a test?

}

/** Evaluates `closure` and asserts that it yields `expected`.
 *
 *  @param name     label used in the failure message
 *  @param closure  by-name expression under test; evaluated once inside `Try`
 *  @param expected value the expression must be equal to
 *  @throws AssertionError if the result differs from `expected`, or if
 *          evaluating `closure` fails; in the latter case the original
 *          exception is attached as the cause so its stack trace is kept
 */
def check_success[A](name: String, closure: => A, expected: A): Unit =
  Try(closure) match {
    case Success(actual) =>
      assert(actual == expected, s"test $name failed: expected $expected, found $actual")
    case Failure(error) =>
      // Keep the original failure as the cause instead of only its toString.
      throw new AssertionError(s"test $name raised exception $error", error)
  }

def main(args: Array[String]): Unit = {
// char

//unicode escapes escape in char literals
check_success("'\\u0024' == '$'", '\u0024', '$')
check_success("'\\u005f' == '_'", '\u005f', '_')

//unicode escapes escape in interpolations
check_success("\"\\u0024\" == \"$\"", s"\u0024", "$")
check_success("\"\"\"\\u0024\"\"\" == \"$\"", s"""\u0024""", "$")

//Int#asInstanceOf[Char] gets the char at the codepoint
check_success("65.asInstanceOf[Char] == 'A'", 65.asInstanceOf[Char], 'A')
check_success("\"\\141\\142\" == \"ab\"", "\141\142", "ab")
check_success("\"\\0x61\\0x62\".trim() == \"x61\\0x62\"", "\0x61\0x62".substring(1), "x61\0x62")

// boolean
check_success("(65 : Byte) == 'A'", (65: Byte) == 'A', true) // contrib #176
Expand Down Expand Up @@ -77,7 +83,6 @@ object Test {
check_success("01.23f == 1.23f", 01.23f, 1.23f)
check_success("3.14f == 3.14f", 3.14f, 3.14f)
check_success("6.022e23f == 6.022e23f", 6.022e23f, 6.022e23f)
check_success("9f == 9.0f", 9f, 9.0f)
check_success("09f == 9.0f", 09f, 9.0f)
check_success("1.00000017881393421514957253748434595763683319091796875001f == 1.0000001f",
1.00000017881393421514957253748434595763683319091796875001f,
Expand Down Expand Up @@ -107,11 +112,7 @@ object Test {
check_success("1L.asInstanceOf[Double] == 1.0", 1L.asInstanceOf[Double], 1.0)

check_success("\"\".length()", "\u001a".length(), 1)

val ggg = GGG(1) \u03b1\u03b1 GGG(2)
check_success("ggg == 3", ggg, 3)

}
}

//############################################################################
//############################################################################
9 changes: 9 additions & 0 deletions tests/run/t3220-3.check
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
processed...OK
unprocessed...OK
after backslashes
List(\, \, u, 0, 0, 4, 0)
List(\, u, 0, 0, 4, 0)
List(\, \, u, 0, 0, 4, 0)
List(\, u, 0, 0, 4, 0)
List(", (, [, ^, ", \, x, 0, 0, -, \, x, 1, F, \, x, 7, F, \, \, ], |, \, \, [, \, \, ', ", b, f, n, r, t, ], |, \, \, u, [, a, -, f, A, -, F, 0, -, 9, ], {, 4, }, ), *, ")
List(b, a, d, \)
Loading