Skip to content

Commit

Permalink
no processing of raw unicode escapes under 214
Browse files Browse the repository at this point in the history
  • Loading branch information
Martijn Hoekstra committed Aug 20, 2019
1 parent 6010288 commit 734fd23
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 53 deletions.
2 changes: 1 addition & 1 deletion src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ trait Scanners extends ScannersCommon {
nextRawChar()
if (isTripleQuote()) {
setStrVal()
replaceUnicodeEscapes(true)
if(!currentRun.isScala214) replaceUnicodeEscapes(true)
token = STRINGLIT
} else
getRawStringLit()
Expand Down
13 changes: 12 additions & 1 deletion src/compiler/scala/tools/reflect/FastStringInterpolator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,18 @@ trait FastStringInterpolator extends FormatInterpolator {
val treated =
try
parts.mapConserve { case lit@Literal(Constant(stringVal: String)) =>
val k = Constant(if(isRaw) StringContext.processUnicode(stringVal) else StringContext.processEscapes(stringVal))
// Constant for this literal part:
//  - raw interpolation under 2.14: the part is taken verbatim;
//  - raw interpolation otherwise: unicode escapes are still processed, and a warning is
//    reported at the literal's position whenever processing changed the text;
//  - any other interpolation: the standard escapes are processed.
val k = Constant(if (isRaw && currentRun.isScala214) stringVal
else if (isRaw) {
  val processed = StringContext.processUnicode(stringVal)
  if (processed != stringVal)
    c.warning(lit.pos, "Unicode escapes in raw interpolations are deprecated as of scala 2.13.1, and will be removed in scala 2.14")
  processed
}
else StringContext.processEscapes(stringVal))
// To avoid the backlash of backslash, taken literally by Literal, escapes are processed strictly (scala/bug#11196)
treeCopy.Literal(lit, k).setType(ConstantType(k))
}
Expand Down
63 changes: 46 additions & 17 deletions src/library/scala/StringContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ case class StringContext(parts: String*) {
*
* For example, the raw processed string `raw"a\nb"` is equal to the scala string `"a\\nb"`.
*
* ''Note:'' Even when using the raw interpolator, Scala will preprocess unicode escapes.
* ''Note:'' Even when using the raw interpolator, Scala will process Unicode escapes.
* Unicode processing in the raw interpolator is deprecated as of Scala 2.13.1 and
* will be removed in Scala 2.14.
* For example:
* {{{
* scala> raw"\u005cu0023"
Expand Down Expand Up @@ -373,37 +375,31 @@ object StringContext {
* @param str A string that may contain escape sequences
* @return The string with all escape sequences expanded.
*/
def processEscapes(str: String): String = {
def processEscapes(str: String): String =
str indexOf '\\' match {
case -1 => str
case i => replace(str, i, false)
case i => replace(str, i)
}
}

protected[scala] def processUnicode(str: String): String = {
str indexOf "\\u" match {
case -1 => str
case i => replace(str, i, true)
protected[scala] def processUnicode(str: String): String =
str indexOf "\\" match {
case i if i == -1 || i >= (str.length() - 5) => str
case i => replaceU(str, i)
}
}

// replace escapes with given first escape
private[this] def replace(str: String, first: Int, unicodeOnly: Boolean): String = {
//replace escapes with given first escape
private[this] def replace(str: String, first: Int): String = {
val len = str.length()
val b = new JLSBuilder
// append replacement starting at index `i`, with `next` backslash
@tailrec def loop(i: Int, next: Int): String = {
// append replacement starting at index `i`, with `next` backslash
@tailrec def loop(i: Int, next: Int): String = {
if (next >= 0) {
//require(str(next) == '\\')
if (next > i) b.append(str, i, next)
var idx = next + 1
if (idx >= len) throw new InvalidEscapeException(str, next)
val c = str(idx) match {
case 'u' => 'u'
case chr if unicodeOnly => {
b.append('\\')
chr
}
case 'b' => '\b'
case 't' => '\t'
case 'n' => '\n'
Expand All @@ -427,6 +423,39 @@ object StringContext {
loop(0, first)
}

// Replaces unicode escapes in `str`, copying every other backslash sequence through unchanged.
// `first` is the index of the first backslash in `str` (a negative value means there is none).
// A backslash that is the very last character of `str` is kept as-is.
private[this] def replaceU(str: String, first: Int): String = {
  val len = str.length()
  val b = new JLSBuilder
  // append replacement starting at index `i`, with `next` backslash
  @tailrec def loop(i: Int, next: Int): String = {
    if (next >= 0) {
      //require(str(next) == '\\')
      // copy the plain text between the previous position and this backslash
      if (next > i) b.append(str, i, next)
      val idx = next + 1
      if (idx >= len) {
        // The backslash is the final character, so it cannot start an escape.
        // Emit it literally; returning without it would silently drop a character.
        b.append('\\')
        b.toString()
      }
      else {
        val (ch, advance) = str(idx) match {
          // readUEscape yields the character to emit and how far to advance from `idx`
          case 'u' => readUEscape(str, idx)
          case chr => {
            // not a unicode escape: keep the backslash and the character after it
            b.append('\\')
            (chr, 1)
          }
        }
        val resume = idx + advance
        b append ch
        loop(resume, str.indexOf('\\', resume))
      }
    } else {
      // no further backslashes: copy the remainder of the input
      if (i < len) b.append(str, i, len)
      b.toString
    }
  }
  loop(0, first)
}

def standardInterpolator(process: String => String, args: scala.collection.Seq[Any], parts: Seq[String]): String = {
StringContext.checkLengths(args, parts)
val pi = parts.iterator
Expand Down
51 changes: 51 additions & 0 deletions test/files/run/t3220-213.check
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
t3220-213.scala:9: warning: Unicode escapes in triple quoted strings and raw interpolations are deprecated, use the literal character instead
def inTripleQuoted = """\u000A"""
^
t3220-213.scala:42: warning: Unicode escapes in triple quoted strings and raw interpolations are deprecated, use the literal character instead
"tab unicode escape in triple quoted string" -> """tab\u0009tab""",
^
t3220-213.scala:10: warning: Unicode escapes in raw interpolations are deprecated as of scala 2.13.1, and will be removed in scala 2.14
def inInterpolation = raw"\u000A"
^
t3220-213.scala:11: warning: Unicode escapes in raw interpolations are deprecated as of scala 2.13.1, and will be removed in scala 2.14
def inTripleQuotedInterpolation = raw"""\u000A"""
^
t3220-213.scala:43: warning: Unicode escapes in raw interpolations are deprecated as of scala 2.13.1, and will be removed in scala 2.14
"tab unicode escape in single quoted raw interpolator" -> raw"tab\u0009tab",
^
t3220-213.scala:44: warning: Unicode escapes in raw interpolations are deprecated as of scala 2.13.1, and will be removed in scala 2.14
"tab unicode escape in triple quoted raw interpolator" -> raw"""tab\u0009tab"""
^
supported
literals that result in tab tab:
literal tab in single quoted string
tab escape char in single quoted string
tab unicode escape in single quoted string
literal tab in triple quoted string
literal tab in triple quoted raw interpolator
literal tab in single quoted raw interpolator
literal tab in triple quoted s interpolator
literal tab in single quoted s interpolator
tab escape char in triple quoted s interpolator
tab escape char in single quoted s interpolator
tab unicode escape in triple quoted s interpolator
tab unicode escape in single quoted s interpolator

unsupported
literals that result in tab\ttab:
tab escape char in triple quoted string
tab escape char in raw interpolator

deprecated
literals that result in tab tab:
tab unicode escape in triple quoted string
tab unicode escape in single quoted raw interpolator
tab unicode escape in triple quoted raw interpolator

after backslashes
List(\, \, u, 0, 0, 4, 0)
List(\, u, 0, 0, 4, 0)
List(\, \, u, 0, 0, 4, 0)
List(\, u, 0, 0, 4, 0)
List(", (, [, ^, ", \, x, 0, 0, -, \, x, 1, F, \, x, 7, F, \, \, ], |, \, \, [, \, \, ', ", b, f, n, r, t, ], |, \, \, u, [, a, -, f, A, -, F, 0, -, 9, ], {, 4, }, ), *, ")
List(b, a, d, \)
25 changes: 22 additions & 3 deletions test/files/run/t3220.scala → test/files/run/t3220-213.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ object Literals {
def inTripleQuotedInterpolation = raw"""\u000A"""
def inChar = '\u000A'
def `in backtick quoted\u0020identifier` = "bueno"
//unicode escapes preceded by an odd number of backslash characters
//are *not* processed, regardless of whether the
//backslash characters are processed themselves
def after2slashestriple = """\\u0040"""
def after2slashesplain = "\\u0040"
def after2slashesraw = raw"\\u0040"
def after2slashess = s"\\u0040"
def firstFailure = ("\""+"""([^"\x00-\x1F\x7F\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*"""+"\"")
def badString = """bad\"""


def supported = List(
"literal tab in single quoted string" -> "tab tab",
Expand All @@ -30,7 +40,8 @@ object Literals {
def deprecated =
List(
"tab unicode escape in triple quoted string" -> """tab\u0009tab""",
"tab unicode escape in raw interpolator" -> raw"""tab\u0009tab"""
"tab unicode escape in single quoted raw interpolator" -> raw"tab\u0009tab",
"tab unicode escape in triple quoted raw interpolator" -> raw"""tab\u0009tab"""
)

def unsupported =
Expand All @@ -53,7 +64,7 @@ object Test {
case (result, ways) => {
println(s"literals that result in $result:")
ways.foreach{case (x, _) => println(x)}
println
println()
}
}

Expand All @@ -68,6 +79,14 @@ object Test {
println("deprecated")

printSegment(Literals.deprecated)


println("after backslashes")
println(Literals.after2slashestriple.toList)
println(Literals.after2slashesplain.toList)
println(Literals.after2slashesraw.toList)
println(Literals.after2slashess.toList)
println(Literals.firstFailure.toList)
println(Literals.badString.toList)

}
}
17 changes: 17 additions & 0 deletions test/files/run/t3220-214.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// scalac: -Xsource:2.14

/** String literals that each spell out a unicode escape for the line feed character,
 *  once per literal form exercised by this test.
 */
object Literals214 {
  // plain triple-quoted string
  def inTripleQuoted: String = """\u000A"""

  // single-quoted raw interpolation
  def inRawInterpolation: String = raw"\u000A"

  // triple-quoted raw interpolation
  def inRawTripleQuoted: String = raw"""\u000A"""
}

object Test {

  /** Asserts that every literal in `Literals214` still consists of the six characters
   *  of the escape as written, i.e. that the escape was not turned into a line feed.
   */
  def main(args: Array[String]): Unit = {
    val expected = List('\\', 'u', '0', '0', '0', 'A')
    val literals = List(
      Literals214.inTripleQuoted,
      Literals214.inRawInterpolation,
      Literals214.inRawTripleQuoted
    )
    literals.foreach { literal =>
      assert(expected == literal.toList)
    }
  }
}
31 changes: 0 additions & 31 deletions test/files/run/t3220.check

This file was deleted.

0 comments on commit 734fd23

Please sign in to comment.