Permalink
Browse files

SI-8015 Count lines by EOLs

Source lines were counted by "line break chars", which include FF (form feed).
Clients of `pos.line` all seem to expect the ordinary line number,
so that is what they now get.

Unicode escape processing now precedes line-ending processing.
  • Loading branch information...
1 parent ada8d91 commit bb2e99a69201060b88dd25602def42f31ba9ab65 @som-snytt som-snytt committed Dec 18, 2013
@@ -206,9 +206,9 @@ trait Scanners extends ScannersCommon {
token = kwArray(idx)
if (token == IDENTIFIER && allowIdent != name) {
if (name == nme.MACROkw)
- syntaxError(name+" is now a reserved word; usage as an identifier is disallowed")
+ syntaxError(s"$name is now a reserved word; usage as an identifier is disallowed")
else if (emitIdentifierDeprecationWarnings)
- deprecationWarning(name+" is now a reserved word; usage as an identifier is deprecated")
+ deprecationWarning(s"$name is now a reserved word; usage as an identifier is deprecated")
}
}
}
@@ -389,7 +389,7 @@ trait Scanners extends ScannersCommon {
// println("blank line found at "+lastOffset+":"+(lastOffset to idx).map(buf(_)).toList)
return true
}
- if (idx == end) return false
+ if (idx == end) return false
} while (ch <= ' ')
}
idx += 1; ch = buf(idx)
@@ -46,15 +46,18 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
def isUnicodeEscape = charOffset == lastUnicodeOffset
/** Advance one character; reducing CR;LF pairs to just LF */
- final def nextChar() {
+ final def nextChar(): Unit = {
if (charOffset >= buf.length) {
ch = SU
} else {
val c = buf(charOffset)
ch = c
charOffset += 1
if (c == '\\') potentialUnicode()
- else if (c < ' ') { skipCR(); potentialLineEnd() }
+ if (ch < ' ') {
+ skipCR()
+ potentialLineEnd()
+ }
}
}
@@ -74,7 +77,7 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
}
/** Interpret \\uxxxx escapes */
- private def potentialUnicode() {
+ private def potentialUnicode() = {
def evenSlashPrefix: Boolean = {
var p = charOffset - 2
while (p >= 0 && buf(p) == '\\') p -= 1
@@ -105,13 +108,17 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
}
/** replace CR;LF by LF */
- private def skipCR() {
- if (ch == CR)
- if (charOffset < buf.length && buf(charOffset) == LF) {
- charOffset += 1
- ch = LF
+ private def skipCR() =
+ if (ch == CR && charOffset < buf.length)
+ buf(charOffset) match {
+ case LF =>
+ charOffset += 1
+ ch = LF
+ case '\\' =>
+ if (lookaheadReader.getu == LF)
+ potentialUnicode()
+ case _ =>
}
- }
/** Handle line ends */
private def potentialLineEnd() {
@@ -132,5 +139,6 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
def error(offset: Int, msg: String) = self.error(offset, msg)
/** A mystery why CharArrayReader.nextChar() returns Unit */
def getc() = { nextChar() ; ch }
+ def getu() = { require(buf(charOffset) == '\\') ; ch = '\\' ; charOffset += 1 ; potentialUnicode() ; ch }
}
}
@@ -16,12 +16,13 @@ import scala.reflect.internal.Chars._
/** abstract base class of a source file used in the compiler */
abstract class SourceFile {
- def content : Array[Char] // normalized, must end in SU
- def file : AbstractFile
- def isLineBreak(idx : Int) : Boolean
+ def content: Array[Char] // normalized, must end in SU
+ def file : AbstractFile
+ def isLineBreak(idx: Int): Boolean
+ def isEndOfLine(idx: Int): Boolean
def isSelfContained: Boolean
def length : Int
- def position(offset: Int) : Position = {
+ def position(offset: Int): Position = {
assert(offset < length, file + ": " + offset + " >= " + length)
Position.offset(this, offset)
}
@@ -52,6 +53,7 @@ object NoSourceFile extends SourceFile {
def content = Array()
def file = NoFile
def isLineBreak(idx: Int) = false
+ def isEndOfLine(idx: Int) = false
def isSelfContained = true
def length = -1
def offsetToLine(offset: Int) = -1
@@ -129,17 +131,24 @@ class BatchSourceFile(val file : AbstractFile, val content0: Array[Char]) extend
}
def isLineBreak(idx: Int) =
- if (idx >= length) false else {
- val ch = content(idx)
+ (idx < length) && (content(idx) match {
// don't identify the CR in CR LF as a line break, since LF will do.
- if (ch == CR) (idx + 1 == length) || (content(idx + 1) != LF)
- else isLineBreakChar(ch)
- }
+ case CR => (idx + 1 == length) || (content(idx + 1) != LF)
+ case ch => isLineBreakChar(ch)
+ })
+
+ def isEndOfLine(idx: Int) =
+ (idx < length) && (content(idx) match {
+ // don't identify the CR in CR LF as a line break, since LF will do.
+ case CR => (idx + 1 == length) || (content(idx + 1) != LF)
+ case LF => true
+ case _ => false
+ })
def calculateLineIndices(cs: Array[Char]) = {
val buf = new ArrayBuffer[Int]
buf += 0
- for (i <- 0 until cs.length) if (isLineBreak(i)) buf += i + 1
+ for (i <- 0 until cs.length) if (isEndOfLine(i)) buf += i + 1
buf += cs.length // sentinel, so that findLine below works smoother
buf.toArray
}
@@ -149,8 +158,8 @@ class BatchSourceFile(val file : AbstractFile, val content0: Array[Char]) extend
private var lastLine = 0
- /** Convert offset to line in this source file
- * Lines are numbered from 0
+ /** Convert offset to line in this source file.
+ * Lines are numbered from 0.
*/
def offsetToLine(offset: Int): Int = {
val lines = lineIndices
@@ -0,0 +1,6 @@
+t8015-ffa.scala:7: error: type mismatch;
+ found : String("3")
+ required: Int
+ val i: Int = "3" // error line 7 (was 8)
+ ^
+one error found
@@ -0,0 +1,8 @@
+
+package foo
+
+//------- object Next
+
+trait F {
+ val i: Int = "3" // error line 7 (was 8)
+}
@@ -0,0 +1,6 @@
+t8015-ffb.scala:10: warning: side-effecting nullary methods are discouraged: suggest defining as `def w()` instead
+ def w = { x
+ ^
+error: No warnings can be incurred under -Xfatal-warnings.
+one warning found
+one error found
@@ -0,0 +1 @@
+-Xlint -Xfatal-warnings
@@ -0,0 +1,11 @@
+
+trait G {
+ val c: Char = '\u000a' // disallowed!
+ def x\u000d\u000a = 9 // as nl
+ def y() = x
+ def z() = {
+ y()\u000a() // was Int does not take parameters
+ }
+ def v = y()\u000c() // was Int does not take parameters
+ def w = { x () } // ^L is colored blue on this screen, hardly visible
+}
@@ -0,0 +1,7 @@
+
+object Test extends App {
+ val ms = """This is a long multiline string
+ with \u000d\u000a CRLF embedded."""
+ assert(ms.lines.size == 3, s"lines.size ${ms.lines.size}")
+ assert(ms contains "\r\n CRLF", "no CRLF")
+}

0 comments on commit bb2e99a

Please sign in to comment.