misc tidying of the lexer; restore 2.8 compatibility
mdr committed Dec 12, 2011
1 parent e13058a commit 35b8df4
Showing 12 changed files with 91 additions and 73 deletions.
7 changes: 7 additions & 0 deletions scalariform/src/main/scala/scalariform/lexer/CharConstants.scala
@@ -0,0 +1,7 @@
package scalariform.lexer

object CharConstants {

final val SU = '\u001A'

}
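
For context: SU (ASCII 0x1A, the "substitute" control character) is the end-of-input sentinel the lexer works with, the same convention scalac's own lexer uses. A minimal sketch of the idiom (the charAt helper and demo object are illustrative, not part of the commit):

import scalariform.lexer.CharConstants.SU

object SentinelDemo {
  // Reading past the end of input yields SU instead of throwing.
  def charAt(s: String, i: Int): Char =
    if (i < s.length) s.charAt(i) else SU

  def main(args: Array[String]) {
    assert(charAt("ab", 5) == SU)
  }
}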
2 changes: 1 addition & 1 deletion scalariform/src/main/scala/scalariform/lexer/Chars.scala
@@ -36,4 +36,4 @@ object Chars {
def isIdentifierPart(c: Char) =
(c == '$') || Character.isUnicodeIdentifierPart(c)

}
}
13 changes: 13 additions & 0 deletions scalariform/src/main/scala/scalariform/lexer/HiddenTokenInfo.scala
@@ -0,0 +1,13 @@
package scalariform.lexer

trait HiddenTokenInfo {

def isInferredNewline(token: Token): Boolean

def inferredNewlines(token: Token): Option[HiddenTokens]

def hiddenPredecessors(token: Token): HiddenTokens

def allHiddenTokens: Iterable[HiddenTokens]

}
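
A sketch of how this interface is consumed: ScalaLexer.tokeniseFull (shown later in this commit) returns a HiddenTokenInfo alongside the significant tokens, and hiddenPredecessors recovers the whitespace and comments swallowed before a given token. The input string and printout here are illustrative:

import scalariform.lexer.ScalaLexer

object HiddenInfoDemo {
  def main(args: Array[String]) {
    val (hiddenInfo, tokens) = ScalaLexer.tokeniseFull("// note\nval x = 1")
    // The comment and line break preceding `val` are its hidden predecessors.
    println(hiddenInfo.hiddenPredecessors(tokens.head))
  }
}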
8 changes: 3 additions & 5 deletions scalariform/src/main/scala/scalariform/lexer/Lexer.scala
@@ -4,7 +4,7 @@ import scala.annotation._
import scala.collection.mutable.{ Queue, Stack }
import scala.math.min
import scala.xml.parsing.TokenTests
import scala.xml.Utility.SU
import scalariform.lexer.CharConstants.SU
import scalariform.lexer.ScalaLexer._
import scalariform.lexer.Tokens._
import scalariform.utils.Utils
@@ -20,7 +20,7 @@ abstract class Lexer(reader: UnicodeEscapeReader) extends TokenTests {
private var actualTokenTextLength = 0

protected var eof = false
protected var builtToken: Option[Token] = None
protected var builtToken: Token = _

// Two queues maintained in parallel. Invariant: chQueue.length == unicodeEscapesQueue.length
private val chQueue = new Queue[Char]
@@ -71,12 +71,10 @@ abstract class Lexer(reader: UnicodeEscapeReader) extends TokenTests {
val stopIndex = min(startIndex + tokenLength - 1, reader.s.length - 1) // min protects against overeager consumption past EOF in forgiving mode
val rawText = reader.s.substring(actualTokenTextOffset, stopIndex + 1)
val text = tokenTextBuffer.toString
val token = Token(tokenType, text, startIndex, rawText)
builtToken = Some(token)
builtToken = Token(tokenType, text, startIndex, rawText)
tokenTextBuffer.clear()
actualTokenTextOffset = stopIndex + 1
actualTokenTextLength = 0
// println("Token: " + token)
}

protected def lookaheadIs(s: String): Boolean = s.zipWithIndex forall { case (c, index) ⇒ ch(index) == c }
scalariform/src/main/scala/scalariform/lexer/NewlineInferencer.scala
@@ -1,41 +1,31 @@
package scalariform.lexer

import scala.collection.immutable.Queue
import scala.collection.JavaConversions._
import scalariform.utils.Utils.boolean2ImpliesWrapper
import scalariform.lexer.Tokens._
import scala.annotation.tailrec
import java.util.{ HashMap, Map ⇒ JMap }

trait HiddenTokenInfo {

def isInferredNewline(token: Token): Boolean

def inferredNewlines(token: Token): Option[HiddenTokens]

def hiddenPredecessors(token: Token): HiddenTokens

def allHiddenTokens: Iterable[HiddenTokens]

}
import java.{ util ⇒ ju }

class NewlineInferencer(private val delegate: Iterator[(HiddenTokens, Token)]) extends HiddenTokenInfo {

import NewlineInferencer._

require(delegate.hasNext)

private var hiddenPredecessors: JMap[Token, HiddenTokens] = new HashMap()
private var inferredNewlines: JMap[Token, HiddenTokens] = new HashMap()
private var hiddenPredecessors: ju.Map[Token, HiddenTokens] = new ju.HashMap()
private var inferredNewlines: ju.Map[Token, HiddenTokens] = new ju.HashMap()

def isInferredNewline(token: Token): Boolean = inferredNewlines containsKey token

def inferredNewlines(token: Token): Option[HiddenTokens] = Option(inferredNewlines get token)

def hiddenPredecessors(token: Token): HiddenTokens = hiddenPredecessors get token

import scala.collection.JavaConversions._
lazy val allHiddenTokens = hiddenPredecessors.values ++ inferredNewlines.values

private var buffer: Queue[(HiddenTokens, Token)] = Queue()

@tailrec
private def refillBuffer() {
if (buffer.size < 2 && delegate.hasNext) {
@@ -52,7 +42,8 @@ class NewlineInferencer(private val delegate: Iterator[(HiddenTokens, Token)]) extends HiddenTokenInfo {
private var previousTokenOption: Option[Token] = None

private var multipleStatementRegionMarkerStack: List[TokenType] = Nil
private def multipleStatementsAllowed = multipleStatementRegionMarkerStack.isEmpty || multipleStatementRegionMarkerStack.head == RBRACE
private def multipleStatementsAllowed =
multipleStatementRegionMarkerStack.isEmpty || multipleStatementRegionMarkerStack.head == RBRACE

def nextToken(): Token = {
val token = nextTokenCore()
@@ -107,7 +98,8 @@ class NewlineInferencer(private val delegate: Iterator[(HiddenTokens, Token)]) extends HiddenTokenInfo {

private def shouldTranslateToNewline(nextToken: Token) = {
val nextTokenType = nextToken.tokenType
val nextCanBeginAStatement = !tokensWhichCannotBeginAStatement(nextToken.tokenType) && (nextTokenType == CASE implies followingTokenIsClassOrObject)
val nextCanBeginAStatement = !tokensWhichCannotBeginAStatement(nextToken.tokenType) &&
(nextTokenType == CASE implies followingTokenIsClassOrObject)
val previousCanEndAStatement = previousTokenOption.map(_.tokenType).map(tokensWhichCanTerminateAStatement).getOrElse(false)
previousCanEndAStatement && nextCanBeginAStatement && multipleStatementsAllowed
}
@@ -123,12 +115,12 @@ class NewlineInferencer(private val delegate: Iterator[(HiddenTokens, Token)]) extends HiddenTokenInfo {
object NewlineInferencer {

val tokensWhichCanTerminateAStatement: Set[TokenType] = Set(
INTEGER_LITERAL, FLOATING_POINT_LITERAL, CHARACTER_LITERAL, STRING_LITERAL, SYMBOL_LITERAL, VARID, OTHERID, PLUS, MINUS, STAR, PIPE, TILDE, EXCLAMATION,
THIS, NULL, TRUE, FALSE, RETURN, TYPE, XML_EMPTY_CLOSE, XML_TAG_CLOSE, XML_COMMENT, XML_CDATA, XML_UNPARSED, XML_PROCESSING_INSTRUCTION,
USCORE, RPAREN, RBRACKET, RBRACE)
INTEGER_LITERAL, FLOATING_POINT_LITERAL, CHARACTER_LITERAL, STRING_LITERAL, SYMBOL_LITERAL, VARID, OTHERID, PLUS,
MINUS, STAR, PIPE, TILDE, EXCLAMATION, THIS, NULL, TRUE, FALSE, RETURN, TYPE, XML_EMPTY_CLOSE, XML_TAG_CLOSE,
XML_COMMENT, XML_CDATA, XML_UNPARSED, XML_PROCESSING_INSTRUCTION, USCORE, RPAREN, RBRACKET, RBRACE)

val tokensWhichCannotBeginAStatement: Set[TokenType] = Set(
CATCH, ELSE, EXTENDS, FINALLY, FORSOME, MATCH, REQUIRES,
WITH, YIELD, COMMA, DOT, SEMI, COLON, /* USCORE, */ EQUALS, ARROW, LARROW, SUBTYPE, VIEWBOUND,
SUPERTYPE, HASH, LBRACKET, RPAREN, RBRACKET, RBRACE)
CATCH, ELSE, EXTENDS, FINALLY, FORSOME, MATCH, REQUIRES, WITH, YIELD, COMMA, DOT, SEMI, COLON, /* USCORE, */ EQUALS,
ARROW, LARROW, SUBTYPE, VIEWBOUND, SUPERTYPE, HASH, LBRACKET, RPAREN, RBRACKET, RBRACE)

}
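
To make the inference rule concrete: a line break is promoted to a NEWLINE token only when the previous token can terminate a statement (per tokensWhichCanTerminateAStatement), the next token can begin one, and no enclosing region forbids multiple statements. A hedged sketch via the public entry point (exact token rendering may differ):

import scalariform.lexer.ScalaLexer

object NewlineInferenceDemo {
  def main(args: Array[String]) {
    // `1` (INTEGER_LITERAL) can end a statement and `val` can begin one,
    // so the intervening line break is inferred as a NEWLINE token.
    val (hiddenInfo, tokens) = ScalaLexer.tokeniseFull("val x = 1\nval y = 2")
    println(tokens filter hiddenInfo.isInferredNewline)
  }
}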
26 changes: 6 additions & 20 deletions scalariform/src/main/scala/scalariform/lexer/ScalaLexer.scala
@@ -1,7 +1,9 @@
package scalariform.lexer

import java.io.File
import scala.annotation._
import scala.collection.mutable.{ Queue, Stack, ListBuffer }
import scala.io.Source
import scalariform.lexer.Tokens._
import scalariform.utils.Utils

@@ -10,18 +12,14 @@ class ScalaLexer(reader: UnicodeEscapeReader, forgiveErrors: Boolean = false)

override protected val forgiveLexerErrors = forgiveErrors

modeStack.push(new ScalaMode())
modeStack.push(new ScalaMode)

def nextToken(): Token = {
if (eof)
super[Lexer].token(EOF)
else if (isXmlMode)
if (isXmlMode)
fetchXmlToken()
else
fetchScalaToken()
val token = builtToken.get
builtToken = None
token
builtToken
}

override protected def switchToScalaModeAndFetchToken() {
@@ -37,24 +35,12 @@ class ScalaLexer(reader: UnicodeEscapeReader, forgiveErrors: Boolean = false)
}

object ScalaLexer {
import java.io._

private[lexer] def digit2int(ch: Int, base: Int): Int = {
if ('0' <= ch && ch <= '9' && ch < '0' + base)
ch - '0'
else if ('A' <= ch && ch < 'A' + base - 10)
ch - 'A' + 10
else if ('a' <= ch && ch < 'a' + base - 10)
ch - 'a' + 10
else
-1
}

def createRawLexer(s: String, forgiveErrors: Boolean = false): ScalaLexer =
new ScalaLexer(new UnicodeEscapeReader(s, forgiveErrors), forgiveErrors)

def tokeniseFull(file: File): (HiddenTokenInfo, List[Token]) = {
val s = scala.io.Source.fromFile(file).mkString
val s = Source.fromFile(file).mkString
tokeniseFull(s)
}

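With builtToken no longer an Option, nextToken() returns the freshly built token directly, and the explicit EOF branch is gone from this method. A sketch of driving the raw lexer to exhaustion (the input is illustrative, and it assumes the fetchers emit an EOF token at end of input, as the Tokens set suggests):

import scalariform.lexer.{ ScalaLexer, Tokens }

object RawLexerDemo {
  def main(args: Array[String]) {
    val lexer = ScalaLexer.createRawLexer("class C")
    var token = lexer.nextToken()
    while (token.tokenType != Tokens.EOF) {
      println(token)
      token = lexer.nextToken()
    }
  }
}
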
scalariform/src/main/scala/scalariform/lexer/ScalaOnlyLexer.scala
@@ -1,13 +1,16 @@
package scalariform.lexer

import scala.annotation._
import scala.xml.Utility.SU
import scalariform.lexer.CharConstants.SU
import scalariform.lexer.Chars._
import scalariform.lexer.ScalaLexer._
import scalariform.lexer.Tokens._
import scalariform.utils.Utils
import scalariform.lexer.Chars._

trait ScalaOnlyLexer extends Lexer {
/**
* Lexer implementation for "pure Scala", i.e. not XML
*/
private[lexer] trait ScalaOnlyLexer extends Lexer {

private var processingSymbol = false

@@ -353,9 +356,8 @@ trait ScalaOnlyLexer extends Lexer {
val base1 = if (base < 10) 10 else base

// read 8,9's even if format is octal, produce a malformed number error afterwards.
while (digit2int(ch, base1) >= 0) {
while (Utils.digit2int(ch, base1) >= 0)
nextChar()
}

def restOfUncertainToken() = {
def isEfd = ch match {
Expand Down Expand Up @@ -431,3 +433,4 @@ trait ScalaOnlyLexer extends Lexer {
}

}

6 changes: 3 additions & 3 deletions scalariform/src/main/scala/scalariform/lexer/Token.scala
@@ -21,13 +21,13 @@ case class Token(tokenType: TokenType, text: String, offset: Int, rawText: Strin

def isNewline = tokenType.isNewline

@deprecated(message = "Use text instead", since = "0.1.2")
@deprecated(message = "Use text instead" /*, since = "0.1.2"*/)
def getText = text

@deprecated(message = "Use offset instead", since = "0.1.2")
@deprecated(message = "Use offset instead" /*, since = "0.1.2"*/)
def startIndex = offset

@deprecated(message = "Use lastCharacterOffset instead", since = "0.1.2")
@deprecated(message = "Use lastCharacterOffset instead"/*, since = "0.1.2"*/)
def stopIndex = lastCharacterOffset

}
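
Note: commenting out the since arguments is the "restore 2.8 compatibility" part of the commit message — @deprecated only gained its since parameter with Scala 2.9, so supplying it breaks a 2.8 build while the message-only form compiles on both.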
scalariform/src/main/scala/scalariform/lexer/UnicodeEscapeReader.scala
@@ -1,7 +1,8 @@
package scalariform.lexer

import scala.xml.Utility.SU
import scalariform.lexer.ScalaLexer._
import scalariform.lexer.CharConstants.SU

import scalariform.utils.Utils.digit2int

class UnicodeEscapeReader(val s: String, forgiveLexerErrors: Boolean = false) {

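UnicodeEscapeReader sits beneath the lexer and decodes \uXXXX escapes (using digit2int for the hex digits) before characters reach the token builders. A sketch of the observable effect, assuming — as the Token fields suggest — that text carries the decoded character while rawText preserves the original escape:

import scalariform.lexer.ScalaLexer

object EscapeDemo {
  def main(args: Array[String]) {
    // The input holds the six characters \ u 0 0 4 1 (an escaped 'A').
    val lexer = ScalaLexer.createRawLexer("val \\u0041 = 1")
    lexer.nextToken() // VAL
    lexer.nextToken() // whitespace (the raw lexer does not filter WS)
    val id = lexer.nextToken()
    println(id.text)    // expected: A
    println(id.rawText) // expected: \u0041
  }
}
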
scalariform/src/main/scala/scalariform/lexer/WhitespaceAndCommentsGrouper.scala
@@ -3,32 +3,37 @@ package scalariform.lexer
import scalariform.lexer.Tokens._
import scala.collection.mutable.ListBuffer

class WhitespaceAndCommentsGrouper(private val delegate: ScalaLexer) extends Iterator[(HiddenTokens, Token)] {
/**
* Groups together whitespace and comments and filters them out from other token types.
*/
private[lexer] class WhitespaceAndCommentsGrouper(lexer: ScalaLexer) extends Iterator[(HiddenTokens, Token)] {

private var nextToken = lexer.nextToken()

private var currentToken = delegate.nextToken
private var ended = false

def hasNext = !ended

def next() = {
require(hasNext)
val hiddenTokens = readHiddenTokens()
val resultToken = currentToken
if (currentToken.tokenType == EOF)
val resultToken = nextToken
if (nextToken.tokenType == EOF)
ended = true
currentToken = delegate.nextToken
nextToken = lexer.nextToken()
(hiddenTokens, resultToken)
}

private def readHiddenTokens(): HiddenTokens = {
var hiddenTokens = new ListBuffer[HiddenToken]
while (currentToken.tokenType != EOF && isHiddenToken(currentToken)) {
hiddenTokens += makeHiddenToken(currentToken)
currentToken = delegate.nextToken
val hiddenTokens = new ListBuffer[HiddenToken]
while (isCommentOrWhitespace(nextToken)) {
hiddenTokens += makeHiddenToken(nextToken)
nextToken = lexer.nextToken()
}
new HiddenTokens(hiddenTokens.toList)
}

private def isHiddenToken(token: Token) = token.tokenType match {
private def isCommentOrWhitespace(token: Token) = token.tokenType match {
case WS | LINE_COMMENT | MULTILINE_COMMENT ⇒ true
case _ ⇒ false
}
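The renamed fields make the contract clearer: the grouper walks the raw token stream and emits each significant token paired with the run of whitespace and comments in front of it. A sketch of the pairing — the driver object is hypothetical and must live in package scalariform.lexer now that the class is private[lexer]:

package scalariform.lexer

object GrouperDemo {
  def main(args: Array[String]) {
    val lexer = ScalaLexer.createRawLexer("/* doc */ val x = 1")
    val grouper = new WhitespaceAndCommentsGrouper(lexer)
    for ((hidden, token) <- grouper) // the final pair carries the EOF token
      println(hidden + " precedes " + token)
  }
}
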
8 changes: 5 additions & 3 deletions scalariform/src/main/scala/scalariform/lexer/XmlLexer.scala
@@ -1,11 +1,14 @@
package scalariform.lexer

import scala.annotation._
import scala.xml.Utility.SU
import scalariform.lexer.CharConstants.SU
import scalariform.lexer.ScalaLexer._
import scalariform.lexer.Tokens._
import scalariform.utils.Utils
import scalariform.lexer.ScalaLexer._

/**
* Lexer implementation for XML literals and patterns
*/
trait XmlLexer extends Lexer {

private def xmlMode: XmlMode = modeStack.head.asInstanceOf[XmlMode]
@@ -264,4 +267,3 @@ trait XmlLexer extends Lexer {
}

}

11 changes: 11 additions & 0 deletions scalariform/src/main/scala/scalariform/utils/Utils.scala
@@ -3,6 +3,7 @@ package scalariform.utils
import java.io.FileOutputStream
import java.io.FileInputStream
import java.io.IOException

object Utils {

def asInstanceOf[T](o: Any) = if (o.isInstanceOf[T]) Some(o.asInstanceOf[T]) else None
@@ -120,5 +121,15 @@ object Utils {
result
}

def digit2int(ch: Char, base: Int): Int =
if ('0' <= ch && ch <= '9' && ch < '0' + base)
ch - '0'
else if ('A' <= ch && ch < 'A' + base - 10)
ch - 'A' + 10
else if ('a' <= ch && ch < 'a' + base - 10)
ch - 'a' + 10
else
-1

}
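
A few spot checks of the relocated helper's behaviour (illustrative):

import scalariform.utils.Utils.digit2int

object Digit2IntDemo {
  def main(args: Array[String]) {
    assert(digit2int('7', 10) == 7)  // ordinary decimal digit
    assert(digit2int('f', 16) == 15) // lowercase hex digit
    assert(digit2int('8', 8) == -1)  // '8' is not a valid octal digit
  }
}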
