Permalink
Browse files

Add String interpolation support to the lexer.

  • Loading branch information...
1 parent bcea26e commit 34fcc02e783ed75972308ce91776b74bfa108205 @mdr mdr committed Apr 24, 2012
@@ -1,27 +1,41 @@
package scalariform
/**
- * A group of Scala versions that Scalariform wants to distinguish (primarily because they have incompatible syntax)
+ * A group of Scala versions that Scalariform wants to distinguish because they have incompatible syntax
*/
-sealed trait ScalaVersionGroup
-case object SCALA_28_29_210 extends ScalaVersionGroup
+sealed trait ScalaVersionGroup extends Ordered[ScalaVersionGroup] {
+
+ /**
+  * Orders the version groups from oldest to newest:
+  * SCALA_28_29 < SCALA_210 < SCALA_211.
+  *
+  * @param that the group to compare this one against
+  * @return 0 if `that` is this same group, -1 if this group is older than `that`, 1 if it is newer
+  */
+ def compare(that: ScalaVersionGroup): Int =
+ this match {
+ case `that` ⇒ 0
+ // Equality was ruled out above, so the oldest group is always smaller...
+ case SCALA_28_29 ⇒ -1
+ // ...the middle group is larger only than the oldest one. Without this case,
+ // comparing SCALA_210 against any other group throws a MatchError
+ // (e.g. `scalaVersion >= SCALA_211` while lexing with the 2.10 group).
+ case SCALA_210 ⇒ if (that == SCALA_28_29) 1 else -1
+ // ...and the newest group is always larger.
+ case SCALA_211 ⇒ 1
+ }
+
+}
+
+case object SCALA_28_29 extends ScalaVersionGroup
+case object SCALA_210 extends ScalaVersionGroup
case object SCALA_211 extends ScalaVersionGroup
object ScalaVersions {
-
+
+ def DEFAULT_GROUP = getVersionGroup(DEFAULT_VERSION)
+
def DEFAULT_VERSION = "2.9.1"
def getVersionGroup(version: String): ScalaVersionGroup =
version match {
- case _ if version startsWith "2.8." ⇒ SCALA_28_29_210
- case _ if version startsWith "2.9." ⇒ SCALA_28_29_210
- case _ if version startsWith "2.10." ⇒ SCALA_28_29_210
+ case _ if version startsWith "2.8." ⇒ SCALA_28_29
+ case _ if version startsWith "2.9." ⇒ SCALA_28_29
+ case _ if version startsWith "2.10." ⇒ SCALA_210
case _ ⇒ SCALA_211
}
def representativeVersion(versionGroup: ScalaVersionGroup) = versionGroup match {
- case SCALA_28_29_210 => "2.9.1"
- case SCALA_211 => "2.11"
+ case SCALA_28_29 ⇒ "2.9.1"
+ case SCALA_210 ⇒ "2.10.0"
+ case SCALA_211 ⇒ "2.11.0"
}
-
+
}
@@ -32,4 +32,12 @@ class XmlMode extends LexerMode {
def nestingLevel = tagNestLevel
+}
+
+class StringInterpolationMode(val multiLine: Boolean) extends LexerMode {
+
+ var initialSegment = true
+
+ var interpolationVariable = false
+
}
@@ -20,7 +20,7 @@ trait ModeStack { self: ScalaLexer ⇒
protected def isRootMode = modeStack.size == 1
protected def switchToScalaModeAndFetchToken() {
- modeStack.push(new ScalaMode)
+ switchToScalaMode()
fetchScalaToken()
}
@@ -29,10 +29,24 @@ trait ModeStack { self: ScalaLexer ⇒
fetchXmlToken()
}
+ protected def switchToStringInterpolationMode(multiLine: Boolean) {
+ modeStack.push(new StringInterpolationMode(multiLine))
+ }
+
+ protected def switchToScalaMode() {
+ modeStack.push(new ScalaMode)
+ }
+
+ protected def isStringInterpolationMode = modeStack.head.isInstanceOf[StringInterpolationMode]
+
protected def isXmlMode = modeStack.head.isInstanceOf[XmlMode]
+ protected def isScalaMode = modeStack.head.isInstanceOf[ScalaMode]
+
protected def xmlMode: XmlMode = modeStack.head.asInstanceOf[XmlMode]
protected def scalaMode: ScalaMode = modeStack.head.asInstanceOf[ScalaMode]
+ protected def stringInterpolationMode: StringInterpolationMode = modeStack.head.asInstanceOf[StringInterpolationMode]
+
}
@@ -14,7 +14,7 @@ import scalariform._
class ScalaLexer(
protected val reader: IUnicodeEscapeReader,
protected val forgiveErrors: Boolean = false,
- protected val scalaVersion: ScalaVersionGroup = SCALA_28_29_210)
+ protected val scalaVersion: ScalaVersionGroup = ScalaVersions.DEFAULT_GROUP)
extends ScalaOnlyLexer with XmlLexer with ModeStack with TokenTests with Iterator[Token] {
import ScalaLexer._
@@ -187,15 +187,38 @@ class ScalaLexer(
def next(): Token = {
if (isXmlMode)
fetchXmlToken()
- else
+ else if (isScalaMode)
fetchScalaToken()
+ else if (isStringInterpolationMode) {
+ fetchStringInterpolationToken()
+ }
+
if (builtToken.tokenType == EOF)
eofTokenEmitted = true
builtToken
}
def hasNext = !eofTokenEmitted
+ private def fetchStringInterpolationToken() { // Builds the next token when the current mode is a StringInterpolationMode (see next())
+ if (stringInterpolationMode.interpolationVariable) { // flag set by getStringPart after it saw '$' followed by an identifier start
+ stringInterpolationMode.interpolationVariable = false
+ do {
+ nextChar()
+ } while (ch != SU && Character.isUnicodeIdentifierPart(ch)) // consume the identifier; stop at SU or the first non-identifier-part character
+ token(VARID)
+ } else {
+ if (stringInterpolationMode.initialSegment) { // only before the first segment: handle the opening delimiter
+ stringInterpolationMode.initialSegment = false
+ if (stringInterpolationMode.multiLine)
+ munch("\"\"\"") // presumably consumes the opening triple quote -- confirm against munch
+ else
+ munch("\"") // presumably consumes the opening quote -- confirm against munch
+ }
+ getStringPart(stringInterpolationMode.multiLine) // scan up to the next '$' splice or the end of the string
+ }
+ }
+
}
object ScalaLexer {
@@ -246,7 +269,7 @@ object ScalaLexer {
private val BUFFER_MASK = BUFFER_SIZE - 1
- private def makeRawLexer(s: String, forgiveErrors: Boolean = false, scalaVersionGroup: ScalaVersionGroup = SCALA_28_29_210): ScalaLexer =
+ private def makeRawLexer(s: String, forgiveErrors: Boolean = false, scalaVersionGroup: ScalaVersionGroup = ScalaVersions.DEFAULT_GROUP): ScalaLexer =
new ScalaLexer(new UnicodeEscapeReader(s, forgiveErrors), forgiveErrors, scalaVersionGroup)
}
@@ -6,7 +6,7 @@ import scalariform.lexer.Chars._
import scalariform.lexer.ScalaLexer._
import scalariform.lexer.Tokens._
import scalariform.utils.Utils
-import scalariform.SCALA_211
+import scalariform._
/**
* Lexer implementation for non-XML Scala
@@ -15,6 +15,8 @@ private[lexer] trait ScalaOnlyLexer { self: ScalaLexer ⇒
private var processingSymbol = false
+ private var possibleInterpolationId = false
+
protected def fetchScalaToken() {
(ch: @switch) match {
case ' ' | '\t' | '\n' | '\r' /* TODO: | FF */
@@ -33,7 +35,11 @@ private[lexer] trait ScalaOnlyLexer { self: ScalaLexer ⇒
'u' | 'v' | 'w' | 'x' | 'y' |
'z'
nextChar()
- getIdentRest()
+ try {
+ possibleInterpolationId = true
+ getIdentRest()
+ } finally
+ possibleInterpolationId = false
case '<'
lastCh match {
case SU | ' ' | '\t' | '\n' | '{' | '(' | '>' if ch(1) != SU && (isNameStart(ch(1)) || ch(1) == '!' || ch(1) == '?')
@@ -106,12 +112,12 @@ private[lexer] trait ScalaOnlyLexer { self: ScalaLexer ⇒
nextChar(); token(RBRACE)
val nestingLevel = scalaMode.unnestBrace()
if (nestingLevel == 0 && !isRootMode)
- popMode() // Go back to XML
+ popMode() // Go back to XML or string interpolation
case '['
nextChar(); token(LBRACKET)
case ']'
nextChar(); token(RBRACKET)
- case SU
+ case SU
token(EOF)
case _
if (ch == '\u21D2') {
@@ -210,6 +216,67 @@ private[lexer] trait ScalaOnlyLexer { self: ScalaLexer ⇒
scanForClosingTripleQuotes()
}
+ @tailrec
+ final protected def getStringPart(multiLine: Boolean) { // Scans one segment of an interpolated string starting at ch; multiLine selects the triple-quoted rules
+ if (ch == '"') {
+ if (multiLine) {
+ nextChar()
+ if (isTripleQuote()) { // two more quotes followed: closing delimiter, so end the segment and pop the mode
+ token(STRING_PART)
+ popMode()
+ } else
+ getStringPart(multiLine) // fewer than three quotes: they belong to the string, keep scanning
+ } else {
+ nextChar() // single-line string: one quote closes it
+ token(STRING_PART)
+ popMode()
+ }
+ } else if (ch == '$') {
+ nextChar()
+ if (ch == '$') { // "$$": skip both characters and keep scanning
+ nextChar()
+ getStringPart(multiLine)
+ } else if (ch == '{') { // "${": end the segment; '{' is not consumed here, it is left for the Scala mode pushed below
+ token(STRING_PART)
+ switchToScalaMode()
+ } else if (Character.isUnicodeIdentifierStart(ch)) { // "$ident": end the segment and flag that an identifier comes next
+ token(STRING_PART)
+ stringInterpolationMode.interpolationVariable = true
+ } else {
+ if (forgiveErrors) { // lenient mode: skip the character after '$' and keep scanning
+ nextChar()
+ getStringPart(multiLine)
+ } else
+ throw new ScalaLexerException("invalid string interpolation")
+ }
+ } else {
+ val isUnclosedLiteral = !isUnicodeEscape && (ch == SU || (!multiLine && (ch == '\r' || ch == '\n'))) // SU, or a line break in a single-line string, seen before any closing quote (isUnicodeEscape presumably excludes escaped characters -- confirm)
+ if (isUnclosedLiteral) {
+ if (forgiveErrors) { // lenient mode: emit what was scanned so far and pop the mode
+ token(STRING_PART)
+ popMode()
+ } else
+ throw new ScalaLexerException(if (!multiLine) "unclosed string literal" else "unclosed multi-line string literal")
+ } else {
+ nextChar() // ordinary character: consume it and keep scanning
+ getStringPart(multiLine)
+ }
+ }
+ }
+
+ private def isTripleQuote(): Boolean = // true when ch and the character after it are both '"'; getStringPart calls this after already consuming one quote
+ if (ch == '"') {
+ nextChar()
+ if (ch == '"') {
+ nextChar()
+ while (ch == '"') // also swallow any further consecutive quotes
+ nextChar()
+ true
+ } else
+ false // NOTE: the one quote that was seen has already been consumed at this point
+ } else
+ false
+
private def getIdentRest(): Unit = (ch: @switch) match {
case 'A' | 'B' | 'C' | 'D' | 'E' |
'F' | 'G' | 'H' | 'I' | 'J' |
@@ -274,8 +341,12 @@ private[lexer] trait ScalaOnlyLexer { self: ScalaLexer ⇒
val tokenType =
if (processingSymbol)
SYMBOL_LITERAL
- else
+ else if (possibleInterpolationId && ch == '\"' && scalaVersion >= SCALA_210) {
+ switchToStringInterpolationMode(lookaheadIs("\"\"\""))
+ INTERPOLATION_ID
+ } else
Keywords(getTokenText).getOrElse(VARID)
+
token(tokenType)
}
@@ -393,12 +464,12 @@ private[lexer] trait ScalaOnlyLexer { self: ScalaLexer ⇒
if (ch == '.') {
val c = ch(1)
-
- if (scalaVersion == SCALA_211 && !isDigit(c)) {
+
+ if (scalaVersion >= SCALA_211 && !isDigit(c)) {
token(INTEGER_LITERAL)
return
}
-
+
val isDefinitelyNumber =
(c: @switch) match {
/** Another digit is a giveaway. */
@@ -18,6 +18,7 @@ object Tokens {
val RPAREN = TokenType("RPAREN")
val IMPORT = TokenType("IMPORT")
val STRING_LITERAL = TokenType("STRING_LITERAL")
+ val STRING_PART = TokenType("STRING_PART")
val FLOATING_POINT_LITERAL = TokenType("FLOATING_POINT_LITERAL")
val EXCLAMATION = TokenType("EXCLAMATION")
val NEWLINES = TokenType("NEWLINES")
@@ -83,6 +84,7 @@ object Tokens {
val OVERRIDE = TokenType("OVERRIDE")
val ARROW = TokenType("ARROW")
val EXTENDS = TokenType("EXTENDS")
+ val INTERPOLATION_ID = TokenType("INTERPOLATION_ID")
val XML_START_OPEN = TokenType("XML_START_OPEN", isXml = true)
val XML_EMPTY_CLOSE = TokenType("XML_EMPTY_CLOSE", isXml = true)
val XML_TAG_CLOSE = TokenType("XML_TAG_CLOSE", isXml = true)
@@ -10,11 +10,11 @@ import java.io._
class ScalaLexerTest extends FlatSpec with ShouldMatchers {
- implicit def string2TestString(s: String)(implicit forgiveErrors: Boolean = false, scalaVersion: ScalaVersionGroup = SCALA_28_29_210) =
+ implicit def string2TestString(s: String)(implicit forgiveErrors: Boolean = false, scalaVersion: ScalaVersionGroup = ScalaVersions.DEFAULT_GROUP) =
new TestString(s, forgiveErrors, scalaVersion)
"" producesTokens ()
-
+
"println" producesTokens (VARID)
"lazy" producesTokens (LAZY)
@@ -103,7 +103,7 @@ class ScalaLexerTest extends FlatSpec with ShouldMatchers {
"42.toString" producesTokens (INTEGER_LITERAL, DOT, VARID)
{
- implicit val scalaVersion = SCALA_28_29_210
+ implicit val scalaVersion = SCALA_28_29
"5.f" producesTokens (FLOATING_POINT_LITERAL)
"5.d" producesTokens (FLOATING_POINT_LITERAL)
"5." producesTokens (FLOATING_POINT_LITERAL)
@@ -116,6 +116,26 @@ class ScalaLexerTest extends FlatSpec with ShouldMatchers {
"5." producesTokens (INTEGER_LITERAL, DOT)
}
+ {
+ implicit val scalaVersion = SCALA_28_29
+ """ X s"" """ producesTokens (WS, VARID, WS, VARID, STRING_LITERAL, WS)
+ }
+
+ {
+ implicit val scalaVersion = SCALA_210
+ """ X s"" """ producesTokens (WS, VARID, WS, INTERPOLATION_ID, STRING_PART, WS)
+ """ X s "" """ producesTokens (WS, VARID, WS, VARID, WS, STRING_LITERAL, WS)
+ """ s"$foo" """ producesTokens (WS, INTERPOLATION_ID, STRING_PART, VARID, STRING_PART, WS)
+ """ s"$$" """ producesTokens (WS, INTERPOLATION_ID, STRING_PART, WS)
+ """ s"${foo}" """ producesTokens (WS, INTERPOLATION_ID, STRING_PART, LBRACE, VARID, RBRACE, STRING_PART, WS)
+ """ s"${s"${x}"}" """ producesTokens (WS, INTERPOLATION_ID, STRING_PART, LBRACE, INTERPOLATION_ID, STRING_PART, LBRACE, VARID, RBRACE, STRING_PART, RBRACE, STRING_PART, WS)
+
+ <t>s""""""</t>.text producesTokens (INTERPOLATION_ID, STRING_PART)
+ <t>s"""""""""</t>.text producesTokens (INTERPOLATION_ID, STRING_PART)
+ <t>s""" $foo """</t>.text producesTokens (INTERPOLATION_ID, STRING_PART, VARID, STRING_PART)
+
+ }
+
"'f'" producesTokens (CHARACTER_LITERAL)
"""'\n'""" producesTokens (CHARACTER_LITERAL)
"""'\025'""" producesTokens (CHARACTER_LITERAL)
@@ -226,7 +246,7 @@ println("foo")""" producesTokens (VARID, LPAREN, STRING_LITERAL, RPAREN, WS, VAR
}
- class TestString(s: String, forgiveErrors: Boolean = false, scalaVersionGroup: ScalaVersionGroup = SCALA_28_29_210) {
+ class TestString(s: String, forgiveErrors: Boolean = false, scalaVersionGroup: ScalaVersionGroup = ScalaVersions.DEFAULT_GROUP) {
def producesTokens(toks: TokenType*)() {
check(s.stripMargin, toks.toList)

0 comments on commit 34fcc02

Please sign in to comment.