Skip to content

Commit

Permalink
Added lexer token class ExtToken with customized printing of tokens, …
Browse files Browse the repository at this point in the history
…which

* hides the mostly-useless token type numbers which were causing the tests to change unnecessarily, and
* displays tokens whose text is modified (e.g. "'a'" lexes to a token whose text is "a") with a reconstruction of their original text.


git-svn-id: http://switchb.org/svn/e/cl-e/trunk@786 bf3ccfa1-f3f3-0310-a3d5-cea1fe9d5a75
  • Loading branch information
kpreid committed Feb 19, 2012
1 parent 4fc548d commit 0008e6e
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 16 deletions.
47 changes: 47 additions & 0 deletions antlr/ExtToken.java
@@ -0,0 +1,47 @@
// Copyright 2007 Kevin Reid, under the terms of the MIT X license
// found at http://www.opensource.org/licenses/mit-license.html ...............

import antlr.CommonToken;

/** Modified print behavior over CommonToken; in particular, does not print the numeric token type, which is not useful information to the user; also knows how to reverse text transformations for particular token types. */
public class ExtToken extends CommonToken {

/** this is the constructor invoked by CharScanner */
public ExtToken() { super(); }

public String getOriginalText() {
if (type == ETokenTypes.EOF) {
return "<EOF>";
} else if (type == ETokenTypes.SOURCE_VALUE_HOLE) {
return "<$-hole #" + getText() + ">";
} else if (type == ETokenTypes.SOURCE_PATTERN_HOLE) {
return "<@-hole #" + getText() + ">";
} else if (type == ETokenTypes.DOLLAR_IDENT) {
return "$" + getText();
} else if (type == ETokenTypes.AT_IDENT) {
return "@" + getText();
} else if (type == ETokenTypes.DOLLARESC) {
return "$\\" + getText(); // XXX escape
} else if (type == ETokenTypes.CHAR_LITERAL) {
return "'" + getText() + "'"; // XXX escape
} else if (type == ETokenTypes.STRING) {
return "\"" + getText() + "\""; // XXX escape
} else if (type == ETokenTypes.HEX) {
return "0x" + getText();
} else if (type == ETokenTypes.OCTAL) {
return "0" + getText();
} else if (type == ETokenTypes.URI) {
return "<" + getText() + ">";
} else if (type == ETokenTypes.URIGetter) {
return "<" + getText() + ">";
} else if (type == ETokenTypes.URIStart) {
return "<" + getText();
} else {
return getText();
}
}

public String toString() {
return "<\"" + getOriginalText() + "\" @ " + getLine() + ":" + getColumn() + ">";
}
}
9 changes: 5 additions & 4 deletions jlib/parseEToSExpression.emaker
Expand Up @@ -64,10 +64,11 @@ def author(<unsafe>) {
def elexer := makeEALexer(makeCountingLexerSharedInputState(makeStringReader(text), 0))
def qlexer := makeQuasiLexer(elexer.getInputState())
def tb := makeTokenMultiBuffer(["e", "quasi"], [elexer, qlexer])
elexer.setSelector(tb)
qlexer.setSelector(tb)
elexer.setFilename(fname)
qlexer.setFilename(fname)
for lexer in [elexer, qlexer] {
lexer.setSelector(tb)
lexer.setFilename(fname)
lexer.setTokenObjectClass("ExtToken")
}

def parser := makeEParser(tb)
parser.setFilename(fname)
Expand Down
2 changes: 2 additions & 0 deletions lisp/antlr-system.lisp
Expand Up @@ -111,6 +111,8 @@
:pathname (merge-pathnames #p"antlr/" (component-pathname (find-system :e-on-cl)))
:components
((:java-source-file "ExtAST")
(:java-source-file "ExtToken"
:depends-on ("e"))
(:java-source-file "CountingCharBuffer")
(:java-source-file "CountingLexerSharedInputState")
(:antlr-source-file "e"
Expand Down
52 changes: 40 additions & 12 deletions tests/syntax-in.updoc
Expand Up @@ -96,7 +96,7 @@ or both.
A decimal point must have at least one following digit, because otherwise it is ambiguous with call syntax.

? t("3.")
# problem: (line 1)@3: unexpected token: ["null",<1>,line=1,col=3]
# problem: (line 1)@3: unexpected token: <"<EOF>" @ 1:3>
? t("3.e2")
# stdout: CurryE. CallE.
# LiteralE. 3
Expand All @@ -119,7 +119,7 @@ An additional dot always indicates call syntax.
There must also be at least one preceding digit.

? t(".2")
# problem: (line 1)@1: unexpected token: [".",<136>,line=1,col=1]
# problem: (line 1)@1: unexpected token: <"." @ 1:1>

--- Nouns ---

Expand Down Expand Up @@ -236,7 +236,7 @@ XXX ListExpr doesn't really belong here '

Fails for the sake of sane precedence rules, even though it would be unambiguous, sort of.
? t("1 + def a := 2")
# problem: (line 1)@11: unexpected token: [":=",<165>,line=1,col=11]
# problem: (line 1)@11: unexpected token: <":=" @ 1:11>
XXX produce a nice error for this?

? t("a & def _ {}")
Expand Down Expand Up @@ -554,8 +554,7 @@ Parentheses disable modPow form
# NounE. a

? t("a :b :c")
# problem: (line 1)@6: unexpected token: [":",<175>,line=1,col=6]
XXX prettier error
# problem: (line 1)@6: unexpected token: <":" @ 1:6>

--- ForExpr ---

Expand Down Expand Up @@ -675,7 +674,7 @@ XXX Despite the appearance of the above test, keyword testing is not properly im
--- URISchemeExpr ---

? t("<>")
# problem: (line 1)@1: unexpected token: ["<",<204>,line=1,col=1]
# problem: (line 1)@1: unexpected token: <"<" @ 1:1>

? t("<a>")
# stdout: URISchemeE. a
Expand All @@ -690,11 +689,11 @@ would-be-keywords

erroneous
? t("<ab cd>")
# problem: (line 1)@1: unexpected token: ["<",<204>,line=1,col=1]
# problem: (line 1)@1: unexpected token: <"<" @ 1:1>
? t("<+a>")
# problem: (line 1)@1: unexpected token: ["<",<204>,line=1,col=1]
# problem: (line 1)@1: unexpected token: <"<" @ 1:1>
? t("<a!b>")
# problem: (line 1)@1: unexpected token: ["<",<204>,line=1,col=1]
# problem: (line 1)@1: unexpected token: <"<" @ 1:1>

--- WhenExpr ---

Expand Down Expand Up @@ -781,9 +780,38 @@ XXX incomplete tests; borrow when-fn examples from syntax-sugar
# NounE. b
# [] NounE. c

--- Parsing errors ---

Printing of tokens in errors, for those tokens which have special treatment:

? t("[")
# problem: (line 1)@2: unexpected token: <"<EOF>" @ 1:2>
XXX this is not ideal; "<EOF>" is a meta-notation for end of input rather than actual token text, so it should not be printed inside quotes

NOTE: no handy way to test quasiliteral tokens (DOLLARESC, DOLLAR_IDENT, and AT_IDENT) because they can't occur in the wrong place

? t("0 'a'")
# problem: (line 1)@3: unexpected token: <"'a'" @ 1:3>

? t("def a \"b\"")
# problem: (line 1)@7: unexpected token: <""b"" @ 1:7>
XXX should escape quotes

? t("0 0x10")
# problem: (line 1)@3: unexpected token: <"0x10" @ 1:3>

? t("0 010")
# problem: (line 1)@3: unexpected token: <"010" @ 1:3>

? t("0 <ab:c>")
# problem: (line 1)@3: unexpected token: <"<ab:c>" @ 1:3>

? t("0 <ab>")
# problem: (line 1)@3: unexpected token: <"<ab>" @ 1:3>

? t("0 <ab: ")
# problem: (line 1)@3: unexpected token: <"<ab:" @ 1:3>


XXX test acceptance of line breaks everywhere
XXX general parser tests for everything

x ? t("")
x # stdout:

0 comments on commit 0008e6e

Please sign in to comment.