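// NOTE: the five lines originally here ("Permalink", "Find file", "Fetching contributors…",
// "Cannot retrieve contributors at this time", "290 lines (242 sloc) 7.03 KB") were residue of a
// web file viewer, not grammar text; they are kept as a comment so the file is valid ANTLR input.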
/* Errors found in the reader page of the Clojure documentation:
* Symbols can contain the chars < > = $ in the implementation ($ is mandatory for resolving member classes and is used in boot.clj; the functions < and > are in fact defined in clojure core).
* Metadata must be a Symbol, Keyword, String or Map: add this detail to the documentation?
*/
grammar Clojure;
/*
options {
// TODO : try to refactor the grammar to get rid of backtrack=true or to minimize the backtracking
backtrack=true;
rewrite=true;
}
*/
//options {output=template; rewrite=true;}
@members {
// State and helpers shared by the predicates and actions of the rules in this grammar.
// True while the body of a lambdaForm is being parsed; the form rule only accepts LAMBDA_ARG when it is set.
boolean inLambda=false;
// Counter adjusted by the quote, syntax-quote and unquote rules; macroForm only accepts unquote forms while it is positive.
int syntaxQuoteDepth = 0;
// Text of every SYMBOL token matched by the form rule, in the order matched.
java.util.List symbols = new java.util.ArrayList();
// Returns the live list of collected symbol texts (the list itself, not a copy).
public List getCollectedSymbols() { return symbols; }
// TODO consider removing this once the grammar is fully tested ?
//public void recover(IntStream input, RecognitionException re) {
// throw new RuntimeException("Not recovering from RecognitionException, na!", re);
//}
//}
//@lexer::members {
// NOTE(review): the two commented-out lines above would have closed this section and opened a lexer
// members section; as written, the declarations below still belong to this same members section.
// Maps the token index of each list parenthesis to the token index of its partner, in both directions (filled by the list rule).
java.util.Map parensMatching = new java.util.HashMap();
// Returns the partner index recorded for the given token index, or null when nothing was recorded for it.
public Integer matchingParenForPosition(Integer position) {
return (Integer) parensMatching.get(position);
}
// Discards every recorded parenthesis pair.
public void clearParensMatching() { parensMatching.clear(); }
}
/*
* Lexer part
*/
// Single-character punctuation tokens. Each rule matches exactly the one literal character shown.
// Opening delimiter of the list rule.
OPEN_PAREN: '('
;
// Closing delimiter of the list rule.
CLOSE_PAREN: ')'
;
// Accepted by the form rule as a form on its own.
AMPERSAND: '&'
;
// Opening delimiter of the vector rule.
LEFT_SQUARE_BRACKET: '['
;
// Closing delimiter of the vector rule.
RIGHT_SQUARE_BRACKET: ']'
;
// Opening delimiter of the map and set rules.
LEFT_CURLY_BRACKET: '{'
;
// Closing delimiter of the map and set rules.
RIGHT_CURLY_BRACKET: '}'
;
// A lone backslash; also referenced by the last alternative of CHARACTER.
BACKSLASH: '\\'
;
// Prefix of metaForm and second character of metadataForm.
CIRCUMFLEX: '^'
;
// Prefix of derefForm.
COMMERCIAL_AT: '@'
;
// Prefix of set, metadataForm, varQuoteForm and lambdaForm; also referenced by REGEX_LITERAL, METADATA_TYPEHINT and SYMBOL_REST.
NUMBER_SIGN: '#'
;
// Prefix of quoteForm and second token of varQuoteForm.
APOSTROPHE: '\''
;
// Names that are lexed as SPECIAL_FORM tokens rather than SYMBOL tokens.
// The form rule accepts a SPECIAL_FORM in the same position as a SYMBOL, but does not record it in the symbols list.
// TODO complete this list
SPECIAL_FORM: 'def' | 'if' | 'do' | 'let' | 'quote' | 'var' | 'fn' | 'loop' |
'recur' | 'throw' | 'try' | 'monitor-enter' | 'monitor-exit' |
'new' | 'set!' | '.'
;
// String literal: a double quote, then any mix of escape sequences and characters other than
// backslash and double quote, then a closing double quote. Because EscapeSequence consumes the
// character after a backslash, an escaped double quote does not terminate the string.
// Taken from the Java grammar example of Terence Parr.
STRING
: '"' ( EscapeSequence | ~('\\'|'"') )* '"'
;
// Regex literal: a '#' immediately followed by a double-quoted body. Inside the body a backslash
// always consumes the character after it, so an escaped double quote does not end the literal.
REGEX_LITERAL
: NUMBER_SIGN '"' ( ~('\\' | '"') | '\\' . )* '"'
;
// Escape sequence used inside STRING: a backslash followed by any single character.
// Originally taken from the Java grammar example of Terence Parr; the stricter alternatives of that
// example are kept below, commented out, so the active rule is more permissive than the Java one.
fragment
EscapeSequence
: '\\' .
// : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
// | UnicodeEscape
// | OctalEscape
;
// Unicode escape: backslash, 'u', then exactly four hexadecimal digits.
// Taken from the Java grammar example of Terence Parr.
// NOTE(review): in this file it is only mentioned in a commented-out alternative of EscapeSequence.
fragment
UnicodeEscape
: '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
;
// Octal escape: backslash followed by one to three octal digits; the three-digit form must start with 0..3.
// Taken from the Java grammar example of Terence Parr.
// NOTE(review): in this file it is only mentioned in a commented-out alternative of EscapeSequence.
fragment
OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
// Numeric literal: optional leading minus, integer part, optional fraction, optional exponent.
// The exponent accepts an explicit sign in either direction (1e+5 as well as 1e-5), as the Clojure
// reader does; previously 1e+5 was split into the number 1 followed by the symbol e+5.
// TODO get the real definition from a java grammar. Still unsupported: a leading '+', ratios (1/2),
// hexadecimal / radix notations and the N and M suffixes.
NUMBER: '-'? '0'..'9'+ ('.' '0'..'9'+)? (('e'|'E') ('+'|'-')? '0'..'9'+)?
;
// Character literal: a named character, a unicode escape with exactly four hexadecimal digits, or a
// backslash followed by any single character.
// The named alternatives cover every name documented for the Clojure reader. Without \formfeed,
// \backspace and \return those literals were split into a one-character literal (\f, \b, \r)
// followed by a stray symbol.
CHARACTER:
'\\newline'
| '\\space'
| '\\tab'
| '\\formfeed'
| '\\backspace'
| '\\return'
| '\\u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
| BACKSLASH . // TODO : is it correct to allow anything ?
;
// Single hexadecimal digit, used only as a building block of other lexer rules (CHARACTER, UnicodeEscape).
// Declared as a fragment so that it never produces a token of its own: as a standalone rule listed
// before SYMBOL it captured one-letter symbols such as a or F as HEXDIGIT tokens, which no parser
// rule accepts.
fragment
HEXDIGIT:
'0'..'9' | 'a'..'f' | 'A'..'F';
// The nil literal. Declared before SYMBOL; the form rule relies on nil being a literal rather than a symbol.
NIL: 'nil'
;
// The boolean literals. Declared before SYMBOL for the same reason as NIL.
BOOLEAN:
'true'
| 'false'
;
// Symbol token: either a lone '/', or a NAME optionally followed by '/' and a second NAME
// (presumably the namespace-qualified form - the grammar itself does not distinguish the two parts).
SYMBOL:
'/' // The division function FIXME is it necessary to hardcode this ?
| NAME ('/' NAME)?
;
// Type-hint token: any number of '#', one '^', then any number of the listed array type names,
// NAMEs or STRINGs, written back to back (no whitespace is matched between them).
// NOTE(review): no parser rule in this file references this token, and its prefix overlaps with
// CIRCUMFLEX and with the NUMBER_SIGN CIRCUMFLEX sequence of metadataForm - confirm it is still needed.
METADATA_TYPEHINT:
NUMBER_SIGN* CIRCUMFLEX ( 'ints' | 'floats' | 'longs' | 'doubles' | 'objects' | NAME | STRING )*
;
// One name segment of a SYMBOL: a head character, any number of rest characters, then any number
// of groups made of ':' followed by at least one rest character (so a name cannot end with ':').
fragment
NAME: SYMBOL_HEAD SYMBOL_REST* (':' SYMBOL_REST+)*
;
// Characters allowed as the first character of a NAME: ASCII letters and the listed punctuation.
// Digits, '.' and '#' are excluded here and only allowed by SYMBOL_REST.
fragment
SYMBOL_HEAD:
'a'..'z' | 'A'..'Z' | '*' | '+' | '!' | '-' | '_' | '?' | '>' | '<' | '=' | '$'
// other characters will be allowed eventually, but not all macro characters have been determined
;
// Characters allowed after the first character of a NAME (and anywhere in a KEYWORD body):
// every SYMBOL_HEAD character plus digits, '.' and '#'.
fragment
SYMBOL_REST:
SYMBOL_HEAD
| '0'..'9' // Written as an explicit range because a strange "cannot find matchRange symbol" error occurred when compiling the parser
| '.' // multiple successive dots are allowed by the reader (but will break at evaluation)
| NUMBER_SIGN // FIXME normally # is allowed only in syntax quote forms, in last position
;
// Parser rule (declared among the lexer rules): any single self-evaluating token -
// string, number, character, nil, boolean or keyword.
literal:
STRING //-> template(it={$STRING.text}) "<span style='color: red ; '>$it$</span>"
| NUMBER
| CHARACTER
| NIL
| BOOLEAN
| KEYWORD
;
// Keyword token: one or two ':' characters, at least one SYMBOL_REST character, then optionally
// '/' and at least one more SYMBOL_REST character. Because SYMBOL_REST is used throughout, the
// first character after the colon(s) may also be a digit, '.' or '#'.
KEYWORD:
':' (':')? SYMBOL_REST+ ('/' SYMBOL_REST+)?
;
// Backquote: prefix of syntaxQuoteForm.
SYNTAX_QUOTE:
'`'
;
// Two-character '~@' sequence: prefix of unquoteSplicingForm.
UNQUOTE_SPLICING:
'~@'
;
// Single '~': prefix of unquoteForm.
UNQUOTE:
'~'
;
// Line comment: ';' up to the end of the line, including the line terminator when there is one
// (the terminator is optional so a comment on the last line of the input still matches).
// The token is sent to the hidden channel, so the parser does not see it.
COMMENT:
';' ~('\r' | '\n')* ('\r'? '\n')? {$channel=HIDDEN;} //{skip();} // FIXME should use NEWLINE but NEWLINE has a problem I don't understand for the moment
;
// Whitespace run: spaces, tabs, commas, carriage returns and newlines (the comma is listed with the
// whitespace characters). The token is sent to the hidden channel, so the parser does not see it.
SPACE: (' '|'\t'|','|'\r'|'\n')+ {$channel=HIDDEN;} // FIXME should use NEWLINE but NEWLINE has a problem I don't understand for the moment
;
// Lambda argument token: '%' followed by a positive integer without leading zero, or '%&', or a bare '%'.
// The form rule only accepts this token while inLambda is true.
// TODO how many (original note; presumably about the highest argument index to allow - confirm)
LAMBDA_ARG:
'%' '1'..'9' '0'..'9'*
| '%&'
| '%'
;
/*
* Parser part
*/
// Entry rule: zero or more top-level forms.
// The embedded action prints "form found" to standard output once per form matched.
// NOTE(review): the rule does not end with EOF, so it presumably returns quietly at the first token
// that cannot start a form instead of reporting it - confirm whether that is intended.
// NOTE(review): the println looks like a debugging aid - confirm before shipping.
file:
( form { System.out.println("form found"); } )*
;
// A single form: a lambda argument (inside a lambda only), a literal, '&', an optionally
// metadata-prefixed special form / symbol / list / vector / map, a macro form, a dispatch macro
// form or a set. The text of every SYMBOL matched here is appended to the symbols list.
// Note : dispatch macros are hardwired in clojure
form :
{this.inLambda}? LAMBDA_ARG
| literal // Place literal first to make nil and booleans take precedence over symbol (impossible to
// name a symbol nil, true or false)
// NOTE(review): the COMMENT lexer rule sends its tokens to the hidden channel, so the next alternative looks unreachable - confirm.
| COMMENT
| AMPERSAND
| metadataForm? ( SPECIAL_FORM | s=SYMBOL { symbols.add(s.getText()); } | list | vector | map )
| macroForm
| dispatchMacroForm
| set
;
// Forms introduced by a one-token reader macro prefix: quote, meta, deref, syntax-quote and the two
// unquote variants. The unquote alternatives are guarded by a predicate and are only viable while
// syntaxQuoteDepth is positive.
macroForm:
quoteForm
| metaForm
| derefForm
| syntaxQuoteForm
| { this.syntaxQuoteDepth > 0 }? unquoteSplicingForm
| { this.syntaxQuoteDepth > 0 }? unquoteForm
;
// Forms introduced by '#': regex literal, var-quote and lambda.
// The lambda alternative is guarded so that it is not viable while another lambda body is being parsed.
dispatchMacroForm:
REGEX_LITERAL
| varQuoteForm
| {!this.inLambda}? lambdaForm // contraction for anonymousFunction
;
// A parenthesised sequence of forms. Once both parentheses have been matched, the token index of
// each one is stored in parensMatching under the index of the other, so the pair can be looked up
// from either side.
list
    : o=OPEN_PAREN form* c=CLOSE_PAREN
      {
          Integer openIndex = Integer.valueOf(o.getTokenIndex());
          Integer closeIndex = Integer.valueOf(c.getTokenIndex());
          parensMatching.put(openIndex, closeIndex);
          parensMatching.put(closeIndex, openIndex);
      }
    ;
// A vector: zero or more forms between square brackets.
vector: LEFT_SQUARE_BRACKET form* RIGHT_SQUARE_BRACKET
;
// A map: zero or more pairs of forms between curly brackets (forms are consumed two at a time,
// so an odd number of forms does not match).
map: LEFT_CURLY_BRACKET (form form)* RIGHT_CURLY_BRACKET
;
// Quote: an apostrophe followed by one form.
// syntaxQuoteDepth is incremented in @init and decremented in @after, so the unquote alternatives
// of macroForm are viable inside a quoted form as well.
// NOTE(review): confirm that a plain quote is meant to enable unquote the same way a syntax-quote does.
quoteForm
@init { this.syntaxQuoteDepth++; }
@after { this.syntaxQuoteDepth--; }
: APOSTROPHE form
;
// Meta: '^' followed by one form.
metaForm: CIRCUMFLEX form
;
// Deref: '@' followed by one form.
derefForm: COMMERCIAL_AT form
;
// Syntax-quote: a backquote followed by one form.
// syntaxQuoteDepth is incremented in @init and decremented in @after, which makes the unquote
// alternatives of macroForm viable while the quoted form is being parsed.
syntaxQuoteForm
@init { this.syntaxQuoteDepth++; }
@after { this.syntaxQuoteDepth--; }
:
SYNTAX_QUOTE form
;
// Unquote: '~' followed by one form.
// syntaxQuoteDepth is decremented in @init and incremented back in @after, so the unquoted form is
// parsed one quoting level lower than its surroundings.
unquoteForm
@init { this.syntaxQuoteDepth--; }
@after { this.syntaxQuoteDepth++; }
:
UNQUOTE form
;
// Unquote-splicing: '~@' followed by one form.
// syntaxQuoteDepth is decremented in @init and incremented back in @after, exactly as in unquoteForm.
unquoteSplicingForm
@init { this.syntaxQuoteDepth--; }
@after { this.syntaxQuoteDepth++; }
:
UNQUOTE_SPLICING form
;
// A set: '#' followed by zero or more forms between curly brackets.
set: NUMBER_SIGN LEFT_CURLY_BRACKET form* RIGHT_CURLY_BRACKET
;
// Metadata prefix: '#' '^' followed by a map, a symbol, a keyword or a string.
// Used by the form rule as an optional prefix; a SYMBOL matched here is not added to the symbols list.
metadataForm:
NUMBER_SIGN CIRCUMFLEX (map | SYMBOL|KEYWORD|STRING)
;
// Var-quote: '#' followed by an apostrophe and one form.
varQuoteForm:
NUMBER_SIGN APOSTROPHE form
;
// Lambda shorthand: '#' followed by a list.
// inLambda is set in @init and cleared in @after. While it is set, the form rule accepts LAMBDA_ARG
// tokens and dispatchMacroForm refuses a nested lambdaForm.
lambdaForm
@init {
this.inLambda = true;
}
@after {
this.inLambda = false;
}
: NUMBER_SIGN list
;