-
Notifications
You must be signed in to change notification settings - Fork 2
/
Topdec.lex
186 lines (170 loc) · 6.57 KB
/
Topdec.lex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
(* Lexical specification for Standard ML. NICK, August 1990. *)
open Tokens
type pos = LexBasics.pos
type arg = LexUtils.LexArgument
type lexresult = (svalue, LexBasics.pos) token
fun eof _ = Tokens.EOF(LexBasics.DUMMY, LexBasics.DUMMY)
val lParen = "(" and rParen = ")"
(* Something which returns a (pos * pos) for a token. This is what ML-Yacc
works with, and we use it in the productions below. *)
fun ofLength(arg, yypos, yytext) =
let
val yypos = yypos - 2
(* If somebody can tell be why the yypos seems to be permanently
two characters ahead of where it should be, I'd be interested... *)
val LexBasics.SOURCE_READER{positionFn, ...} =
LexUtils.sourceReaderOf arg
val lPos = positionFn yypos
val rPos = positionFn(yypos + size yytext)
in
(lPos, rPos)
end
fun token0(tokFn, arg, yypos, yytext) =
tokFn(ofLength(arg, yypos, yytext))
and token1(tokFn, value, arg, yypos, yytext) =
let
val (l, r) = ofLength(arg, yypos, yytext)
in
tokFn(value, l, r)
end
fun positionOfStream(arg, yypos) =
let
val LexBasics.SOURCE_READER{positionFn, ...} =
LexUtils.sourceReaderOf arg
in
positionFn yypos
end
fun error(arg, yypos, msg) =
raise LexBasics.LEXICAL_ERROR(positionOfStream(arg, yypos), msg)
(* addAsciiChar can fail, so we need to generate position info for it. *)
fun addAsciiChar(arg, yypos, yytext) =
LexUtils.addAsciiChar (positionOfStream(arg, yypos), yytext) arg
(*addUnicodeChar can fail, so we need to generate position info for it.*)
fun addUnicodeChar (arg, yypos, yytext) =
LexUtils.addUnicodeChar (positionOfStream (arg, yypos), yytext) arg
%%
%header (functor TopdecLex(structure Tokens: Topdec_TOKENS
structure LexBasics: LEX_BASICS
structure LexUtils: LEX_UTILS
sharing type LexUtils.svalue = Tokens.svalue
sharing type LexUtils.token = Tokens.token
sharing type LexUtils.pos = LexBasics.pos
sharing type LexUtils.SourceReader
= LexBasics.SourceReader
)
);
%arg (arg: UserDeclarations.arg);
WhiteSpace = [\ \t\r]+;
VWhiteSpace = [\ \t\r\n\012]+;
UC = [A-Z];
LC = [a-z];
Letter = {UC} | {LC};
Digit = [0-9];
DecPosInteger = {Digit}+;
DecNegInteger = \126 {DecPosInteger};
HexDigit = [0-9a-fA-F];
HexInteger = (\126)? "0x" {HexDigit}+;
Word = "0w" ("x" {HexDigit}+ | {Digit}+);
DecInteger = {DecPosInteger} | {DecNegInteger};
Real = ({DecInteger} "." {DecPosInteger} ("E" {DecInteger})?)
| ({DecInteger} "E" {DecInteger});
NormalId = {Letter} ({Letter} | {Digit} | [_'])*;
TyVar = "'" ({Letter} | {Digit} | [-+?_']) ({Letter} | {Digit} | [_'])*;
Symbol = [-!%&$#+<=>?@\\~`^|*:/];
SymbolicId = {Symbol}+;
AnyId = {NormalId} | {SymbolicId};
QualifiedId = ({AnyId} ".")+ {AnyId};
%s S C;
%%
<INITIAL>{VWhiteSpace} => (continue());
<INITIAL>{NormalId} => (token1(LexUtils.identifier, yytext,
arg, yypos, yytext
)
);
<INITIAL>{SymbolicId} => (token1(LexUtils.identifier, yytext,
arg, yypos, yytext
)
);
<INITIAL>{QualifiedId} => (token1(if LexUtils.isQualStar yytext
then (LexBasics.shifting "QUAL_STAR";
QUAL_STAR
)
else (LexBasics.shifting "QUAL_ID";
QUAL_ID
),
LexUtils.asQualId yytext,
arg, yypos, yytext
)
);
<INITIAL>"..." => (token0(DOTDOTDOT, arg, yypos, yytext));
<INITIAL>"(" => (token0(LPAREN, arg, yypos, yytext));
<INITIAL>")" => (token0(RPAREN, arg, yypos, yytext));
<INITIAL>"[" => (token0(LBRACKET, arg, yypos, yytext));
<INITIAL>"]" => (token0(RBRACKET, arg, yypos, yytext));
<INITIAL>"{" => (token0(LBRACE, arg, yypos, yytext));
<INITIAL>"}" => (token0(RBRACE, arg, yypos, yytext));
<INITIAL>"," => (token0(COMMA, arg, yypos, yytext));
<INITIAL>";" => (token0(SEMICOLON, arg, yypos, yytext));
<INITIAL>"_" => (token0(UNDERBAR, arg, yypos, yytext));
<INITIAL>{Real} => (LexBasics.shifting "REAL(...)";
token1(REAL, LexUtils.asReal yytext,
arg, yypos, yytext));
<INITIAL>{Digit} => (LexBasics.shifting "DIGIT(...)";
token1(DIGIT, LexUtils.asDigit yytext,
arg, yypos, yytext));
<INITIAL>{DecPosInteger}=> (LexBasics.shifting "DECPOSINTEGER(...)";
token1(DECPOSINTEGER, LexUtils.asInteger yytext,
arg, yypos, yytext));
<INITIAL>{DecNegInteger}=> (LexBasics.shifting "DECNEGINTEGER(...)";
token1(DECNEGINTEGER, LexUtils.asInteger yytext,
arg, yypos, yytext));
<INITIAL>{HexInteger} => (LexBasics.shifting "HEXINTEGER(...)";
token1(HEXINTEGER, LexUtils.asInteger yytext,
arg, yypos, yytext));
<INITIAL>{Word} => (LexBasics.shifting "WORD(...)";
token1(WORD, LexUtils.asWord yytext,
arg, yypos, yytext));
<INITIAL>{TyVar} => (LexBasics.shifting "TYVAR(...)";
token1(TYVAR, yytext, arg, yypos, yytext));
<INITIAL>\" => (YYBEGIN S; lex (LexUtils.clearString arg) ());
<INITIAL>"(*" => (YYBEGIN C; lex (LexUtils.newComment arg) ());
<INITIAL>"(*[" => (token0(LCOMSPEC, arg, yypos, yytext));
<INITIAL>"]*)" => (token0(RCOMSPEC, arg, yypos, yytext));
<S>[^"\\\n]* => (lex (LexUtils.addChars yytext arg) ());
<S>\" => (YYBEGIN INITIAL;
LexBasics.shifting "STRING(...)";
token1(STRING, LexUtils.asString arg,
arg, yypos, yytext
)
);
<S>\n => (error(arg, yypos, "unclosed string");
YYBEGIN INITIAL;
LexBasics.shifting "STRING(bad)";
token1(STRING, "", arg, yypos, yytext)
);
<S>\\{VWhiteSpace}\\ => (continue());
<S>\\a => (lex (LexUtils.addChars (str(chr 7)) arg) ());
<S>\\b => (lex (LexUtils.addChars (str(chr 8)) arg) ());
<S>\\t => (lex (LexUtils.addChars "\t" arg) ());
<S>\\n => (lex (LexUtils.addChars "\n" arg) ());
<S>\\v => (lex (LexUtils.addChars (str(chr 11)) arg) ());
<S>\\f => (lex (LexUtils.addChars (str(chr 12)) arg) ());
<S>\\r => (lex (LexUtils.addChars (str(chr 13)) arg) ());
<S>\\\^[@-_] => (lex (LexUtils.addControlChar yytext arg) ());
<S>\\[0-9]{3} => (lex (addAsciiChar (arg, yypos, yytext)) ());
<S>\\u{HexDigit}{4} => (lex (addUnicodeChar (arg, yypos, yytext)) ());
<S>\\\" => (lex (LexUtils.addChars "\"" arg) ());
<S>\\\\ => (lex (LexUtils.addChars "\\" arg) ());
<S>\\ => (error(arg, yypos, "illegal string escape");
continue()
);
<C>"(*" => (lex (LexUtils.incComment arg) ());
<C>"*)" => (case LexUtils.decComment arg
of (0, arg') => (YYBEGIN INITIAL; lex arg' ())
| (_, arg') => lex arg' ()
);
<C>. => (continue());
<C>\n => (continue());
. => (error(arg, yypos, "cannot lex \"" ^ yytext ^ "\"");
continue()
);