forked from factor/factor
/
lexer.factor
156 lines (118 loc) · 4.09 KB
/
lexer.factor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
! Copyright (C) 2008, 2010 Slava Pestov, Joe Groff.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences accessors namespaces math words strings
io vectors arrays math.parser combinators continuations
source-files.errors ;
IN: lexer
TUPLE: lexer text line line-text line-length column parsing-words ;
TUPLE: lexer-parsing-word word line line-text column ;
: next-line ( lexer -- )
dup [ line>> ] [ text>> ] bi ?nth
[ >>line-text ] [ length >>line-length ] bi
[ 1 + ] change-line
0 >>column
drop ;
: push-parsing-word ( word -- )
lexer-parsing-word new
swap >>word
lexer get [
[ line>> >>line ]
[ line-text>> >>line-text ]
[ column>> >>column ] tri
] [ parsing-words>> push ] bi ;
: pop-parsing-word ( -- )
lexer get parsing-words>> pop* ;
: new-lexer ( text class -- lexer )
new
0 >>line
swap >>text
V{ } clone >>parsing-words
dup next-line ; inline
: <lexer> ( text -- lexer )
lexer new-lexer ;
ERROR: unexpected want got ;
: forbid-tab ( c -- c )
[ CHAR: \t eq? [ "[space]" "[tab]" unexpected ] when ] keep ; inline
: skip ( i seq ? -- n )
over length
[ [ swap forbid-tab CHAR: \s eq? xor ] curry find-from drop ] dip or ;
: change-lexer-column ( lexer quot -- )
[ [ column>> ] [ line-text>> ] bi ] prepose keep
column<< ; inline
GENERIC: skip-blank ( lexer -- )
M: lexer skip-blank ( lexer -- )
[ t skip ] change-lexer-column ;
GENERIC: skip-word ( lexer -- )
M: lexer skip-word ( lexer -- )
[
2dup nth CHAR: " eq? [ drop 1 + ] [ f skip ] if
] change-lexer-column ;
: still-parsing? ( lexer -- ? )
[ line>> ] [ text>> length ] bi <= ;
: still-parsing-line? ( lexer -- ? )
[ column>> ] [ line-length>> ] bi < ;
: (parse-token) ( lexer -- str )
{
[ column>> ]
[ skip-word ]
[ column>> ]
[ line-text>> ]
} cleave subseq ;
: parse-token ( lexer -- str/f )
dup still-parsing? [
dup skip-blank
dup still-parsing-line?
[ (parse-token) ] [ dup next-line parse-token ] if
] [ drop f ] if ;
: (scan-token) ( -- str/f ) lexer get parse-token ;
PREDICATE: unexpected-eof < unexpected got>> not ;
: unexpected-eof ( word -- * ) f unexpected ;
: scan-token ( -- str ) (scan-token) [ "token" unexpected-eof ] unless* ;
: expect ( token -- )
scan-token 2dup = [ 2drop ] [ unexpected ] if ;
: each-token ( ... end quot: ( ... token -- ... ) -- ... )
[ scan-token ] 2dip 2over =
[ 3drop ] [ [ nip call ] [ each-token ] 2bi ] if ; inline recursive
: map-tokens ( ... end quot: ( ... token -- ... elt ) -- ... seq )
collector [ each-token ] dip { } like ; inline
: parse-tokens ( end -- seq )
[ ] map-tokens ;
TUPLE: lexer-error line column line-text parsing-words error ;
M: lexer-error error-file error>> error-file ;
M: lexer-error error-line [ error>> error-line ] [ line>> ] bi or ;
: <lexer-error> ( msg -- error )
\ lexer-error new
lexer get [
[ line>> >>line ]
[ column>> >>column ] bi
] [
[ line-text>> >>line-text ]
[ parsing-words>> clone >>parsing-words ] bi
] bi
swap >>error ;
: simple-lexer-dump ( error -- )
[ line>> number>string ": " append ]
[ line-text>> dup string? [ drop "" ] unless ]
[ column>> 0 or ] tri
pick length + CHAR: \s <string>
[ write ] [ print ] [ write "^" print ] tri* ;
: (parsing-word-lexer-dump) ( error parsing-word -- )
[
line>> number>string
over line>> number>string length
CHAR: \s pad-head
": " append write
] [ line-text>> dup string? [ drop "" ] unless print ] bi
simple-lexer-dump ;
: parsing-word-lexer-dump ( error parsing-word -- )
2dup [ line>> ] bi@ =
[ drop simple-lexer-dump ]
[ (parsing-word-lexer-dump) ] if ;
: lexer-dump ( error -- )
dup parsing-words>>
[ simple-lexer-dump ]
[ last parsing-word-lexer-dump ] if-empty ;
: with-lexer ( lexer quot -- newquot )
[ lexer set ] dip [ <lexer-error> rethrow ] recover ; inline
SYMBOL: lexer-factory
[ <lexer> ] lexer-factory set-global