-
Notifications
You must be signed in to change notification settings - Fork 82
/
wrappers.hs
436 lines (340 loc) · 15.2 KB
/
wrappers.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
-- -----------------------------------------------------------------------------
-- Alex wrapper code.
--
-- This code is in the PUBLIC DOMAIN; you may copy it freely and use
-- it for any purpose whatsoever.
import Data.Word (Word8)
#if defined(ALEX_BASIC_BYTESTRING) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING)
import qualified Data.Char
import qualified Data.ByteString.Lazy as ByteString
import qualified Data.ByteString.Internal as ByteString (w2c)
#elif defined(ALEX_STRICT_BYTESTRING)
import qualified Data.Char
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Internal as ByteString
import qualified Data.ByteString.Unsafe as ByteString
#else
import qualified Data.Bits
-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
utf8Encode :: Char -> [Word8]
utf8Encode = map fromIntegral . go . ord
where
go oc
| oc <= 0x7f = [oc]
| oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
, 0x80 + oc Data.Bits..&. 0x3f
]
| oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
, 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
, 0x80 + oc Data.Bits..&. 0x3f
]
| otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
, 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
, 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
, 0x80 + oc Data.Bits..&. 0x3f
]
#endif
type Byte = Word8
-- -----------------------------------------------------------------------------
-- The input type
#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_GSCAN)
type AlexInput = (AlexPosn, -- current position,
Char, -- previous char
[Byte], -- pending bytes on current char
String) -- current input string
ignorePendingBytes :: AlexInput -> AlexInput
ignorePendingBytes (p,c,ps,s) = (p,c,[],s)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (p,c,bs,s) = c
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
alexGetByte (p,c,[],[]) = Nothing
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
(b:bs) = utf8Encode c
in p' `seq` Just (b, (p', c, bs, s))
#endif
#if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING)
type AlexInput = (AlexPosn, -- current position,
Char, -- previous char
ByteString.ByteString) -- current input string
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (p,c,s) = c
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,_,cs) | ByteString.null cs = Nothing
| otherwise = let b = ByteString.head cs
cs' = ByteString.tail cs
c = ByteString.w2c b
p' = alexMove p c
in p' `seq` cs' `seq` Just (b, (p', c, cs'))
#endif
#ifdef ALEX_BASIC_BYTESTRING
type AlexInput = (Char,
ByteString.ByteString)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (c,_) = c
alexGetByte (_, cs)
| ByteString.null cs = Nothing
| otherwise = Just (ByteString.head cs,
(ByteString.w2c $ ByteString.head cs,
ByteString.tail cs))
#endif
#ifdef ALEX_STRICT_BYTESTRING
data AlexInput = AlexInput { alexChar :: {-# UNPACK #-}!Char
, alexStr :: {-# UNPACK #-}!ByteString.ByteString }
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = alexChar
alexGetByte (AlexInput _ cs)
| ByteString.null cs = Nothing
| otherwise = Just $! (ByteString.head cs, AlexInput c cs')
where
(c,cs') = (ByteString.w2c (ByteString.unsafeHead cs)
, ByteString.unsafeTail cs)
#endif
-- -----------------------------------------------------------------------------
-- Token positions
-- `Posn' records the location of a token in the input text. It has three
-- fields: the address (number of chacaters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.
#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN)
data AlexPosn = AlexPn !Int !Int !Int
deriving (Eq,Show)
alexStartPos :: AlexPosn
alexStartPos = AlexPn 0 1 1
alexMove :: AlexPosn -> Char -> AlexPosn
alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1)
alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1) 1
alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
#endif
-- -----------------------------------------------------------------------------
-- Default monad
#ifdef ALEX_MONAD
data AlexState = AlexState {
alex_pos :: !AlexPosn, -- position at current input location
alex_inp :: String, -- the current input
alex_chr :: !Char, -- the character before the input
alex_bytes :: [Byte],
alex_scd :: !Int -- the current startcode
#ifdef ALEX_MONAD_USER_STATE
, alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
#endif
}
-- Compile with -funbox-strict-fields for best results!
runAlex :: String -> Alex a -> Either String a
runAlex input (Alex f)
= case f (AlexState {alex_pos = alexStartPos,
alex_inp = input,
alex_chr = '\n',
alex_bytes = [],
#ifdef ALEX_MONAD_USER_STATE
alex_ust = alexInitUserState,
#endif
alex_scd = 0}) of Left msg -> Left msg
Right ( _, a ) -> Right a
newtype Alex a = Alex { unAlex :: AlexState -> Either String (AlexState, a) }
instance Monad Alex where
m >>= k = Alex $ \s -> case unAlex m s of
Left msg -> Left msg
Right (s',a) -> unAlex (k a) s'
return a = Alex $ \s -> Right (s,a)
alexGetInput :: Alex AlexInput
alexGetInput
= Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp} ->
Right (s, (pos,c,bs,inp))
alexSetInput :: AlexInput -> Alex ()
alexSetInput (pos,c,bs,inp)
= Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp} of
s@(AlexState{}) -> Right (s, ())
alexError :: String -> Alex a
alexError message = Alex $ \s -> Left message
alexGetStartCode :: Alex Int
alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)
alexSetStartCode :: Int -> Alex ()
alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())
#ifdef ALEX_MONAD_USER_STATE
alexGetUserState :: Alex AlexUserState
alexGetUserState = Alex $ \s@AlexState{alex_ust=ust} -> Right (s,ust)
alexSetUserState :: AlexUserState -> Alex ()
alexSetUserState ss = Alex $ \s -> Right (s{alex_ust=ss}, ())
#endif
alexMonadScan = do
inp <- alexGetInput
sc <- alexGetStartCode
case alexScan inp sc of
AlexEOF -> alexEOF
AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
AlexSkip inp' len -> do
alexSetInput inp'
alexMonadScan
AlexToken inp' len action -> do
alexSetInput inp'
action (ignorePendingBytes inp) len
-- -----------------------------------------------------------------------------
-- Useful token actions
type AlexAction result = AlexInput -> Int -> Alex result
-- just ignore this token and scan another one
-- skip :: AlexAction result
skip input len = alexMonadScan
-- ignore this token, but set the start code to a new value
-- begin :: Int -> AlexAction result
begin code input len = do alexSetStartCode code; alexMonadScan
-- perform an action for this token, and set the start code to a new value
andBegin :: AlexAction result -> Int -> AlexAction result
(action `andBegin` code) input len = do alexSetStartCode code; action input len
token :: (AlexInput -> Int -> token) -> AlexAction token
token t input len = return (t input len)
#endif /* ALEX_MONAD */
-- -----------------------------------------------------------------------------
-- Monad (with ByteString input)
#ifdef ALEX_MONAD_BYTESTRING
data AlexState = AlexState {
alex_pos :: !AlexPosn, -- position at current input location
alex_inp :: ByteString.ByteString, -- the current input
alex_chr :: !Char, -- the character before the input
alex_scd :: !Int -- the current startcode
#ifdef ALEX_MONAD_USER_STATE
, alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
#endif
}
-- Compile with -funbox-strict-fields for best results!
runAlex :: ByteString.ByteString -> Alex a -> Either String a
runAlex input (Alex f)
= case f (AlexState {alex_pos = alexStartPos,
alex_inp = input,
alex_chr = '\n',
#ifdef ALEX_MONAD_USER_STATE
alex_ust = alexInitUserState,
#endif
alex_scd = 0}) of Left msg -> Left msg
Right ( _, a ) -> Right a
newtype Alex a = Alex { unAlex :: AlexState -> Either String (AlexState, a) }
instance Monad Alex where
m >>= k = Alex $ \s -> case unAlex m s of
Left msg -> Left msg
Right (s',a) -> unAlex (k a) s'
return a = Alex $ \s -> Right (s,a)
alexGetInput :: Alex AlexInput
alexGetInput
= Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_inp=inp} ->
Right (s, (pos,c,inp))
alexSetInput :: AlexInput -> Alex ()
alexSetInput (pos,c,inp)
= Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_inp=inp} of
s@(AlexState{}) -> Right (s, ())
alexError :: String -> Alex a
alexError message = Alex $ \s -> Left message
alexGetStartCode :: Alex Int
alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)
alexSetStartCode :: Int -> Alex ()
alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())
alexMonadScan = do
inp <- alexGetInput
sc <- alexGetStartCode
case alexScanB inp sc of
AlexEOF -> alexEOF
AlexError ((AlexPn _ line column),_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
AlexSkip inp' len -> do
alexSetInput inp'
alexMonadScan
AlexToken inp' len action -> do
alexSetInput inp'
action (ignorePendingBytes inp) len
-- -----------------------------------------------------------------------------
-- Useful token actions
type AlexAction result = AlexInput -> Int -> Alex result
-- just ignore this token and scan another one
-- skip :: AlexAction result
skip input len = alexMonadScan
-- ignore this token, but set the start code to a new value
-- begin :: Int -> AlexAction result
begin code input len = do alexSetStartCode code; alexMonadScan
-- perform an action for this token, and set the start code to a new value
andBegin :: AlexAction result -> Int -> AlexAction result
(action `andBegin` code) input len = do alexSetStartCode code; action input len
token :: (AlexInput -> Int -> token) -> AlexAction token
token t input len = return (t input len)
#endif /* ALEX_MONAD_BYTESTRING */
-- -----------------------------------------------------------------------------
-- Basic wrapper
#ifdef ALEX_BASIC
type AlexInput = (Char,[Byte],String)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (c,_,_) = c
-- alexScanTokens :: String -> [token]
alexScanTokens str = go ('\n',[],str)
where go inp@(_,_bs,s) =
case alexScan inp 0 of
AlexEOF -> []
AlexError _ -> error "lexical error"
AlexSkip inp' len -> go inp'
AlexToken inp' len act -> act (take len s) : go inp'
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (c,(b:bs),s) = Just (b,(c,bs,s))
alexGetByte (c,[],[]) = Nothing
alexGetByte (_,[],(c:s)) = case utf8Encode c of
(b:bs) -> Just (b, (c, bs, s))
[] -> Nothing
#endif
-- -----------------------------------------------------------------------------
-- Basic wrapper, ByteString version
#ifdef ALEX_BASIC_BYTESTRING
-- alexScanTokens :: String -> [token]
alexScanTokens str = go ('\n',str)
where go inp@(_,str) =
case alexScanB inp 0 of
AlexEOF -> []
AlexError _ -> error "lexical error"
AlexSkip inp' len -> go inp'
AlexToken inp' len act -> act (ByteString.take (fromIntegral len) str) : go inp'
#endif
#ifdef ALEX_STRICT_BYTESTRING
-- alexScanTokens :: String -> [token]
alexScanTokens str = go (AlexInput '\n' str)
where go inp@(AlexInput _ str) =
case alexScanB inp 0 of
AlexEOF -> []
AlexError _ -> error "lexical error"
AlexSkip inp' len -> go inp'
AlexToken inp' len act -> act (ByteString.unsafeTake len str) : go inp'
#endif
-- -----------------------------------------------------------------------------
-- Posn wrapper
-- Adds text positions to the basic model.
#ifdef ALEX_POSN
--alexScanTokens :: String -> [token]
alexScanTokens str = go (alexStartPos,'\n',[],str)
where go inp@(pos,_,_,str) =
case alexScan inp 0 of
AlexEOF -> []
AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
AlexSkip inp' len -> go inp'
AlexToken inp' len act -> act pos (take len str) : go inp'
#endif
-- -----------------------------------------------------------------------------
-- Posn wrapper, ByteString version
#ifdef ALEX_POSN_BYTESTRING
--alexScanTokens :: ByteString -> [token]
alexScanTokens str = go (alexStartPos,'\n',str)
where go inp@(pos,_,str) =
case alexScanB inp 0 of
AlexEOF -> []
AlexError ((AlexPn _ line column),_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
AlexSkip inp' len -> go inp'
AlexToken inp' len act -> act pos (ByteString.take (fromIntegral len) str) : go inp'
#endif
-- -----------------------------------------------------------------------------
-- GScan wrapper
-- For compatibility with previous versions of Alex, and because we can.
#ifdef ALEX_GSCAN
alexGScan stop state inp = alex_gscan stop alexStartPos '\n' [] inp (0,state)
alex_gscan stop p c bs inp (sc,state) =
case alexScan (p,c,bs,inp) sc of
AlexEOF -> stop p c inp (sc,state)
AlexError _ -> stop p c inp (sc,state)
AlexSkip (p',c',bs',inp') len -> alex_gscan stop p' c' bs' inp' (sc,state)
AlexToken (p',c',bs',inp') len k ->
k p c inp len (\scs -> alex_gscan stop p' c' bs' inp' scs)
(sc,state)
#endif