-
-
Notifications
You must be signed in to change notification settings - Fork 353
/
RBScanner.class.st
570 lines (508 loc) · 14.6 KB
/
RBScanner.class.st
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
"
RBScanner is a stream that returns a sequence of tokens from the string that it is created on. The tokens know where they came from in the source code and which comments were attached to them.
Instance Variables:
buffer <PositionableStream> Accumulates the text for the current token.
characterType <ByteSymbol> The type of the next character. (e.g. #alphabetic, etc.)
classificationTable <Array of: Symbol> Mapping from Character values to their characterType.
comments <Collection of: Interval> Source intervals of scanned comments that must be attached to the next token.
currentCharacter <Character> The character currently being processed.
errorBlock <BlockClosure> The block to execute on lexical errors.
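extendedLiterals <Boolean> True if IBM-type literals are allowed. In VW, this is false. (Not declared in this class's instVars list.)
nameSpaceCharacter <Character> The character used to separate namespaces. (Not declared in this class's instVars list.)
numberType <ByteSymbol> The method to perform: to scan a number. (Not declared in this class's instVars list.)
separatorsInLiterals <Boolean> True if separators are allowed within literals. (Not declared in this class's instVars list.)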
stream <PositionableStream> Contains the text to be scanned.
tokenStart <Integer> The source position of the beginning of the current token
Class Instance Variables:
classificationTable <Array> the default classification table for all characters
Shared Variables:
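PatternVariableCharacter <Character> the character that starts a pattern node
KeywordPatternCharacter <Character> set to $@ by the class-side initializeClassificationTable
CascadePatternCharacter <Character> set to $; by the class-side initializeClassificationTable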
"
Class {
#name : #RBScanner,
#superclass : #Object,
#instVars : [
'stream',
'buffer',
'tokenStart',
'currentCharacter',
'characterType',
'classificationTable',
'comments',
'errorBlock'
],
#classVars : [
'CascadePatternCharacter',
'KeywordPatternCharacter',
'PatternVariableCharacter'
],
#classInstVars : [
'classificationTable'
],
#category : #'AST-Core-Parser'
}
{ #category : #accessing }
RBScanner class >> cascadePatternCharacter [
	"Answer the current value of the CascadePatternCharacter class variable."

	^ CascadePatternCharacter
]
{ #category : #accessing }
RBScanner class >> classificationTable [
	"Answer the class-side classification table, building it on first access."

	^ classificationTable ifNil: [
		  self initializeClassificationTable.
		  classificationTable ]
]
{ #category : #'class initialization' }
RBScanner class >> initialize [
	"Class initialization: (re)build the classification table."

	self initializeClassificationTable
]
{ #category : #'class initialization' }
RBScanner class >> initializeChars: characters to: aSymbol [
	"Store aSymbol in the classification table at the integer value of each
	character in characters."

	characters do: [ :char |
		classificationTable at: char asInteger put: aSymbol ]
]
{ #category : #'class initialization' }
RBScanner class >> initializeClassificationTable [
	"Reset the three pattern characters and rebuild the classification table.
	A later step overwrites the entry written by an earlier step for the same
	character, so the order of the steps below is significant."

	| letters separators |
	PatternVariableCharacter := $`.
	KeywordPatternCharacter := $@.
	CascadePatternCharacter := $;.
	classificationTable := Array new: 255.
	letters := Character allCharacters select: [ :char | char isLetter ].
	self initializeChars: letters to: #alphabetic.
	self initializeUnderscore.
	self initializeChars: '01234567890' to: #digit.
	self initializeChars: Character specialCharacters to: #binary.
	self initializeChars: '().:;[]{}^' to: #special.
	separators := Character allCharacters select: [ :char | char isSeparator ].
	self initializeChars: separators to: #separator
]
{ #category : #'class initialization' }
RBScanner class >> initializeUnderscore [
	"Classify the underscore character as #alphabetic in the classification table."

	self classificationTable at: $_ asInteger put: #alphabetic
]
{ #category : #testing }
RBScanner class >> isSelector: aSymbol [
	"Answer whether the text of aSymbol scans as exactly one non-empty literal
	symbol token with nothing left over in the input."

	| scanner token |
	scanner := self basicNew.
	scanner on: (ReadStream on: aSymbol asString).
	scanner step.
	token := scanner scanAnySymbol.
	^ token isLiteralToken and: [
		  token value isEmpty not and: [ scanner atEnd ] ]
]
{ #category : #testing }
RBScanner class >> isVariable: aString [
	"Answer whether the first token of aString is an identifier that spans the
	whole string, and aString contains no period."

	| scanner token |
	scanner := self on: (ReadStream on: aString).
	token := scanner next.
	^ token isIdentifier and: [
		  token start = 1 and: [
			  token stop = aString size and: [ (aString includes: $.) not ] ] ]
]
{ #category : #accessing }
RBScanner class >> keywordPatternCharacter [
	"Answer the current value of the KeywordPatternCharacter class variable."

	^ KeywordPatternCharacter
]
{ #category : #'instance creation' }
RBScanner class >> new [
	"Answer a new instance. The underscore entry of the classification table
	is re-applied before every instantiation."

	self initializeUnderscore.
	^ super new
]
{ #category : #'instance creation' }
RBScanner class >> on: aStream [
	"Answer a scanner reading from aStream, already positioned on its first
	character with leading separators and comments skipped."

	^ (self new on: aStream)
		  step;
		  stripSeparators;
		  yourself
]
{ #category : #'instance creation' }
RBScanner class >> on: aStream errorBlock: aBlock [
	"Answer a scanner reading from aStream that uses aBlock as its error block.
	The block is installed before the first step so that it is already in place
	while leading separators and comments are skipped."

	^ (self new on: aStream)
		  errorBlock: aBlock;
		  step;
		  stripSeparators;
		  yourself
]
{ #category : #accessing }
RBScanner class >> patternVariableCharacter [
	"Answer the current value of the PatternVariableCharacter class variable."

	^ PatternVariableCharacter
]
{ #category : #testing }
RBScanner >> atEnd [
	"Answer whether the last step found the end of the input."

	^ characterType = #eof
]
{ #category : #private }
RBScanner >> classify: aCharacter [
	"Answer the character type symbol for aCharacter, or nil when it has none.
	nil answers nil and the character with value 0 answers #separator.
	Values up to 255 are looked up in the classification table; larger values
	are classified with isLetter and isSeparator."

	| code |
	aCharacter ifNil: [ ^ nil ].
	code := aCharacter asInteger.
	code == 0 ifTrue: [ ^ #separator ].
	code <= 255 ifTrue: [ ^ classificationTable at: code ].
	aCharacter isLetter ifTrue: [ ^ #alphabetic ].
	^ aCharacter isSeparator
		  ifTrue: [ #separator ]
		  ifFalse: [ nil ]
]
{ #category : #accessing }
RBScanner >> contents [
	"Answer an Array holding every remaining token, consuming the input."

	| tokens |
	tokens := (Array new: 50) writeStream.
	[ self atEnd ] whileFalse: [ tokens nextPut: self next ].
	^ tokens contents
]
{ #category : #'error handling' }
RBScanner >> errorBlock [
	"Answer the installed error block, or an empty two-argument block when
	none has been set."

	^ errorBlock ifNil: [ [ :message :position |  ] ]
]
{ #category : #accessing }
RBScanner >> errorBlock: aBlock [
	"Install aBlock as the block evaluated by scannerError:."

	errorBlock := aBlock
]
{ #category : #'error handling' }
RBScanner >> errorPosition [
	"Answer the position reported for errors: the current stream position."

	^ stream position
]
{ #category : #accessing }
RBScanner >> getComments [
	"Answer the comment intervals collected so far and start a fresh collection.
	Answer nil, leaving the collection untouched, when none are pending."

	| pending |
	comments isEmpty ifTrue: [ ^ nil ].
	pending := comments.
	comments := OrderedCollection new: 1.
	^ pending
]
{ #category : #testing }
RBScanner >> isReadable [
	"Always answers true."

	^ true
]
{ #category : #testing }
RBScanner >> isWritable [
	"Always answers false."

	^ false
]
{ #category : #accessing }
RBScanner >> next [
	"Answer the next token. Afterwards the following separators and comments
	are skipped, and the comments pending at that point are attached to the
	answered token."

	| result |
	buffer reset.
	tokenStart := stream position.
	result := characterType = #eof
		          ifFalse: [ self scanToken ]
		          ifTrue: [ "The EOF token should occur after the end of input"
			          RBToken start: tokenStart + 1 ].
	self stripSeparators.
	result comments: self getComments.
	^ result
]
{ #category : #accessing }
RBScanner >> nextPut: anObject [
	"Writing is not supported by the receiver; signal that this message
	should not be implemented."

	self shouldNotImplement
]
{ #category : #initialization }
RBScanner >> on: aStream [
	"Set up the receiver to scan aStream: remember the stream, create an empty
	token buffer and comment collection, and take the class-side
	classification table."

	stream := aStream.
	buffer := (String new: 60) writeStream.
	comments := OrderedCollection new.
	classificationTable := self class classificationTable
]
{ #category : #'error handling' }
RBScanner >> parseErrorNode: aMessageString [
	"Answer an RBParseErrorNode carrying aMessageString and the source text
	running from the error position to the end of the stream contents.
	The stream position is 0 while nothing has been read (for example on an
	empty source), and 0 is not a valid start index for copyFrom:to:, so the
	start of the copy is clamped to 1. The position handed to the node is the
	unmodified error position."

	| source errorStart |
	source := stream contents.
	errorStart := self errorPosition.
	^ RBParseErrorNode
		  errorMessage: aMessageString
		  value: (source copyFrom: (errorStart max: 1) to: source size)
		  at: errorStart
]
{ #category : #private }
RBScanner >> previousStepPosition [
	"Answer the stream position at end of input, otherwise the stream
	position minus one."

	| position |
	position := stream position.
	^ characterType = #eof
		  ifTrue: [ position ]
		  ifFalse: [ position - 1 ]
]
{ #category : #'private-scanning' }
RBScanner >> scanAnySymbol [
	"Scan a symbol starting at the current character: a name-style symbol when
	it is alphabetic, a binary one when it is a binary character. Otherwise
	answer a plain new RBToken."

	characterType = #alphabetic ifTrue: [ ^ self scanSymbol ].
	characterType = #binary ifTrue: [ ^ self scanBinary: RBLiteralToken ].
	^ RBToken new
]
{ #category : #'private-scanning' }
RBScanner >> scanBinary: aClass [
	"Collect the current character and every directly following #binary
	character, then answer a token built by aClass from the resulting symbol
	and tokenStart."

	[
	buffer nextPut: currentCharacter.
	self step.
	characterType = #binary ] whileTrue.
	^ aClass value: buffer contents asSymbol start: tokenStart
]
{ #category : #'private-scanning' }
RBScanner >> scanError: theCause [
	"Answer an RBErrorToken for the text scanned so far.
	The error location is the next character that cannot be parsed, or the
	current stream position + 1 when an expected character is missing. A
	current character, when there is one, is appended to the token text."

	| location |
	location := currentCharacter
		            ifNil: [ stream position + 1 ]
		            ifNotNil: [ :char |
			            buffer nextPut: char.
			            stream position ].
	^ RBErrorToken
		  value: buffer contents asString
		  start: tokenStart
		  cause: theCause
		  location: location
]
{ #category : #'private-scanning' }
RBScanner >> scanIdentifierOrKeyword [
	"Scan a name. When it is directly followed by a colon that does not begin
	an assignment (:=), continue as a keyword. The names true, false and nil
	answer literal tokens; any other name answers an identifier token."

	| name literalToken |
	self scanName.
	(currentCharacter = $: and: [ stream peek ~= $= ]) ifTrue: [
		^ self scanKeyword ].
	name := buffer contents.
	literalToken := [ :value |
	                RBLiteralToken
		                value: value
		                start: tokenStart
		                stop: self previousStepPosition ].
	name = 'true' ifTrue: [ ^ literalToken value: true ].
	name = 'false' ifTrue: [ ^ literalToken value: false ].
	name = 'nil' ifTrue: [ ^ literalToken value: nil ].
	^ RBIdentifierToken value: name start: tokenStart
]
{ #category : #'private-scanning' }
RBScanner >> scanKeyword [
"Scan a keyword selector. On entry the buffer holds what was scanned so far
and the current character is a colon.
Each pass of the outer loop appends the colon, remembers the buffer and stream
positions just after it, steps, and then scans any name that follows directly.
After the loop both the buffer and the stream are moved back to the last
remembered positions, so name characters after the last colon are dropped from
the token and become the input that follows it.
Answers an RBKeywordToken when the text contains exactly one colon, otherwise
an RBMultiKeywordLiteralToken holding the text as a symbol."
| outputPosition inputPosition name |
[currentCharacter = $:] whileTrue:
[buffer nextPut: currentCharacter.
"Remember where the token ends if no further colon turns up."
outputPosition := buffer position.
inputPosition := stream position.
self step. ":"
[characterType = #alphabetic] whileTrue: [self scanName]].
"Rewind to just after the last colon, then step to the character following it."
buffer position: outputPosition.
stream position: inputPosition.
self step.
name := buffer contents.
^(name occurrencesOf: $:) == 1
ifTrue: [RBKeywordToken value: name start: tokenStart]
ifFalse:
[RBMultiKeywordLiteralToken
value: name asSymbol
start: tokenStart
stop: tokenStart + name size - 1]
]
{ #category : #'private-scanning' }
RBScanner >> scanLiteral [
	"Scan what follows a # character.
	Do not allow whitespace between # and the literal.
	Do not allow number literals after the #."

	self step.
	characterType = #alphabetic ifTrue: [ ^ self scanSymbol ].
	characterType = #binary ifTrue: [
		^ (self scanBinary: RBLiteralToken) stop: self previousStepPosition ].
	currentCharacter = $' ifTrue: [ ^ self scanStringSymbol ].
	('([' includes: currentCharacter) ifTrue: [
		^ self scanLiteralArrayToken ].
	"Accept multiple #."
	currentCharacter = $# ifTrue: [ ^ self scanLiteral ].
	^ self scanError: 'Expecting a literal type'
]
{ #category : #'private-scanning' }
RBScanner >> scanLiteralArrayToken [
	"Answer a literal array token whose value is # followed by the current
	character, and step past that character."

	| arrayToken |
	arrayToken := RBLiteralArrayToken
		              value: (String with: $# with: currentCharacter)
		              start: tokenStart.
	self step.
	^ arrayToken
]
{ #category : #'private-scanning' }
RBScanner >> scanLiteralCharacter [
	"Scan a character literal: step over the $ and answer a literal token for
	the character that follows, or an error token when the input ends there."

	| charToken |
	self step. "$"
	currentCharacter ifNil: [
		^ self scanError: 'A Character was expected' ].
	charToken := RBLiteralToken
		             value: currentCharacter
		             start: tokenStart
		             stop: stream position.
	self step. "char"
	^ charToken
]
{ #category : #'private-scanning' }
RBScanner >> scanLiteralString [
"Scan a string literal and answer a literal token holding its contents.
The first step moves past the current character. Characters are then added to
the buffer until a quote is found that is not directly followed by another quote.
The loop condition itself steps past a quote: for a doubled quote the second
quote is the current character when the loop body runs, so it is added once.
If the input ends before the closing quote, an error token is answered."
self step.
[ currentCharacter ifNil: [ ^ self scanError: 'Unmatched '' in string literal.' ].
currentCharacter = $' and: [ self step ~= $' ]
]
whileFalse: [ buffer nextPut: currentCharacter.
self step
].
^ RBLiteralToken value: buffer contents start: tokenStart stop: self previousStepPosition
]
{ #category : #'private-scanning' }
RBScanner >> scanName [
	"Append the current character to the buffer and step, for as long as the
	current character is alphabetic or a digit."

	[ #( #alphabetic #digit ) includes: characterType ] whileTrue: [
		buffer nextPut: currentCharacter.
		self step ]
]
{ #category : #'private-scanning' }
RBScanner >> scanNumber [
"Scan a number literal by handing the stream to NumberParser.
The stream is moved back by one so that the parser starts at the current
character. An Error raised while parsing is reported through scannerError:
with the error's message text. The source text of the number is then re-read
from the stream, the stream is put back just after the number, and the
scanner steps to the next character."
| start number stop string |
start := stream position.
stream position: start - 1.
number := [NumberParser parse: stream] on: Error do: [:err | self scannerError: err messageText].
"Where the parser stopped reading."
stop := stream position.
"Re-read the characters the parser consumed to keep the literal's source text."
stream position: start - 1.
string := stream next: stop - start + 1.
stream position: stop.
self step.
^RBNumberLiteralToken
value: number
start: start
stop: stop
source: string
]
{ #category : #'private-scanning' }
RBScanner >> scanPatternVariable [
	"Scan a token beginning at the current character, which is put into the
	buffer first. A directly following { answers a pattern block token.
	Otherwise every character up to the first alphabetic one is added to the
	buffer and scanning continues as an identifier or keyword; reaching the
	end of input first answers an error token."

	buffer nextPut: currentCharacter.
	self step.
	currentCharacter = ${ ifTrue: [
		self step.
		^ RBPatternBlockToken value: '`{' start: tokenStart ].
	[ characterType = #alphabetic ] whileFalse: [
		characterType = #eof ifTrue: [
			^ self scanError: 'Meta variable expected' ].
		buffer nextPut: currentCharacter.
		self step ].
	^ self scanIdentifierOrKeyword
]
{ #category : #'private-scanning' }
RBScanner >> scanSpecialCharacter [
	"Scan a special character. A colon followed by = answers an assignment
	token and a lone colon answers a special character token for the colon.
	An underscore answers a short assignment token. Any other character
	answers a special character token for itself."

	| character |
	currentCharacter = $: ifTrue: [
		self step.
		currentCharacter = $= ifFalse: [
			^ RBSpecialCharacterToken value: $: start: tokenStart ].
		self step.
		^ RBAssignmentToken start: tokenStart ].
	currentCharacter = $_ ifTrue: [
		self step.
		^ RBShortAssignmentToken start: tokenStart ].
	character := currentCharacter.
	self step.
	^ RBSpecialCharacterToken value: character start: tokenStart
]
{ #category : #'private-scanning' }
RBScanner >> scanStringSymbol [
	"Scan a string literal and answer its token with the value converted to
	a symbol."

	| token |
	token := self scanLiteralString.
	^ token
		  value: token value asSymbol;
		  yourself
]
{ #category : #'private-scanning' }
RBScanner >> scanSymbol [
	"Scan a symbol made of names and colons and answer it as a literal token.
	Each pass scans a name and then takes one directly following colon."

	| atColon |
	[ characterType = #alphabetic or: [ currentCharacter = $: ] ]
		whileTrue: [
			self scanName.
			atColon := currentCharacter = $:.
			atColon ifTrue: [
				buffer nextPut: $:.
				self step ] ].
	^ RBLiteralToken
		  value: buffer contents asSymbol
		  start: tokenStart
		  stop: self previousStepPosition
]
{ #category : #accessing }
RBScanner >> scanToken [
"fast-n-ugly. Don't write stuff like this. Has been found to cause cancer in laboratory rats. Basically a
case statement. Didn't use Dictionary because lookup is pretty slow."
"Dispatch on the current character, first match wins:
alphabetic -> identifier or keyword;
digit, or a minus sign directly followed by a digit -> number;
binary -> binary selector;
special -> special character;
quote -> string literal; # -> literal; $ -> character literal;
anything else -> unknown character error token.
The number test comes before the binary test, which matters for a minus sign
that is also classified as binary."
characterType = #alphabetic ifTrue: [^self scanIdentifierOrKeyword].
(characterType = #digit
or: [currentCharacter = $- and: [(self classify: stream peek) = #digit]])
ifTrue: [^self scanNumber].
characterType = #binary ifTrue: [^self scanBinary: RBBinarySelectorToken].
characterType = #special ifTrue: [^self scanSpecialCharacter].
currentCharacter = $' ifTrue: [^self scanLiteralString].
currentCharacter = $# ifTrue: [^self scanLiteral].
currentCharacter = $$ ifTrue: [^self scanLiteralCharacter].
^self scanUnknownCharacter
]
{ #category : #'private-scanning' }
RBScanner >> scanUnknownCharacter [
	"Answer an error token for the current character and step past it."

	| unknownToken |
	unknownToken := self scanError: 'Unknown character'.
	"advance"
	self step.
	^ unknownToken
]
{ #category : #'error handling' }
RBScanner >> scannerError: aString [
	"Report a lexical error to the error block, passing the message, the error
	position and the receiver. When the block answers nil, a
	SyntaxErrorNotification is sent for the source and its result answered;
	otherwise the receiver is answered."

	| outcome |
	outcome := self errorBlock
		           cull: aString
		           cull: self errorPosition
		           cull: self.
	outcome ifNotNil: [ ^ self ].
	^ SyntaxErrorNotification
		  inClass: Object
		  withCode: stream contents
		  doitFlag: false
		  errorMessage: aString
		  location: stream position + 1
]
{ #category : #private }
RBScanner >> step [
	"Advance to the next character of the stream, classify it and answer it.
	At the end of the stream, set the character type to #eof and answer nil."

	stream atEnd ifFalse: [
		currentCharacter := stream next.
		characterType := self classify: currentCharacter.
		^ currentCharacter ].
	characterType := #eof.
	^ currentCharacter := nil
]
{ #category : #'private-scanning' }
RBScanner >> stripComment [
"Skip one comment and add its source interval to the pending comments.
start is the stream position on entry. The loop ends at a double-quote
character that is not directly followed by another one; the loop condition
itself steps past a double-quote character it finds. Reaching the end of input
right after the first step or inside the loop body reports an unmatched
comment through scannerError: and returns without recording an interval."
| start stop |
start := stream position.
self step.
characterType = #eof
ifTrue: [ ^ self scannerError: 'Unmatched " in comment.' ].
[ currentCharacter = $" and: [ self step ~= $" ] ]
whileFalse: [ characterType = #eof
ifTrue: [ ^ self scannerError: 'Unmatched " in comment.' ].
self step ].
"At end of input the stream position is used as is; otherwise one less,
because the scanner has already stepped one character past the comment."
stop := characterType = #eof
ifTrue: [ stream position ]
ifFalse: [ stream position - 1 ].
comments add: (start to: stop)
]
{ #category : #'private-scanning' }
RBScanner >> stripSeparators [
	"Skip any mix of separator characters and comments: step over a run of
	separators, then strip a comment if one starts here, and repeat until
	neither is found."

	[
	[ characterType = #separator ] whileTrue: [ self step ].
	currentCharacter = $" ] whileTrue: [ self stripComment ]
]