/
lexer.co
1085 lines (1003 loc) · 36.4 KB
/
lexer.co
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# The Lexer Object
# ----------------
# Reads a string of Coco code and divvies it up into tagged tokens.
# Tokens are in the form:
#
# ['TAG', 'value', lineNumber = 0]
#
# which is a format that can be fed directly into
# [Jison](http://github.com/zaach/jison) generated [parser](../lib/parser.js).
# Some potential ambiguity in the grammar has been avoided by
# pushing some extra smarts into Lexer.
exports import
#### Public Methods
# The entry point.
# Lexes on a fresh clone of this object, so the lexer itself stays untouched.
#
# - `code` <br> Coco source to be parsed into an array of tokens.
#   Falsy values are lexed as the empty string.
# - `options.raw` <br> Suppresses token rewriting if truthy.
# - `options.line` <br> Specifies the starting line. Default `0`.
lex: (code, options) ->
  lexer = ^exports
  lexer.tokenize code || '', options || {}
# Rewrites the token stream in multiple passes, one logical filter at a time.
# The order of these passes matters--indentation must be
# corrected before implicit parentheses can be wrapped around blocks of code.
# Runs every rewriting pass, in this fixed order, over the given token array
# (defaulting to `@tokens`), drops a leading NEWLINE token if one is present,
# and returns the same array.
rewrite: (it || @tokens) ->
  passes = [addImplicitIndentation, rewriteBlockless, addImplicitParentheses, addImplicitBraces, expandLiterals]
  for pass of passes then pass it
  it.shift! if it.0?0 is \NEWLINE
  it
#### Main Loop
# Lexes `code` into `@tokens` and returns that array.
# `o.line` seeds the line counter; a truthy `o.raw` skips `@rewrite`.
tokenize: (code, o) ->
# Strip carriage returns, U+2028/U+2029 and BOMs, except when `@inter` is set.
@inter or code.=replace /[\r\u2028\u2029\uFEFF]/g ''
# Prepend a newline to handle leading INDENT.
code = \\n + code
# Stream of parsed tokens,
# initialized with a NEWLINE token to ensure `@last` always exists.
@tokens = [@last = [\NEWLINE \\n 0]]
# The current line number.
# Starts one below `o.line` to offset the prepended newline.
@line = ~-o.line
# The stack of all current indentation levels.
@dents = []
# The stack for pairing tokens.
@closes = []
# The stack for tagging parameters.
@parens = []
# The stack of flags per nesting level.
@flags = []
# Call tokenizers based on the character at current `i`ndex.
# Each tokenizing method is responsible for
# returning the number of characters it has consumed.
i = 0
# `charAt` yields '' past the end of `code`, which ends the loop.
while c = code.charAt i
switch c
case ' ' then i += @doSpace code, i
case \\n then i += @doLine code, i
case \\ then i += @doBackslash code, i
case \' \" then i += @doString code, i, c
case \0 to \9 then i += @doNumber code, i
case \/
switch code.charAt i+1
case \* then i += @doComment code, i
case \/ then i += @doHeregex code, i
# A tokenizer returning 0 consumed nothing, so the next one is tried.
default i += @doRegex code, i or @doLiteral code, i
case \` then i += @doJS code, i
default i += @doID code, i or @doLiteral code, i or @doSpace code, i
# Close up all remaining open blocks.
@dedent @dent
# Anything still left on the pairing stack was never closed.
@carp "missing `#that`" if @closes.pop!
if @inter
then @rest ? @carp 'unterminated interpolation'
else @last.spaced = true; @newline!
# Rewrite the token stream unless explicitly asked not to.
o.raw or @rewrite!
@tokens
# The current indentation level.
dent: 0
#### Tokenizers
# Matches an identifying literal: variables, keywords, accessors, etc.
# Tokenizes an identifier-like word starting at `index`.
# Returns the number of characters consumed, or 0 if `ID` matches nothing there.
doID: (code, index) ->
[input] = match = (ID <<< lastIndex: index)exec code
return 0 unless input
id = camelize match.1
# Identifiers matching NONASCII are validated by compiling a `var` declaration.
if NONASCII.test id
try Function "var #id" catch @carp "invalid identifier \"#id\""
{last} = this
# `id:_` `_.id` `@id`
if match.2 or last.0 is \DOT or @adi!
# Keywords used as property names are boxed and flagged `reserved`.
@token \ID if id of KEYWORDS then Object(id) <<< {+reserved} else id
@token \: \: if match.2
return input.length
# keywords
switch id
case \true \false \null \void \arguments \debugger then tag = \LITERAL
case \new \do \typeof \delete then tag = \UNARY
case \return \throw then tag = \HURL
case \break \continue then tag = \JUMP
case \this \eval \super then return @token(\LITERAL id, true)length
case \for
# The FOR token's value is an array; `let`/`own` get pushed onto it below.
id = []
@fset \for true
@fset \to false
case \then
@fset \for false
@fset \to false
case \catch \function then id = ''
case \in \of
if @fget \for
@fset \for false
if id is \of
@fset \by true
# OF holds the index variable.
id = ''
if last.0 is \ID and @tokens[*-2]0 of <[ , ] } ]>
id = @tokens.pop!1
@tokens.pop! if @tokens[*-1]0 is \,
break
fallthrough
case \instanceof
# Fold a preceding `!` into the relation's value.
id = @tokens.pop!1 + id if last.1 is \!
tag = \RELATION
case \not
# `is not` rewrites the aliased `===` just emitted into `!==`.
return (last.1 = \!==; 3) if last.alias and last.1 is \===
tag = \UNARY; id = \!
case \and \or \is
@unline!
if id is \is
then @token \COMPARE \===
else @token \LOGIC if id is \or then \|| else \&&
# Mark the operator as having been spelled as a word.
@last.alias = true
return id.length
case \unless then tag = \IF
case \until then tag = \WHILE
case \import
if able @tokens then id = \<<<; break
fallthrough
case \export \const \var then tag = \DECL
case \let \own
if last.0 is \FOR and id not of last.1
last.1.push id
return 3
fallthrough
default
break if id of KEYWORDS_SHARED
@carp "reserved word \"#id\"" if id of KEYWORDS_UNUSED
# A valueless CATCH/FUNCTION/LABEL token takes this identifier as its value.
if not last.1 and last.0 of <[ CATCH FUNCTION LABEL ]>
last <<< {1: id, -spaced}
return input.length
tag = \ID
# contextual keywords (reserved only in specific places)
switch id
case \all
if last.1 is \<<< and \<
or last.1 is \import and \All
last.1 += that
return 3
case \from then @forange! and tag = \FROM
case \to \til
# `for i to n` gets an implicit `from 0`.
@forange! and @tokens.push [\FROM '' @line] [\STRNUM \0 @line]
if @fget \from
@fset \from false
@fset \by true
tag = \TO
else if last.0 is \STRNUM and not last.callable
# Retag the preceding literal as the start of a range.
last <<< 0:\RANGE op:id
return id.length
case \by
if last.0 is \STRNUM and @tokens[*-2]0 is \RANGE
tag = \RANGE_BY
else if @fget \by
tag = \BY
@fset \by false
case \ever then if last.0 is \FOR
# `for ever` becomes `while true`.
@fset \for false
last.0 = \WHILE; tag = \LITERAL; id = \true
# Words not tagged above use their uppercased source text as the tag.
tag ||= match.1.toUpperCase!
@unline! if tag of <[ RELATION THEN ELSE CASE DEFAULT CATCH FINALLY
IN OF FROM TO BY EXTENDS IMPLEMENTS ]>
if tag of <[ THEN IF WHILE ]>
@fset \for false
@fset \by false
@token tag, id
input.length
# Matches a number, including decimal, hex and exponential notation.
# Tokenizes a numeric literal; the second argument is stored straight into
# `NUMBER.lastIndex`. Returns the number of characters consumed (0 if none).
# NOTE(review): the meaning of each capture group comes from the `NUMBER`
# regex, which is defined outside this view; confirm there.
doNumber: (code, NUMBER.lastIndex) ->
return 0 unless input = (match = NUMBER.exec code)0
{last} = this
# `. 0.0` => `. 0 . 0`
if match.5 and (last.0 is \DOT or @adi!)
@token \STRNUM match.4.replace NUMBER_OMIT, ''
return match.4.length
# `match.1` is used as the radix and `match.2` as the digits in that radix.
if radix = match.1
num = parseInt rnum = match.2.replace(NUMBER_OMIT, ''), radix
# If dropping the final digit leaves the parsed value unchanged,
# `parseInt` stopped before that digit, so it is invalid in this radix.
if isNaN num or num is parseInt rnum.slice(0 -1), radix
@carp "invalid number #rnum in base #radix"
num += ''
else
num = (match.3 or input)replace NUMBER_OMIT, ''
if match.3 and num.charAt! is \0 and num.charAt(1) not of ['' \.]
@carp "deprecated octal literal #{match.4}"
# Merge an immediately preceding sign into the number.
if not last.spaced and last.0 is \+-
last.0 = \STRNUM; last.1 += num
return input.length
@strnum num
input.length
# Matches a string literal.
# Tokenizes a string literal opened by the quote character `q` at `index`.
# Returns the number of characters consumed.
doString: (code, index, q) ->
# Two quotes in a row: a heredoc if a third follows, otherwise an empty string.
if q is code.charAt index+1
return if q is code.charAt index+2
then @doHeredoc code, index, q
else @strnum q+q; 2
# Double-quoted strings go through interpolation.
if q is \"
parts = @interpolate code, index, q
@addInterpolated parts, unlines
return 1 + parts.size
str = (SIMPLESTR <<< lastIndex: index)exec code .0
or @carp 'unterminated string'
# Drop the surrounding quotes, rebuild the literal and join continued lines.
@strnum unlines @string q, str.slice 1 -1
@countLines(str)length
# Matches heredocs, adjusting indentation to the correct level,
# as they ignore indentation to the left.
# Tokenizes a triple-quoted heredoc opened at `index` with quote character `q`.
# Returns the number of characters consumed.
doHeredoc: (code, index, q) ->
# Single-quoted heredocs are taken verbatim, without interpolation.
if q is \'
~(end = code.indexOf q+q+q, index+3) or @carp 'unterminated heredoc'
raw = code.slice index+3 end
# Remove trailing indent.
doc = raw.replace LASTDENT, ''
@strnum enlines @string q, lchomp detab doc, heretabs doc
# The extra 6 covers the opening and closing quote triples.
return @countLines(raw)length + 6
parts = @interpolate code, index, q+q+q
# Measure the common indent over the whole heredoc text.
tabs = heretabs code.slice(index+3 index+parts.size)replace LASTDENT, ''
for t, i of parts then if t.0 is \S
t.1.=replace LASTDENT, '' if i+1 is parts.length
t.1 = detab t.1, tabs
t.1 = lchomp t.1 if i is 0
@addInterpolated parts, enlines
3 + parts.size
# Matches block comments.
# Tokenizes a `/* ... */` block comment starting at `index`.
# Returns the length of the comment text, after counting its lines.
doComment: (code, index) ->
# An unterminated comment runs to the end of input and gets a synthetic `*/`.
comment = if ~end = code.indexOf \*/ index+2
then code.slice index, end+2
else code.slice(index) + \*/
# Only comments that start a line are emitted as tokens; others are dropped.
if @last.0 of <[ NEWLINE INDENT THEN ]>
@token \COMMENT detab comment, @dent
@token \NEWLINE \\n
@countLines(comment)length
# Matches embedded JavaScript.
# Tokenizes a backtick-delimited JavaScript literal; the second argument is
# stored straight into `JSTOKEN.lastIndex`. Returns the characters consumed.
doJS: (code, JSTOKEN.lastIndex) ->
[lit,, js] = JSTOKEN.exec code
lit or @carp 'unterminated JS literal'
# The code is boxed so it can carry a `js` flag, and emitted as callable.
@token \LITERAL Object(detab js, @dent) <<< {+js}, true
@countLines(lit)length
# Matches a regular expression literal aka _regex_,
# disambiguating from division operators.
# Tokenizes a `/regex/` literal at `index`.
# Returns the characters consumed, or 0 when the slash is taken as division.
doRegex: (code, index) ->
# To coexist with implicit call and ACI,
# disallow leading space or equal sign when applicable.
#
# f /re/ 9 /ex/ # f(/re/, 9, /ex/)
# a /= b / c / d # division
#
# `divisible`: the previous token could be the left operand of a division.
if divisible = able @tokens or @last.0 is \CREMENT
return 0 if not @last.spaced or code.charAt(index+1) of [' ' \=]
[input, body, flag] = (REGEX <<< lastIndex: index)exec code
# An unmatched regex is only an error where division is impossible.
if input
then @regex body, flag
else divisible or @carp 'unterminated regex'
input.length
# Matches a multiline, extended regex literal.
# Tokenizes a `// ... //` extended regex starting at `index`.
# Returns the number of characters consumed, including any trailing flag.
doHeregex: (code, index) ->
{tokens, last} = this
parts = @interpolate code, index, \//
rest = code.slice index + 2 + parts.size
# Trailing flag: one to four of `gimy`, or a single `?` or `$`, or nothing.
flag = @validate /^(?:[gimy]{1,4}|[?$]?)/exec(rest)0
# More than one part means the heregex contains interpolations.
if parts.1
# `$` builds a plain string concatenation; otherwise a `RegExp(...)` call.
if flag is \$
@adi!; @token \( \"
else
tokens.push [\ID \RegExp last.2] [\CALL( '' last.2]
# `?` takes the last interpolated part as the flags argument.
if flag is \?
for t, i of parts by -1 then if t.0 is \TOKENS
dynaflag = parts.splice(i, 1).0.1
break
for t, i of parts
if t.0 is \TOKENS
tokens.push ...t.1
else
val = t.1.replace HEREGEX_OMIT, ''
# Skip empty string parts once a first string part has been emitted.
continue if one and not val
one = tokens.push t <<< [\STRNUM @string \' enslash val]
tokens.push [\+- \+ tokens[*-1]2]
# Drop the trailing `+` joiner.
--tokens.length
if dynaflag or flag >= \g
@token \, \,
if dynaflag then tokens.push ...dynaflag else @token \STRNUM "'#flag'"
@token (if flag is \$ then \) else \)CALL), ''
else @regex reslash(parts.0.1.replace HEREGEX_OMIT, ''), flag
2 + parts.size + flag.length
# Matches a word literal, or ignores a sequence of whitespaces.
# Handles a backslash: a captured word becomes a single-quoted string token;
# otherwise the matched text is only scanned for newlines.
# The second argument is stored straight into `BSTOKEN.lastIndex`.
# Returns the length of the matched text.
doBackslash: (code, BSTOKEN.lastIndex) ->
  found = BSTOKEN.exec code
  if found.1
    @strnum @string \' found.1
  else
    @countLines found.0
  found.0.length
# Matches newlines and {in,de}dents, determining which is which.
# If we can detect that the current line is continued onto the next line,
# then the newline is suppressed:
#
# elements
# .map -> ...
# .get()
#
# Keeps track of the level of indentation, because a single dedent
# can close multiple indents, so we need to know how far in we happen to be.
# Handles the run of newlines at `index` and the indentation that follows it.
# Returns the number of characters consumed.
doLine: (code, index) ->
[input, tabs] = (MULTIDENT <<< lastIndex: index)exec code
{length} = @countLines input
# The preceding token ends its line and counts as followed by whitespace.
{last} = this; last <<< {+eol, +spaced}
# Nothing follows these newlines, so there is no indentation to process.
return length if index + length >= code.length
# The first indent character seen fixes the only one allowed from then on.
if tabs and (@emender ||= //[^#{ tabs.charAt! }]//)exec tabs
@carp "contaminated indent #{ escape that }"
if 0 > delta = tabs.length - @dent
@dedent -delta
@newline!
else
[tag, val] = last
# A line ending in one of these tokens continues onto the next one,
# so neither a newline nor an indent is emitted.
if tag is \ASSIGN and val+'' not of <[ = := += ]>
or tag is \BITWISE and val is not \&
or tag of <[ +- DOT LOGIC MATH COMPARE RELATION SHIFT
IN OF TO BY FROM EXTENDS IMPLEMENTS ]>
return length
if delta then @indent delta else @newline!
@fset \for false
@fset \by false
length
# Consumes non-newline whitespaces and/or a line comment.
# Skips whatever `SPACE` matches at the given index, which is stored straight
# into `SPACE.lastIndex`. Marks the previous token as `spaced` when anything
# was matched. Returns the length of the matched text.
doSpace: (code, SPACE.lastIndex) ->
  [consumed] = SPACE.exec code
  @last.spaced = true if consumed
  consumed.length
# We treat all other single characters as a token. e.g.: `( ) , . !`
# Multi-character operators are also literal tokens, so that Jison can assign
# the proper order of operations.
# There are some symbols that we tag specially here.
# `;` and newlines are both treated as a NEWLINE; we distinguish parentheses
# that indicate a method call from regular parentheses, and so on.
# Tokenizes a symbol or operator at `index`.
# Returns the number of characters consumed, or 0 if `SYMBOL` matches nothing.
doLiteral: (code, index) ->
return 0 unless sym = (SYMBOL <<< lastIndex: index)exec(code)0
# Tag and value both start out as the symbol itself.
switch tag = val = sym
case \+ \- then tag = \+-
case \&& \|| then tag = \LOGIC
case \? \!? then tag = \LOGIC if @last.spaced
case \/ \% \** then tag = \MATH
case \++ \-- then tag = \CREMENT
case \<<< \<<<< then tag = \IMPORT
case \;
@fset \by false
tag = \NEWLINE
case \.
@last.0 = \? if @last.1 is \?
tag = \DOT
case \,
# Fill an elided element or operand with a placeholder token.
switch @last.0
case \, \[ \( \CALL( then @token \LITERAL \void
case \FOR \OWN then @token \ID ''
case \!=
# Where no left operand is possible, `!=` is split into `!` and `=`.
unless able @tokens or @last.0 is \CREMENT
@tokens.push [\UNARY \! @line] [\ASSIGN \= @line]
return 2
fallthrough
case <[ === !== < > <= >= == ]> then tag = \COMPARE
case <[ << >> >>> <? >? ]> then tag = \SHIFT
case \(
# Plain grouping parenthesis, remembered in case it turns out to be a call.
unless @last.0 of <[ FUNCTION LET ]> or @able true
@token \( \(
@closes.push \)
@parens.push @last
return 1
tag = \CALL(
@closes.push \)CALL
case \[ \{
@adi!
# `val is \{` indexes the string ']}' to pick the matching closer.
@closes.push ']}'charAt val is \{
case \}
# Inside an interpolation, an unpaired `}` ends the nested lexer:
# the remainder is stashed and a huge count stops the main loop.
if @inter and val is not @closes[*-1]
@rest = code.slice index+1
return 9e9
fallthrough
case \] \)
if \) is tag = val = @pair val
if @last is @lpar = @parens.pop!
# Retag an empty `()` as a call.
@last.0 = \CALL(
tag = \)CALL
case \:
switch @last.0
case \ID \STRNUM \) then break
case \... then @last.0 = \STRNUM
default tag = \LABEL; val = ''
case <[ = := += -= *= /= %= &= ^= |= <<= >>= >>>= <?= >?= **= ]>
# Directly after a dot, the operator is appended to the dot token's value.
if @last.1 is \. or @last.0 is \? and @adi!
@last.1 += val
return val.length
if @last.0 is \LOGIC
# Box the operator so it can carry the preceding logic operator.
(val = Object val)logic = @tokens.pop!1
else if val of <[ += -= ^= ]> and not able @tokens and
@last.0 not of <[ +- ^ UNARY LABEL ]>
@token \UNARY val.charAt!; val = \=
tag = \ASSIGN
case \*
return that if @last.0 of <[ NEWLINE INDENT THEN ]>
and @doInlinedent code, index+1, \list
tag = if able @tokens
or @last.0 is \CREMENT and able @tokens, @tokens.length-1
then \MATH else \STRNUM
case \@ \@@
@dotcat val or if val is \@
then @token \LITERAL \this true
else @token \LITERAL \arguments
return val.length
case \!
# `switch then` is used only so that `break` can leave this section.
switch then unless @last.spaced
if able @tokens, null true
@token \CALL( \!
@token \)CALL \)
else if @last.1 is \typeof
@last.1 = \classof
else break
return 1
tag = \UNARY
case \<>
@token \LITERAL \<> true
return 2
case \&
unless able @tokens
@token \LITERAL \& true
return 1
fallthrough
case \| then tag = \BITWISE
case \~
return 1 if @dotcat val
tag = \UNARY
case \-> \~> then up = \->; fallthrough
case \<- \<~ then @parameters tag = up || \<-
case \:: then up = \prototype; fallthrough
case \.. then @adi!; tag = \ID; val = up || \constructor
case \=>
@unline!
@fset \for false
return 1 + that if @doInlinedent code, index+2
tag = \THEN
# Remaining symbols starting with `<` are word lists.
default if \< is val.charAt 0
@carp 'unterminated words' if val.length < 4
@token \WORDS, val, @adi!
return val.length
@unline! if tag of <[ , |> DOT LOGIC COMPARE MATH IMPORT SHIFT BITWISE ]>
@token tag, val
sym.length
# Opens an indent in the middle of a line, where `INLINEDENT` matches at `index`.
# With a truthy `list`, a `void =` token pair is emitted first.
# Returns the length of the match, or 0 when there is none.
doInlinedent: (code, index, list) ->
return 0 unless d = (INLINEDENT <<< lastIndex: index)exec code .0.length
list and @tokens.push [\LITERAL \void @line] [\ASSIGN \= @line]
# The indent is sized from the column after the match, relative to `@dent`.
@indent index + d - @dent - 2 - code.lastIndexOf \\n index
d
#### Token Manipulators
# Adds a token to the results,
# taking note of the line number and returning `value`.
# Appends `[tag, value, @line]` to the token stream and makes it `@last`.
# A truthy `callable` sets the token's `callable` flag. Returns `value`.
token: (tag, value, callable) ->
  entry = [tag, value, @line]
  entry.callable = true if callable
  @tokens.push @last = entry
  value
# Records an INDENT.
# Emits an INDENT token worth `delta`, remembers `delta` on the indent stack,
# expects a closing DEDENT, and deepens the current indentation level.
indent: !(delta) ->
  @dents.push @token \INDENT delta
  @closes.push \DEDENT
  @dent += delta
# Records DEDENTs against matching INDENTs.
# Pops indent levels until `debt` columns of indentation have been closed.
dedent: !(debt) ->
@dent -= debt
while debt > 0 and dent = @dents.pop!
# Dedenting to a column between two indent levels is an error,
# unless this lexer is an interpolation clone (`@inter`).
if debt < dent and not @inter
@carp "unmatched dedent (#debt for #dent)"
@pair \DEDENT
# Numeric entries emit a DEDENT token. `unline` turns an entry into a string
# to mark it as dummy; those are only subtracted from the debt.
debt -= if typeof dent is \number then @token \DEDENT dent else dent
# Generates a newline token. Consecutive newlines get merged together.
# Emits a `spaced` NEWLINE token, unless the last token's value is already a
# newline, so that consecutive newlines collapse into one.
newline: !->
  return if @last.1 is \\n
  @last = [\NEWLINE \\n @line]
  @last.spaced = true
  @tokens.push @last
# Cancels an immediate newline.
# Removes the last token if it is an INDENT or a newline-made NEWLINE.
unline: !->
# Never remove the initial NEWLINE when it is the only token.
return unless @tokens.1
switch @last.0
case \INDENT
# Mark the last indent as dummy.
@dents[*-1] += ''
# There is no `fallthrough` above; the INDENT case goes straight on to the
# removal below.
case \NEWLINE
# A NEWLINE written as `;` is kept.
return if @last.1 is \;
default return
# Shrink the token array by one and point `@last` at the new final token.
@last = @tokens[--*-1]
# (Re)tags function parameters.
# Tags the parameter list that belongs to the function arrow `arrow`.
parameters: !(arrow) ->
# A just-closed plain `(...)` becomes the parameter list.
if @last.0 is \) is @last.1
@lpar.0 = \PARAM(
@last.0 = \)PARAM
return
# Otherwise an empty list is synthesized. For `->` it opens right here;
# for other arrows it opens after the nearest preceding
# NEWLINE, INDENT, THEN or `(` token.
if arrow is \-> then @token \PARAM( '' else
break if t.0 of <[ NEWLINE INDENT THEN ( ]> for t, i of @tokens by -1
@tokens.splice i+1 0 [\PARAM( '' t.2]
@token \)PARAM ''
# Expands variables and expressions inside double-quoted strings or heregexes
# using Ruby-like notation for substitution of arbitrary expressions.
#
# "Hello #{name.capitalize()}."
#
# Will recursively create a new lexer for each interpolation,
# tokenizing the contents and merging them into the token stream.
# Splits the text after the opening delimiter at `idx` into parts, up to the
# closing delimiter `end`. Each part is `[\S, text, line]` for literal text or
# `[\TOKENS, tokens]` for an interpolation. The returned array carries `size`,
# the number of characters consumed after the opening delimiter.
# Carps if `end` is never found.
interpolate: !(str, idx, end) ->
parts = []; end0 = end.charAt 0; pos = 0; i = -1
str.=slice idx + end.length
while ch = str.charAt ++i
switch ch
case end0
continue unless end is str.slice i, i + end.length
# Closing delimiter found: emit the trailing string part and finish.
parts.push [\S; @countLines str.slice 0 i; @line]
return parts <<< size: pos + i + end.length
case \#
c1 = str.charAt i+1
# `#@`, `#&`, `#<>` and `#identifier` are shorthand interpolations.
id = c1 of <[ @ & ]> and c1
or c1 is \< and \> is str.charAt i+2 and \<>
or (ID <<< lastIndex: i+1)exec str .1
continue unless id or c1 is \{
# A backslash skips the character after it.
case \\ then ++i; fallthrough
default continue
# `"#a#{b || c}"` => `a + "" + (b || c)`
if i or nested and not stringified
stringified = parts.push [\S; @countLines str.slice 0 i; @line]
if id
{length} = id
id = \this if id is \@
if id of <[ this & <> ]>
tag = \LITERAL
else
id = camelize id
# Validate the name by compiling a strict-mode `var` declaration with it.
try Function "'use strict'; var #id" catch
@carp "invalid variable interpolation \"#id\""
tag = \ID
str.=slice delta = i + 1 + length
parts.push [\TOKENS nested = [[tag, id, @line]]]
else
# `#{...}` is lexed by a clone in interpolation mode, which leaves the
# unconsumed remainder in `rest`.
clone = ^exports <<< {+inter, @emender}
nested = clone.tokenize str.slice(i+2), {@line, +raw}
delta = str.length - clone.rest.length
{rest: str, @line} = clone
nested.shift! while nested.0?0 is \NEWLINE
# Non-empty interpolations are wrapped in parentheses.
if nested.length
nested.unshift [\( \( nested.0.2]
nested.push [\) \) @line]
parts.push [\TOKENS nested]
# Restart scanning from the beginning of the shortened `str`.
pos += delta; i = -1
@carp "missing `#end`"
# Merges `@interpolate`d strings.
# Emits tokens for the `parts` produced by `@interpolate`.
# `nlines` is applied to each string part to normalize its newlines.
addInterpolated: !(parts, nlines) ->
# A single part means no interpolation: emit one plain string.
return @strnum nlines @string \" parts.0.1 unless parts.1
{tokens, last} = this
# A `%` directly before the string is removed and the parts are joined with
# commas inside brackets; otherwise they are joined with `+` in parentheses.
[left, right, joint] = if not last.spaced and last.1 is \%
--tokens.length
@last = last = tokens[*-1]
[\[ \] [\, \,]]
else
[\( \) [\+- \+]]
callable = @adi!
tokens.push [left, \", last.2]
for t, i of parts
if t.0 is \TOKENS
tokens.push ...t.1
else
# Empty string parts are skipped, except at the first two positions.
continue if i > 1 and not t.1
tokens.push [\STRNUM; nlines @string \" t.1; t.2]
tokens.push joint.concat tokens[*-1]2
# Drop the trailing joiner.
--tokens.length
@token right, '', callable
# Records a string/number token, supplying implicit dot if applicable.
# Emits a STRNUM token for `value`. The token is flagged callable when an
# implicit dot was just inserted or the previous token is a DOT.
strnum: !(value) ->
  callable = @adi! or @last.0 is \DOT
  @token \STRNUM value, callable
# Records a regex token.
# Emits a token for a regex with source `body` and flag string `flag`.
regex: (body, flag) ->
# Let the host `RegExp` validate the pattern; report its error message.
try RegExp body catch @carp e.message
# The `$` flag yields the pattern as a string literal instead.
return @strnum @string \' enslash body if flag is \$
# An empty body is written as `(?:)`.
@token \LITERAL "/#{ body or '(?:)' }/#{ @validate flag }"
# Supplies an implicit DOT if applicable.
# Inserts a DOT token when the previous token is unspaced and indexable.
# Returns the value passed to `@token` (`.`) if a dot was inserted,
# otherwise undefined.
adi: ->
return if @last.spaced
# A trailing `!?` is rewritten into an empty call followed by `?`.
if @last.0 is \!?
@last.0 = \CALL(
@token \)CALL ''
@token \? \?
else unless able @tokens
return
@token \DOT \.
# Resolves `.~` etc.
# Appends `suffix` to the value of a dot token. The dot is either the last
# token already or one inserted by `@adi`. Returns the combined value, or
# undefined when no dot applies.
dotcat: (suffix) ->
  if @last.1 is \. or @adi!
    @last.1 += suffix
# Pairs up a closing token.
# Matches the closer `it` against the top of `@closes`.
# Returns the closer that was popped.
pair: ->
# `)` is also accepted where `)CALL` is expected.
unless it is (wanted = @closes[*-1]) or \)CALL is wanted and it is \)
@carp "unmatched `#it`" unless \DEDENT is wanted
# Auto-close DEDENT to support code like:
#
# [ a
# b ]
#
@dedent @dents[*-1]
# Retry now that the innermost indent has been closed.
return @pair it
@unline!
@closes.pop!
#### Helpers
# Checks if the last token is
#
# - `f()`: `call`able via explicit parentheses.
# - `x''`: indexable via implicit dots.
# Returns false if the last token is followed by whitespace; otherwise defers
# to the module-level `able` on the whole token stream, passing `call` along.
able: (call) ->
  return false if @last.spaced
  able @tokens, null call
# Increments `@line` by the number of newlines in a string.
# Bumps `@line` once per newline in `str`, then returns `str`
# so that calls can be chained.
countLines: (str) ->
  pos = 0
  ++@line while pos = 1 + str.indexOf \\n pos
  str
# Checks FOR for FROM/TO.
# Checks whether a FOR token sits two tokens back (three when the last token
# is a NEWLINE or INDENT). If so, clears the `for` flag, sets the `from` flag
# and returns true; otherwise returns false.
forange: ->
  back = if @last.0 of <[NEWLINE INDENT]> then 3 else 2
  return false unless @tokens[* - back]?0 is \FOR
  @fset \for false
  @fset \from true
  true
# Complains on bad regex flag.
# Carps when any character occurs twice in `flag`; otherwise returns `flag`.
validate: (flag) ->
  dup = flag and /(.).*\1/exec flag
  @carp "duplicate regex flag `#{dup.1}`" if dup
  flag
# {G,S}ets a per-nest flag.
# Reads the flag `key` stored for the current nesting depth, which is the
# length of `@closes`. Yields undefined when nothing is stored at that depth.
fget: (key) ->
@flags[@closes.length]?[key]
# Stores `val` under `key` for the current nesting depth.
# NOTE(review): `@[` presumably creates the per-depth container on first use;
# confirm against the Coco vivification operator.
fset: (key, val) ->
@flags@[@closes.length][key] = val
# Throws a syntax error with the current line number.
# Forwards `msg` to the module-level `carp`, adding the current line.
carp: !(msg) ->
  carp msg, @line
# Forwards to the module-level `string`, adding the current line.
string: (q, body) ->
  string q, body, @line
#### Helpers
# Throws a SyntaxError for `msg`, reporting the zero-based `lno` as a
# one-based line number.
function carp msg, lno
  line = -~lno
  throw SyntaxError "#msg on line #line"
# Checks whether or not the previous token is {index,`call`}able.
# Tests the token just before index `i` (default: the end of `tokens`).
# With a truthy `call`, asks whether it can be called; otherwise whether it
# can be indexed or used as an operand.
function able tokens, i ? tokens.length, call
[tag] = token = tokens[i-1]
return true if tag of <[ ID ] ? ]>
# An unspaced `&` that follows a spaced token also qualifies.
or tag is \BITWISE and token.1 is \& and
not token.spaced and tokens[i-2]spaced
# For closing parentheses, only those with a non-empty value are callable.
if call then token.callable or tag of <[ ) )CALL ]> and token.1
else tag of <[ } ) )CALL STRNUM LITERAL WORDS ]>
#### String Manipulators
# Constructs a string literal by (un)escaping quotes etc.
# `string(q, body, lno)` wraps `body` in the quote character `q`, normalizing
# quotes and backslash escapes. `lno` is only used for error reporting.
string = let do
# Matches either quote character, or a backslash with an optional escape.
# Capture groups: (1) octal digits, (2) a bare `x`/`u` without enough hex
# digits, (3) any other single character.
re = // ['"] |
\\ (?: ([0-3]?[0-7]{2} | [1-7] | 0(?=[89]))
| x[\dA-Fa-f]{2} | u[\dA-Fa-f]{4} | ([xu])
| [\\0bfnrtv] | [^\n\S] | ([\w\W])
)? //g
then (q, body, lno) ->
body.=replace re, (it, oct, xu, rest) ->
# Escape the enclosing quote character and a lone backslash.
return \\ + it if it of [q, \\]
# Convert octal to hex for strict mode.
return \\\x + (0x100 + parseInt oct, 8)toString 16 .slice 1 if oct
carp 'malformed character escape sequence' lno if xu
# Keep recognized escapes and escaped quotes; drop the backslash
# from any other escaped character.
if not rest or q is rest then it else rest
q + body + q
# Detects the minimum indent count for a heredoc, ignoring empty lines.
# Returns the smallest indent width among the lines of `doc` that `TABS`
# matches, or NaN when it matches none.
function heretabs doc
# Start from NaN so that the first match always becomes the minimum.
dent = 0/0
# Each match includes its leading newline, hence the `- 1`.
dent <?= that.0.length - 1 while TABS.exec doc
dent
# A newline that is not at the end of a line, plus the indentation after it.
TABS = /\n(?!$)[^\n\S]*/mg
# Erases all external indentations up to specified length.
# Removes up to `len` indentation characters after every newline in `str`.
# Returns `str` unchanged when `len` is falsy.
function detab str, len
# The regex for each `len` is cached as a property of `detab` itself.
if len then str.replace detab[len]||=//\n[^\n\S]{1,#len}//g \\n else str
# Builds a one-argument function that returns its argument with `re`
# replaced by `to`.
function replacer re, to then -> it.replace re, to
# Erases all newlines and indentations.
unlines = replacer /\n[^\n\S]*/g ''
# Converts newlines/backslashes to their quoted form.
enlines = replacer /\n/g \\\n
enslash = replacer /\\/g \\\\
# Quotes slashes unless already quoted.
# An existing escape pair is captured and kept; a bare slash is escaped.
reslash = replacer /(\\.)|\//g -> @@1 or \\\/
# Transforms hyphenated-words to camelCase.
camelize = replacer /-[a-z]/ig -> it.char-at 1 .to-upper-case!
# Deletes the first character if newline.
# Returns `str` without its first character when that character is a newline;
# otherwise returns `str` as is.
function lchomp str
  if str.charAt(0) is \\n then str.slice 1 else str.slice 0
# Decodes a range endpoint. `lno` is only used for error reporting.
# Returns `[number]` for a numeric `val`, or `[charCode, true]` for a
# one-character string literal.
function decode val, lno
return [+val] unless isNaN val
# Evaluate the literal to get the string it denotes. Anything longer than
# 8 characters is replaced by a two-character dummy so the check below fails.
val = if val.length > 8 then \ng else do Function \return + val
val.length is 1 or carp 'bad string in range' lno
[val.charCodeAt!, true]
# Formats a character code as a double-quoted `\uXXXX` escape sequence,
# left-padding the hexadecimal digits with zeros to four places.
function uxxxx code
  hex = code.toString 16
  padded = (\000 + hex)slice(-4)
  \"\\u + padded + \"
# Turns a character code into a quoted string literal.
# Without a global `JSON`, every code is written as a `\uXXXX` escape.
character = if JSON!? then uxxxx else ->
# U+2028 and U+2029 are always escaped; everything else goes through
# `JSON.stringify`.
switch it case 0x2028 0x2029 then uxxxx it
default JSON.stringify String.fromCharCode it
#### Rewriters
# The Coco language has a good deal of optional, implicit, and/or shorthand
# syntax. This can greatly complicate a grammar and bloat the resulting parse
# table. Instead of making the parser handle it all, we take a series of passes
# over the token stream, convert shorthand into the unambiguous long form,
# add implicit indentation and parentheses, and generally clean things up.
# - Tag postfix conditionals.
# - Fill in empty blocks for bodyless `class`es.
# For each IF or CLASS token, looks ahead to the next NEWLINE or INDENT on the
# same level and rewrites according to what is found there.
!function rewriteBlockless tokens
for [tag]:token, i of tokens
detectEnd tokens, i+1, ok, go if tag of <[ IF CLASS ]>
function ok then it.0 of <[ NEWLINE INDENT ]>
!function go it, i
if tag is \IF
# No block follows the condition (or only a placeholder one), so the IF is
# retagged as postfix.
token.0 = \POST_IF if it.0 is not \INDENT
or not it.1 and not it.then
or tokens[i-1]0 of BLOCK_USERS
else unless it.0 is \INDENT
# A CLASS without a body gets an empty INDENT/DEDENT pair.
tokens.splice i, 0 [\INDENT 0 lno = tokens[i-1]2] [\DEDENT 0 lno]
# Wrap single-line blocks with regular INDENT/DEDENT pairs.
# Because our grammar is LALR(1), it can't handle some sequences
# that lack ending delimiters.
# Inserts an INDENT/DEDENT pair around single-line blocks that follow one of
# the block-introducing tokens listed below.
!function addImplicitIndentation tokens
i = 0
while token = tokens[++i]
[tag] = token
continue unless tag of
<[ -> THEN ELSE DEFAULT TRY CATCH FINALLY DECL ]>
switch next = tokens[i+1]0
# `else if` is left alone.
case \IF then continue if tag is \ELSE
case \INDENT \THEN
# A block already follows; a THEN before it is dropped as redundant.
tokens.splice i-- 1 if tag is \THEN
continue
indent = [\INDENT 0 token.2]; dedent = [\DEDENT 0]
# THEN is replaced by the INDENT; for other tags the INDENT is inserted after.
if tag is \THEN
then (tokens[i] = indent)then = true
else tokens.splice ++i, 0 indent
switch
case tag is \DECL then break
# ->,
case next of <[ DOT ? , |> ]> then --i; fallthrough
# -> 0,
case next of <[ ID STRNUM LITERAL ]> and \, is tokens[i+2]?0
go 0 i+=2; ++i
continue
# -> [0],
case next of <[ ( [ { ]>
and \, is tokens[idx = 1 + indexOfPair tokens, i+1]?0
go 0 idx; ++i
continue
detectEnd tokens, i+1, ok, go
# Decides whether the implicit block ends at `token`.
function ok [t0]:token, i
# Handle nesting intuitively:
# `try try a finally b` => `try (try a finally b)`
t = tag
tag := '' if tag is t0 or tag is \THEN and t0 is \SWITCH
switch t0
case \NEWLINE then token.1 is not \;
case \DOT \? \, \|> then tokens[i-1]eol
case \ELSE then t is \THEN
case \CATCH then t is \TRY
case \FINALLY then t of <[ TRY CATCH THEN ]>
case \CASE \DEFAULT then t of <[ CASE THEN ]>
# Inserts the DEDENT at `i`, or before a trailing comma if there is one.
!function go [] i
prev = tokens[i-1]
tokens.splice if prev.0 is \, then i-1 else i, 0, dedent <<< {prev.2}
# Functions may be optionally called without parentheses for simple cases.
# Insert the missing parentheses here to aid the parser.
# Wraps the arguments of parenthesis-free calls in CALL( ... )CALL tokens,
# and rewrites `do` followed by a block.
!function addImplicitParentheses tokens
i = 0; brackets = []
while token = tokens[++i]
if token.1 is \do and tokens[i+1]0 is \INDENT
endi = indexOfPair tokens, i+1
# `do <block>` followed by `while` on the next line becomes a DO loop.
if tokens[endi+1]0 is \NEWLINE and tokens[endi+2]?0 is \WHILE
token.0 = \DO
tokens[endi+2]done = true
tokens.splice endi+1 1
else
# Otherwise the block's INDENT/DEDENT are retagged as parentheses
# and the `do` token itself is removed.
(token = tokens[1+ i])0 = \(
(tpair = tokens[endi])0 = \)
token.doblock = true
tokens.splice i, 1
[tag] = token; prev = tokens[i-1]
# For each `[`, remember whether it follows a DOT.
tag is \[ and brackets.push prev.0 is \DOT
if prev.0 is \]
if brackets.pop! then prev.index = true else continue
# An `&` not followed by an expression is a literal, not an operator.
if tag is \BITWISE and token.1 is \& and not exp tokens[i+1]
tag = token.0 = \LITERAL
# An implicit call needs a callee: FUNCTION/LET, or a spaced callable token.
continue unless prev.0 of <[ FUNCTION LET ]>
or prev.spaced and able tokens, i, true
if token.doblock
token.0 = \CALL(
tpair.0 = \)CALL
continue
continue unless exp token
if tag is \CREMENT
continue if token.spaced or tokens[i+1]0 not of CHAIN
skipBlock = seenSwitch = false
tokens.splice i++, 0 [\CALL( '' token.2]
detectEnd tokens, i, ok, go
# Does the token start an expression?
function exp [tag]:token
tag of ARG or not token.spaced and tag of <[ +- ^ ]>
# Decides whether the implicit call ends before `token`.
function ok token, i
tag = token.0
return true if tag of <[ |> POST_IF ]>
unless skipBlock
# Word-form `and`/`or` end the call.
return true if token.alias and token.1 of <[ && || ]>
or tag of <[ TO BY IMPLEMENTS ]>
pre = tokens[i-1]
switch tag
case \NEWLINE
return pre.0 is not \,
case \DOT \?
return not skipBlock and (pre.spaced or pre.0 is \DEDENT)
case \SWITCH then seenSwitch := true; fallthrough
case \IF \CLASS \FUNCTION \LET \WITH then skipBlock := true
case \CASE
if seenSwitch then skipBlock := true else return true
case \INDENT
return skipBlock := false if skipBlock
return pre.0 not of BLOCK_USERS
case \WHILE
return false if token.done
fallthrough
case \FOR
skipBlock := true
return able tokens, i
or pre.0 is \CREMENT
or pre.0 is \... and pre.spaced
false
# Inserts the closing )CALL at `i`.
!function go token, i then tokens.splice i, 0 [\)CALL '' tokens[i-1]2]
# Object literals may be written without braces for simple cases.
# Insert the missing braces here to aid the parser.
# Wraps brace-less object literals in `{` ... `}` tokens, triggered by each
# `:` token that is not already inside braces.
!function addImplicitBraces tokens
stack = []; i = 0
while token = tokens[++i]
unless \: is tag = token.0
# Track open brackets and indents; `start` keeps the most recently closed one.
switch
case tag of CLOSERS then start = stack.pop!
case tag of OPENERS
tag = \{ if tag is \INDENT and tokens[i-1]0 is \{
stack.push [tag, i]
continue
# A key of the form `(...)` starts at its opening parenthesis.
paren = tokens[i-1]0 is \)
index = if paren then start.1 else i-1
pre = tokens[index-1]
continue unless pre.0 of <[ : ASSIGN IMPORT ]> or stack[*-1]?0 is not \{
stack.push [\{]
inline = not pre.doblock and pre.0 not of <[ NEWLINE INDENT ]>
# Move the opening brace in front of any comments preceding the key.
index -= 2 while tokens[index-2]?0 is \COMMENT
tokens.splice index, 0 [\{ \{ tokens[index]2]
detectEnd tokens, ++i+1, ok, go
# Decides whether the implicit object ends before `token`.
function ok token, i
switch tag = token.0
case \, then break
case \NEWLINE then return true if inline
case \DEDENT then return true
case \POST_IF \FOR \WHILE \|> then return inline
default return false
# After a comma or newline, the object continues only if another key follows.
t1 = tokens[i+1]?0
t1 is not (if tag is \, then \NEWLINE else \COMMENT) and
\: is not tokens[if t1 is \( then 1 + indexOfPair tokens, i+1 else i+2]?0
# Inserts the closing `}` at `i`.
!function go token, i then tokens.splice i, 0 [\} '' token.2]
# - Slip unary {pl,min}uses off signed numbers.
# - Expand ranges and words.
# - Insert `()` after each `function`/`let` facing a block.
# - Insert `, ` after each non-callable token facing an argument token.
# Final pass over the token stream: splits signs off numbers, expands ranges
# and word lists, adds call tokens for block-facing `new`/`function`/`let`,
# and inserts implicit commas.
!function expandLiterals tokens
i = 0
while token = tokens[++i]
switch token.0
case \STRNUM
# Move a leading sign into a separate `+-` token.
if ~'-+'indexOf sig = token.1.charAt 0
token.1.=slice 1
tokens.splice i++ 0 [\+- sig, token.2]
continue if token.callable
case \RANGE
next = tokens[i+1]
lno = token.2
[from, char] = decode token.1, lno
[to, tochar] = next.0 is \STRNUM and decode next.1, lno
# Both ends must be numbers or both must be characters.
carp 'bad "to" in range' lno if to!? or char ^ tochar
by = 1
if byp = tokens[i+2]?0 is \RANGE_BY
carp 'bad "by" in range' tokens[i+2]2 unless by = +tokens[i+3]?1
else if from > to
by = -1
ts = []
enc = if char then character else String
# Each element pushes two tokens (value and comma); the push result is the
# new length, which is checked against the size limit.
add = !->
if 0x10000 < ts.push [\STRNUM enc n; lno] [\, \, lno]
carp 'range limit exceeded' lno
if token.op is \to
then add! for n from from to to by by
else add! for n from from til to by by
# Drop the trailing comma; nothing to drop means the range was empty.
ts.pop! or carp 'empty range' lno
tokens.splice i, 2 + 2 * byp, ...ts
i += ts.length - 1
case \WORDS
ts = [[\[ \[ lno = token.2]]
# Strip the two-character delimiters on each end, then split on whitespace.
for word of token.1.slice 2 -2 .match /\S+/g or ''
ts.push [\STRNUM; string \', word, lno; lno] [\, \, lno]
tokens.splice i, 1, ...ts, [\] \] lno]
i += ts.length
case \INDENT
if tokens[i-1]
# `new` facing a block gets an empty parameter list and an arrow.
if that.1 is \new
tokens.splice i++ 0 \
[\PARAM( '' token.2] [\)PARAM '' token.2] [\-> '' token.2]
else if that.0 of <[ FUNCTION LET ]>
tokens.splice i, 0 [\CALL( '' token.2] [\)CALL '' token.2]
i += 2
continue
case \LITERAL \} \!? then break
case \) \)CALL then continue if token.1
case \] then continue if token.index
case \CREMENT then continue unless able tokens, i
default continue
# Tokens that reach this point get a comma when followed by an argument.
if token.spaced and tokens[i+1]0 of ARG
tokens.splice ++i, 0 [\, \, token.2]
# Seeks `tokens` from `i`ndex and `go` for a token of the same level
# that's `ok` or an unmatched closer.
# Scans forward from `i` and calls `go token, i` at the first token that
# either satisfies `ok token, i` at nesting depth zero or closes more levels
# than were opened during the scan. Does nothing if no such token exists.
!function detectEnd tokens, i, ok, go
levels = 0
while token = tokens[i], ++i
return go token, i if not levels and ok token, i
[tag] = token
# Openers raise the depth and closers lower it.
return go token, i if 0 > levels += tag of OPENERS or -(tag of CLOSERS)
# Returns the index of the token that closes the opener at `tokens[i]`,
# counting nested pairs of the same kind. Returns -1 if no closer is found.
function indexOfPair tokens, i
  opener = tokens[i]0
  closer = INVERSES[opener]
  depth = 1
  while token = tokens[++i]
    if token.0 is opener
      ++depth
    else if token.0 is closer
      return i unless --depth
  -1
#### Constants
##### Keywords