4
4
module Opal
5
5
class Lexer
6
6
7
+ STR_FUNC_ESCAPE = 0x01
8
+ STR_FUNC_EXPAND = 0x02
9
+ STR_FUNC_REGEXP = 0x04
10
+ STR_FUNC_QWORDS = 0x08
11
+ STR_FUNC_SYMBOL = 0x10
12
+ STR_FUNC_INDENT = 0x20
13
+
14
+ STR_SQUOTE = 0x00
15
+ STR_DQUOTE = STR_FUNC_EXPAND
16
+ STR_XQUOTE = STR_FUNC_EXPAND
17
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
18
+ STR_SWORD = STR_FUNC_QWORDS
19
+ STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND
20
+ STR_SSYM = STR_FUNC_SYMBOL
21
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
22
+
7
23
attr_reader :line , :scope_line , :scope
8
24
9
25
attr_accessor :lex_state , :strterm , :scanner
@@ -137,8 +153,8 @@ def strterm_expand?(strterm)
137
153
[ :dquote , :dsym , :dword , :heredoc , :xquote , :regexp ] . include? type
138
154
end
139
155
140
- def new_strterm ( type , start , finish )
141
- { :type => type , :beg => start , :end => finish }
156
+ def new_strterm ( func , term , paren )
157
+ { :type => :string , :func => func , :term => term , :paren => paren }
142
158
end
143
159
144
160
def new_strterm2 ( type , start , finish )
@@ -210,11 +226,11 @@ def peek_variable_name
210
226
end
211
227
212
228
def here_document ( str_parse )
213
- eos_regx = /[ \t ]*#{ Regexp . escape ( str_parse [ :end ] ) } (\r *\n |$)/
229
+ eos_regx = /[ \t ]*#{ Regexp . escape ( str_parse [ :term ] ) } (\r *\n |$)/
214
230
expand = true
215
231
216
232
if check ( eos_regx )
217
- scan ( /[ \t ]*#{ Regexp . escape ( str_parse [ :end ] ) } / )
233
+ scan ( /[ \t ]*#{ Regexp . escape ( str_parse [ :term ] ) } / )
218
234
219
235
if str_parse [ :scanner ]
220
236
@scanner_stack << str_parse [ :scanner ]
@@ -262,21 +278,21 @@ def here_document(str_parse)
262
278
263
279
def parse_string
264
280
str_parse = self . strterm
281
+ func = str_parse [ :func ]
282
+
265
283
scanner = @scanner
266
284
space = false
267
285
268
- expand = strterm_expand? ( str_parse )
269
-
270
- words = [ 'w' , 'W' ] . include? str_parse [ :beg ]
286
+ words = ( func & STR_FUNC_QWORDS ) != 0
287
+ expand = ( func & STR_FUNC_EXPAND ) != 0
288
+ regexp = ( func & STR_FUNC_REGEXP ) != 0
271
289
272
- space = true if [ 'w' , 'W' ] . include? ( str_parse [ :beg ] ) and scan ( /\s +/ )
290
+ space = true if words and scan ( /\s +/ )
273
291
274
292
# if not end of string, so we must be parsing contents
275
293
str_buffer = [ ]
276
294
277
- # see if we can read end of string/xstring/regexp markers
278
- # if scan /#{str_parse[:end]}/
279
- if scan Regexp . new ( Regexp . escape ( str_parse [ :end ] ) )
295
+ if scan Regexp . new ( Regexp . escape ( str_parse [ :term ] ) )
280
296
if words && !str_parse [ :done_last_space ] #&& space
281
297
str_parse [ :done_last_space ] = true
282
298
pushback ( 1 )
@@ -287,7 +303,7 @@ def parse_string
287
303
if str_parse [ :balance ]
288
304
if str_parse [ :nesting ] == 0
289
305
290
- if str_parse [ :type ] == : regexp
306
+ if regexp
291
307
self . yylval = scan ( /\w +/ )
292
308
return :tREGEXP_END
293
309
end
@@ -297,18 +313,10 @@ def parse_string
297
313
str_parse [ :nesting ] -= 1
298
314
self . strterm = str_parse
299
315
end
300
-
301
- elsif [ '"' , "'" ] . include? str_parse [ :beg ]
302
- return :tSTRING_END
303
-
304
- elsif str_parse [ :beg ] == '`'
305
- return :tSTRING_END
306
-
307
- elsif str_parse [ :beg ] == '/' || str_parse [ :type ] == :regexp
316
+ elsif regexp
308
317
@lex_state = :expr_end
309
318
self . yylval = scan ( /\w +/ )
310
319
return :tREGEXP_END
311
-
312
320
else
313
321
if str_parse [ :scanner ]
314
322
@scanner_stack << str_parse [ :scanner ]
@@ -324,7 +332,7 @@ def parse_string
324
332
return :tSPACE
325
333
end
326
334
327
- if str_parse [ :balance ] and scan Regexp . new ( Regexp . escape ( str_parse [ :beg ] ) )
335
+ if str_parse [ :balance ] and scan Regexp . new ( Regexp . escape ( str_parse [ :paren ] ) )
328
336
str_buffer << scanner . matched
329
337
str_parse [ :nesting ] += 1
330
338
elsif check ( /#[@$]/ )
@@ -337,7 +345,6 @@ def parse_string
337
345
338
346
elsif scan ( /#\{ / )
339
347
if expand
340
- # we are into ruby code, so stop parsing content (for now)
341
348
return :tSTRING_DBEG
342
349
else
343
350
str_buffer << scanner . matched
@@ -358,14 +365,14 @@ def parse_string
358
365
end
359
366
360
367
def add_string_content ( str_buffer , str_parse )
368
+ func = str_parse [ :func ]
361
369
scanner = @scanner
362
- # regexp for end of string/regexp
363
- # end_str_re = /#{str_parse[:end]}/
364
- end_str_re = Regexp . new ( Regexp . escape ( str_parse [ :end ] ) )
365
370
366
- expand = strterm_expand? ( str_parse )
371
+ end_str_re = Regexp . new ( Regexp . escape ( str_parse [ :term ] ) )
367
372
368
- words = [ 'W' , 'w' ] . include? str_parse [ :beg ]
373
+ words = ( func & STR_FUNC_QWORDS ) != 0
374
+ expand = ( func & STR_FUNC_EXPAND ) != 0
375
+ regexp = ( func & STR_FUNC_REGEXP ) != 0
369
376
370
377
until scanner . eos?
371
378
c = nil
@@ -384,24 +391,19 @@ def add_string_content(str_buffer, str_parse)
384
391
break
385
392
end
386
393
387
- elsif str_parse [ :balance ] and scan Regexp . new ( Regexp . escape ( str_parse [ :beg ] ) )
394
+ elsif str_parse [ :balance ] and scan Regexp . new ( Regexp . escape ( str_parse [ :paren ] ) )
388
395
str_parse [ :nesting ] += 1
389
396
c = scanner . matched
390
397
391
398
elsif words && scan ( /\s / )
392
399
pushback ( 1 )
393
400
break
394
-
395
401
elsif expand && check ( /#(?=[\$ \@ \{ ])/ )
396
402
break
397
-
398
- #elsif scan(/\\\\/)
399
- #c = scanner.matched
400
403
elsif scan ( /\\ \n / )
401
404
c = "\n "
402
-
403
405
elsif scan ( /\\ / )
404
- if str_parse [ :type ] == : regexp
406
+ if regexp
405
407
if scan ( /(.)/ )
406
408
c = "\\ " + scanner . matched
407
409
end
@@ -414,11 +416,11 @@ def add_string_content(str_buffer, str_parse)
414
416
415
417
unless handled
416
418
reg = if words
417
- Regexp . new ( "[^#{ Regexp . escape str_parse [ :end ] } \# \0 \n \ \\ \\ ]+|." )
419
+ Regexp . new ( "[^#{ Regexp . escape str_parse [ :term ] } \# \0 \n \ \\ \\ ]+|." )
418
420
elsif str_parse [ :balance ]
419
- Regexp . new ( "[^#{ Regexp . escape str_parse [ :end ] } #{ Regexp . escape str_parse [ :beg ] } \# \0 \\ \\ ]+|." )
421
+ Regexp . new ( "[^#{ Regexp . escape str_parse [ :term ] } #{ Regexp . escape str_parse [ :paren ] } \# \0 \\ \\ ]+|." )
420
422
else
421
- Regexp . new ( "[^#{ Regexp . escape str_parse [ :end ] } \# \0 \\ \\ ]+|." )
423
+ Regexp . new ( "[^#{ Regexp . escape str_parse [ :term ] } \# \0 \\ \\ ]+|." )
422
424
end
423
425
424
426
scan reg
@@ -435,7 +437,8 @@ def add_string_content(str_buffer, str_parse)
435
437
def heredoc_identifier
436
438
if scan ( /(-?)['"]?(\w +)['"]?/ )
437
439
heredoc = @scanner [ 2 ]
438
- self . strterm = new_strterm ( :heredoc , heredoc , heredoc )
440
+ self . strterm = new_strterm ( STR_DQUOTE , heredoc , heredoc )
441
+ self . strterm [ :type ] = :heredoc
439
442
440
443
# if ruby code at end of line after heredoc, we have to store it to
441
444
# parse after heredoc is finished parsing
@@ -694,15 +697,15 @@ def yylex
694
697
return :tEQL
695
698
696
699
elsif scan ( /\" / )
697
- self . strterm = new_strterm ( :dquote , '"' , '"' )
700
+ self . strterm = new_strterm ( STR_DQUOTE , '"' , " \0 " )
698
701
return :tSTRING_BEG
699
702
700
703
elsif scan ( /\' / )
701
- self . strterm = new_strterm ( :squote , "'" , "' " )
704
+ self . strterm = new_strterm ( STR_SQUOTE , "'" , "\0 " )
702
705
return :tSTRING_BEG
703
706
704
707
elsif scan ( /\` / )
705
- self . strterm = new_strterm ( :xquote , '`' , '`' )
708
+ self . strterm = new_strterm ( STR_XQUOTE , "`" , " \0 " )
706
709
return :tXSTRING_BEG
707
710
708
711
elsif scan ( /\& / )
@@ -751,41 +754,39 @@ def yylex
751
754
752
755
elsif scan ( /\% [QqWwixr]/ )
753
756
str_type = scanner . matched [ 1 , 1 ]
754
- paren = scan ( /./ )
755
-
756
- term = case paren
757
- when '(' then ')'
758
- when '[' then ']'
759
- when '{' then '}'
760
- else paren
761
- end
762
-
763
- case str_type
764
- when 'Q'
765
- self . strterm = new_strterm2 ( :dquote , paren , term )
766
- return :tSTRING_BEG
767
- when 'q'
768
- self . strterm = new_strterm2 ( :squote , paren , term )
769
- return :tSTRING_BEG
770
- when 'W'
771
- self . strterm = new_strterm ( :dword , 'W' , term )
772
- skip ( /\s */ )
773
- return :tWORDS_BEG
774
- when 'w' , 'i'
775
- self . strterm = new_strterm ( :sword , 'w' , term )
776
- skip ( /\s */ )
777
- return :tAWORDS_BEG
778
- when 'x'
779
- self . strterm = new_strterm2 ( :xquote , paren , term )
780
- return :tXSTRING_BEG
781
- when 'r'
782
- self . strterm = new_strterm2 ( :regexp , paren , term )
783
- return :tREGEXP_BEG
757
+ paren = term = scan ( /./ )
758
+
759
+ case term
760
+ when '(' then term = ')'
761
+ when '[' then term = ']'
762
+ when '{' then term = '}'
763
+ else paren = "\0 "
784
764
end
785
765
766
+ token , func = case str_type
767
+ when 'Q'
768
+ [ :tSTRING_BEG , STR_DQUOTE ]
769
+ when 'q'
770
+ [ :tSTRING_BEG , STR_SQUOTE ]
771
+ when 'W'
772
+ skip ( /\s */ )
773
+ [ :tWORDS_BEG , STR_DWORD ]
774
+ when 'w' , 'i'
775
+ skip ( /\s */ )
776
+ [ :tAWORDS_BEG , STR_SWORD ]
777
+ when 'x'
778
+ [ :tXSTRING_BEG , STR_XQUOTE ]
779
+ when 'r'
780
+ [ :tREGEXP_BEG , STR_REGEXP ]
781
+
782
+ end
783
+
784
+ self . strterm = new_strterm2 ( func , term , paren )
785
+ return token
786
+
786
787
elsif scan ( /\/ / )
787
788
if beg?
788
- self . strterm = new_strterm ( :regexp , '/' , '/' )
789
+ self . strterm = new_strterm ( STR_REGEXP , '/' , '/' )
789
790
return :tREGEXP_BEG
790
791
elsif scan ( /\= / )
791
792
@lex_state = :expr_beg
@@ -794,7 +795,7 @@ def yylex
794
795
@lex_state = :expr_arg
795
796
elsif arg?
796
797
if !check ( /\s / ) && @space_seen
797
- self . strterm = new_strterm ( :regexp , '/' , '/' )
798
+ self . strterm = new_strterm ( STR_REGEXP , '/' , '/' )
798
799
return :tREGEXP_BEG
799
800
end
800
801
else
@@ -811,7 +812,7 @@ def yylex
811
812
if @lex_state == :expr_beg or ( @lex_state == :expr_arg && @space_seen )
812
813
start_word = scan ( /./ )
813
814
end_word = { '(' => ')' , '[' => ']' , '{' => '}' } [ start_word ] || start_word
814
- self . strterm = new_strterm2 ( :dquote , start_word , end_word )
815
+ self . strterm = new_strterm2 ( STR_DQUOTE , end_word , start_word )
815
816
return :tSTRING_BEG
816
817
end
817
818
end
@@ -923,9 +924,9 @@ def yylex
923
924
end
924
925
925
926
if scan ( /\' / )
926
- self . strterm = new_strterm ( :ssym , "'" , "' " )
927
+ self . strterm = new_strterm ( STR_SSYM , "'" , "\0 " )
927
928
elsif scan ( /\" / )
928
- self . strterm = new_strterm ( :dsym , '"' , '"' )
929
+ self . strterm = new_strterm ( STR_DSYM , '"' , " \0 " )
929
930
end
930
931
931
932
@lex_state = :expr_fname
0 commit comments