Skip to content

Commit 51f2d6f

Browse files
committed
Re-write string lexing to use func string types
1 parent 337de6c commit 51f2d6f

File tree

1 file changed

+78
-77
lines changed

1 file changed

+78
-77
lines changed

lib/opal/parser/lexer.rb

Lines changed: 78 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,22 @@
44
module Opal
55
class Lexer
66

7+
STR_FUNC_ESCAPE = 0x01
8+
STR_FUNC_EXPAND = 0x02
9+
STR_FUNC_REGEXP = 0x04
10+
STR_FUNC_QWORDS = 0x08
11+
STR_FUNC_SYMBOL = 0x10
12+
STR_FUNC_INDENT = 0x20
13+
14+
STR_SQUOTE = 0x00
15+
STR_DQUOTE = STR_FUNC_EXPAND
16+
STR_XQUOTE = STR_FUNC_EXPAND
17+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
18+
STR_SWORD = STR_FUNC_QWORDS
19+
STR_DWORD = STR_FUNC_QWORDS | STR_FUNC_EXPAND
20+
STR_SSYM = STR_FUNC_SYMBOL
21+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
22+
723
attr_reader :line, :scope_line, :scope
824

925
attr_accessor :lex_state, :strterm, :scanner
@@ -137,8 +153,8 @@ def strterm_expand?(strterm)
137153
[:dquote, :dsym, :dword, :heredoc, :xquote, :regexp].include? type
138154
end
139155

140-
def new_strterm(type, start, finish)
141-
{ :type => type, :beg => start, :end => finish }
156+
def new_strterm(func, term, paren)
157+
{ :type => :string, :func => func, :term => term, :paren => paren }
142158
end
143159

144160
def new_strterm2(type, start, finish)
@@ -210,11 +226,11 @@ def peek_variable_name
210226
end
211227

212228
def here_document(str_parse)
213-
eos_regx = /[ \t]*#{Regexp.escape(str_parse[:end])}(\r*\n|$)/
229+
eos_regx = /[ \t]*#{Regexp.escape(str_parse[:term])}(\r*\n|$)/
214230
expand = true
215231

216232
if check(eos_regx)
217-
scan(/[ \t]*#{Regexp.escape(str_parse[:end])}/)
233+
scan(/[ \t]*#{Regexp.escape(str_parse[:term])}/)
218234

219235
if str_parse[:scanner]
220236
@scanner_stack << str_parse[:scanner]
@@ -262,21 +278,21 @@ def here_document(str_parse)
262278

263279
def parse_string
264280
str_parse = self.strterm
281+
func = str_parse[:func]
282+
265283
scanner = @scanner
266284
space = false
267285

268-
expand = strterm_expand?(str_parse)
269-
270-
words = ['w', 'W'].include? str_parse[:beg]
286+
words = (func & STR_FUNC_QWORDS) != 0
287+
expand = (func & STR_FUNC_EXPAND) != 0
288+
regexp = (func & STR_FUNC_REGEXP) != 0
271289

272-
space = true if ['w', 'W'].include?(str_parse[:beg]) and scan(/\s+/)
290+
space = true if words and scan(/\s+/)
273291

274292
# not at the end of the string yet, so we must be parsing its contents
275293
str_buffer = []
276294

277-
# see if we can read end of string/xstring/regexp markers
278-
# if scan /#{str_parse[:end]}/
279-
if scan Regexp.new(Regexp.escape(str_parse[:end]))
295+
if scan Regexp.new(Regexp.escape(str_parse[:term]))
280296
if words && !str_parse[:done_last_space]#&& space
281297
str_parse[:done_last_space] = true
282298
pushback(1)
@@ -287,7 +303,7 @@ def parse_string
287303
if str_parse[:balance]
288304
if str_parse[:nesting] == 0
289305

290-
if str_parse[:type] == :regexp
306+
if regexp
291307
self.yylval = scan(/\w+/)
292308
return :tREGEXP_END
293309
end
@@ -297,18 +313,10 @@ def parse_string
297313
str_parse[:nesting] -= 1
298314
self.strterm = str_parse
299315
end
300-
301-
elsif ['"', "'"].include? str_parse[:beg]
302-
return :tSTRING_END
303-
304-
elsif str_parse[:beg] == '`'
305-
return :tSTRING_END
306-
307-
elsif str_parse[:beg] == '/' || str_parse[:type] == :regexp
316+
elsif regexp
308317
@lex_state = :expr_end
309318
self.yylval = scan(/\w+/)
310319
return :tREGEXP_END
311-
312320
else
313321
if str_parse[:scanner]
314322
@scanner_stack << str_parse[:scanner]
@@ -324,7 +332,7 @@ def parse_string
324332
return :tSPACE
325333
end
326334

327-
if str_parse[:balance] and scan Regexp.new(Regexp.escape(str_parse[:beg]))
335+
if str_parse[:balance] and scan Regexp.new(Regexp.escape(str_parse[:paren]))
328336
str_buffer << scanner.matched
329337
str_parse[:nesting] += 1
330338
elsif check(/#[@$]/)
@@ -337,7 +345,6 @@ def parse_string
337345

338346
elsif scan(/#\{/)
339347
if expand
340-
# we are into ruby code, so stop parsing content (for now)
341348
return :tSTRING_DBEG
342349
else
343350
str_buffer << scanner.matched
@@ -358,14 +365,14 @@ def parse_string
358365
end
359366

360367
def add_string_content(str_buffer, str_parse)
368+
func = str_parse[:func]
361369
scanner = @scanner
362-
# regexp for end of string/regexp
363-
# end_str_re = /#{str_parse[:end]}/
364-
end_str_re = Regexp.new(Regexp.escape(str_parse[:end]))
365370

366-
expand = strterm_expand?(str_parse)
371+
end_str_re = Regexp.new(Regexp.escape(str_parse[:term]))
367372

368-
words = ['W', 'w'].include? str_parse[:beg]
373+
words = (func & STR_FUNC_QWORDS) != 0
374+
expand = (func & STR_FUNC_EXPAND) != 0
375+
regexp = (func & STR_FUNC_REGEXP) != 0
369376

370377
until scanner.eos?
371378
c = nil
@@ -384,24 +391,19 @@ def add_string_content(str_buffer, str_parse)
384391
break
385392
end
386393

387-
elsif str_parse[:balance] and scan Regexp.new(Regexp.escape(str_parse[:beg]))
394+
elsif str_parse[:balance] and scan Regexp.new(Regexp.escape(str_parse[:paren]))
388395
str_parse[:nesting] += 1
389396
c = scanner.matched
390397

391398
elsif words && scan(/\s/)
392399
pushback(1)
393400
break
394-
395401
elsif expand && check(/#(?=[\$\@\{])/)
396402
break
397-
398-
#elsif scan(/\\\\/)
399-
#c = scanner.matched
400403
elsif scan(/\\\n/)
401404
c = "\n"
402-
403405
elsif scan(/\\/)
404-
if str_parse[:type] == :regexp
406+
if regexp
405407
if scan(/(.)/)
406408
c = "\\" + scanner.matched
407409
end
@@ -414,11 +416,11 @@ def add_string_content(str_buffer, str_parse)
414416

415417
unless handled
416418
reg = if words
417-
Regexp.new("[^#{Regexp.escape str_parse[:end]}\#\0\n\ \\\\]+|.")
419+
Regexp.new("[^#{Regexp.escape str_parse[:term]}\#\0\n\ \\\\]+|.")
418420
elsif str_parse[:balance]
419-
Regexp.new("[^#{Regexp.escape str_parse[:end]}#{Regexp.escape str_parse[:beg]}\#\0\\\\]+|.")
421+
Regexp.new("[^#{Regexp.escape str_parse[:term]}#{Regexp.escape str_parse[:paren]}\#\0\\\\]+|.")
420422
else
421-
Regexp.new("[^#{Regexp.escape str_parse[:end]}\#\0\\\\]+|.")
423+
Regexp.new("[^#{Regexp.escape str_parse[:term]}\#\0\\\\]+|.")
422424
end
423425

424426
scan reg
@@ -435,7 +437,8 @@ def add_string_content(str_buffer, str_parse)
435437
def heredoc_identifier
436438
if scan(/(-?)['"]?(\w+)['"]?/)
437439
heredoc = @scanner[2]
438-
self.strterm = new_strterm(:heredoc, heredoc, heredoc)
440+
self.strterm = new_strterm(STR_DQUOTE, heredoc, heredoc)
441+
self.strterm[:type] = :heredoc
439442

440443
# if ruby code at end of line after heredoc, we have to store it to
441444
# parse after heredoc is finished parsing
@@ -694,15 +697,15 @@ def yylex
694697
return :tEQL
695698

696699
elsif scan(/\"/)
697-
self.strterm = new_strterm(:dquote, '"', '"')
700+
self.strterm = new_strterm(STR_DQUOTE, '"', "\0")
698701
return :tSTRING_BEG
699702

700703
elsif scan(/\'/)
701-
self.strterm = new_strterm(:squote, "'", "'")
704+
self.strterm = new_strterm(STR_SQUOTE, "'", "\0")
702705
return :tSTRING_BEG
703706

704707
elsif scan(/\`/)
705-
self.strterm = new_strterm(:xquote, '`', '`')
708+
self.strterm = new_strterm(STR_XQUOTE, "`", "\0")
706709
return :tXSTRING_BEG
707710

708711
elsif scan(/\&/)
@@ -751,41 +754,39 @@ def yylex
751754

752755
elsif scan(/\%[QqWwixr]/)
753756
str_type = scanner.matched[1, 1]
754-
paren = scan(/./)
755-
756-
term = case paren
757-
when '(' then ')'
758-
when '[' then ']'
759-
when '{' then '}'
760-
else paren
761-
end
762-
763-
case str_type
764-
when 'Q'
765-
self.strterm = new_strterm2(:dquote, paren, term)
766-
return :tSTRING_BEG
767-
when 'q'
768-
self.strterm = new_strterm2(:squote, paren, term)
769-
return :tSTRING_BEG
770-
when 'W'
771-
self.strterm = new_strterm(:dword, 'W', term)
772-
skip(/\s*/)
773-
return :tWORDS_BEG
774-
when 'w', 'i'
775-
self.strterm = new_strterm(:sword, 'w', term)
776-
skip(/\s*/)
777-
return :tAWORDS_BEG
778-
when 'x'
779-
self.strterm = new_strterm2(:xquote, paren, term)
780-
return :tXSTRING_BEG
781-
when 'r'
782-
self.strterm = new_strterm2(:regexp, paren, term)
783-
return :tREGEXP_BEG
757+
paren = term = scan(/./)
758+
759+
case term
760+
when '(' then term = ')'
761+
when '[' then term = ']'
762+
when '{' then term = '}'
763+
else paren = "\0"
784764
end
785765

766+
token, func = case str_type
767+
when 'Q'
768+
[:tSTRING_BEG, STR_DQUOTE]
769+
when 'q'
770+
[:tSTRING_BEG, STR_SQUOTE]
771+
when 'W'
772+
skip(/\s*/)
773+
[:tWORDS_BEG, STR_DWORD]
774+
when 'w', 'i'
775+
skip(/\s*/)
776+
[:tAWORDS_BEG, STR_SWORD]
777+
when 'x'
778+
[:tXSTRING_BEG, STR_XQUOTE]
779+
when 'r'
780+
[:tREGEXP_BEG, STR_REGEXP]
781+
782+
end
783+
784+
self.strterm = new_strterm2(func, term, paren)
785+
return token
786+
786787
elsif scan(/\//)
787788
if beg?
788-
self.strterm = new_strterm(:regexp, '/', '/')
789+
self.strterm = new_strterm(STR_REGEXP, '/', '/')
789790
return :tREGEXP_BEG
790791
elsif scan(/\=/)
791792
@lex_state = :expr_beg
@@ -794,7 +795,7 @@ def yylex
794795
@lex_state = :expr_arg
795796
elsif arg?
796797
if !check(/\s/) && @space_seen
797-
self.strterm = new_strterm(:regexp, '/', '/')
798+
self.strterm = new_strterm(STR_REGEXP, '/', '/')
798799
return :tREGEXP_BEG
799800
end
800801
else
@@ -811,7 +812,7 @@ def yylex
811812
if @lex_state == :expr_beg or (@lex_state == :expr_arg && @space_seen)
812813
start_word = scan(/./)
813814
end_word = { '(' => ')', '[' => ']', '{' => '}' }[start_word] || start_word
814-
self.strterm = new_strterm2(:dquote, start_word, end_word)
815+
self.strterm = new_strterm2(STR_DQUOTE, end_word, start_word)
815816
return :tSTRING_BEG
816817
end
817818
end
@@ -923,9 +924,9 @@ def yylex
923924
end
924925

925926
if scan(/\'/)
926-
self.strterm = new_strterm(:ssym, "'", "'")
927+
self.strterm = new_strterm(STR_SSYM, "'", "\0")
927928
elsif scan(/\"/)
928-
self.strterm = new_strterm(:dsym, '"', '"')
929+
self.strterm = new_strterm(STR_DSYM, '"', "\0")
929930
end
930931

931932
@lex_state = :expr_fname

0 commit comments

Comments
 (0)