From feebfe8a55ceac8b62de8a12c117d7c42a2df2d7 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Thu, 12 Apr 2012 13:14:07 -0800 Subject: [PATCH] Added compare18 and compare19 tasks to help me diff and compare against MRI - Renamed awords to qwords to match stupid MRI naming. (1.8, 1.9) :( - Fixed reswords to match MRI (1.8, 1.9) - Entirely reworked block arg handling. (1.8) - Added missing gvar arg error. (1.8) - Split block_var from for_var. (1.8, 1.9) - Made lambda w/o arg list zero out the arg slot. - Split 1.8 from 1.9 open paren lexer. Gawd that's ugly code. Refactored both 1.8 and 1.9 open paren lexing code into separate methods. Added d method to help debugging output inline with debugging racc output. Added block_var to handle generating all block_var nodes [git-p4: depot-paths = "//src/ruby_parser/dev/": change = 7324] --- Rakefile | 16 +++++++ lib/ruby18_parser.y | 83 +++++++++++++++++++++++++++++----- lib/ruby19_parser.y | 75 +++++++++++++++++++++++++++---- lib/ruby_lexer.rb | 93 ++++++++++++++++++++++++++------------- lib/ruby_parser_extras.rb | 31 ++++++++++++- test/test_ruby_lexer.rb | 10 ++--- 6 files changed, 253 insertions(+), 55 deletions(-) diff --git a/Rakefile b/Rakefile index a19c95e3..1fd7be05 100644 --- a/Rakefile +++ b/Rakefile @@ -122,4 +122,20 @@ task :isolate => :phony file "lib/ruby18_parser.rb" => :isolate file "lib/ruby19_parser.rb" => :isolate +task :compare18 do + sh "./yack.rb lib/ruby18_parser.output > racc18.txt" + sh "./yack.rb parse18.output > yacc18.txt" + sh "diff -du racc18.txt yacc18.txt || true" + puts + sh "diff -du racc18.txt yacc18.txt | wc -l" +end + +task :compare19 do + sh "./yack.rb lib/ruby19_parser.output > racc19.txt" + sh "./yack.rb parse19.output > yacc19.txt" + sh "diff -du racc19.txt yacc19.txt || true" + puts + sh "diff -du racc19.txt yacc19.txt | wc -l" +end + # vim: syntax=Ruby diff --git a/lib/ruby18_parser.y b/lib/ruby18_parser.y index 7b2b1f2a..68213dff 100644 --- a/lib/ruby18_parser.y +++ b/lib/ruby18_parser.y @@ -15,7 +15,7 @@ token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS tLBRACK tRBRACK tLBRACE tLBRACE_ARG tSTAR tSTAR2 tAMPER tAMPER2 tTILDE tPERCENT tDIVIDE tPLUS tMINUS tLT tGT tPIPE tBANG tCARET tLCURLY tRCURLY tBACK_REF2 tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG - tWORDS_BEG tAWORDS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END tSTRING + tWORDS_BEG tQWORDS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END tSTRING tSYMBOL tNL tEH tCOLON tCOMMA tSPACE tSEMI tLAST_TOKEN prechigh @@ -495,7 +495,7 @@ rule | kFOR | kIN | kMODULE | kNEXT | kNIL | kNOT | kOR | kREDO | kRESCUE | kRETRY | kRETURN | kSELF | kSUPER | kTHEN | kTRUE | kUNDEF | kWHEN | kYIELD - | kIF_MOD | kUNLESS_MOD | kWHILE_MOD | kUNTIL_MOD | kRESCUE_MOD + | kIF | kUNLESS | kWHILE | kUNTIL arg: lhs tEQL arg { @@ -881,7 +881,7 @@ rule { result = val[1] } - | none_block_pass + | none args: arg_value { @@ -910,7 +910,7 @@ rule | xstring | regexp | words - | awords + | qwords | var_ref | backref | tFID @@ -1044,7 +1044,7 @@ rule { result = new_case nil, val[3] } - | kFOR block_var kIN + | kFOR for_var kIN { lexer.cond.push true } @@ -1184,12 +1184,71 @@ rule result = val[1] } - block_var: lhs + for_var: lhs | mlhs { val[0].delete_at 1 if val[0][1].nil? 
# HACK } + block_par: mlhs_item + { + result = s(:array, val[0]) + } + | block_par tCOMMA mlhs_item + { + result = self.list_append val[0], val[2] + } + + block_var: block_par + { + result = block_var val[0], nil, nil + } + | block_par tCOMMA + { + result = block_var val[0], nil, nil + } + | block_par tCOMMA tAMPER lhs + { + result = block_var val[0], nil, val[3] + } + | block_par tCOMMA tSTAR lhs tCOMMA tAMPER lhs + { + result = block_var val[0], val[3], val[6] + } + | block_par tCOMMA tSTAR tCOMMA tAMPER lhs + { + result = block_var val[0], s(:splat), val[5] + } + | block_par tCOMMA tSTAR lhs + { + result = block_var val[0], val[3], nil + } + | block_par tCOMMA tSTAR + { + result = block_var val[0], s(:splat), nil + } + | tSTAR lhs tCOMMA tAMPER lhs + { + result = block_var nil, val[1], val[4] + } + | tSTAR tCOMMA tAMPER lhs + { + result = block_var nil, s(:splat), val[3] + } + | tSTAR lhs + { + result = block_var nil, val[1], nil + } + | tSTAR + { + result = block_var nil, s(:splat), nil + } + | tAMPER lhs + { + result = block_var nil, nil, val[1] + } + ; + opt_block_var: none | tPIPE tPIPE { @@ -1429,11 +1488,11 @@ rule result = self.literal_concat val[0], val[1] } - awords: tAWORDS_BEG tSPACE tSTRING_END + qwords: tQWORDS_BEG tSPACE tSTRING_END { result = s(:array) } - | tAWORDS_BEG qword_list tSTRING_END + | tQWORDS_BEG qword_list tSTRING_END { result = val[1] } @@ -1650,12 +1709,16 @@ xstring_contents: none f_norm_arg: tCONSTANT { - yyerror "formal argument cannot be a constant: #{val[0]}" + yyerror "formal argument cannot be a constant" } | tIVAR { yyerror "formal argument cannot be an instance variable" } + | tGVAR + { + yyerror "formal argument cannot be a global variable" + } | tCVAR { yyerror "formal argument cannot be a class variable" @@ -1788,8 +1851,6 @@ xstring_contents: none none: { result = nil } - none_block_pass: { result = nil } - end ---- inner diff --git a/lib/ruby19_parser.y b/lib/ruby19_parser.y index 408c8855..f35eb82e 100644 --- a/lib/ruby19_parser.y +++ b/lib/ruby19_parser.y @@ -15,7 +15,7 @@ token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS tLBRACK tRBRACK tLBRACE tLBRACE_ARG tSTAR tSTAR2 tAMPER tAMPER2 tTILDE tPERCENT tDIVIDE tPLUS tMINUS tLT tGT tPIPE tBANG tCARET tLCURLY tRCURLY tBACK_REF2 tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG - tWORDS_BEG tAWORDS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END tSTRING + tWORDS_BEG tQWORDS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END tSTRING tSYMBOL tNL tEH tCOLON tCOMMA tSPACE tSEMI tLAST_TOKEN tLAMBDA tLAMBEG prechigh @@ -495,7 +495,7 @@ rule | kFOR | kIN | kMODULE | kNEXT | kNIL | kNOT | kOR | kREDO | kRESCUE | kRETRY | kRETURN | kSELF | kSUPER | kTHEN | kTRUE | kUNDEF | kWHEN | kYIELD - | kIF_MOD | kUNLESS_MOD | kWHILE_MOD | kUNTIL_MOD | kRESCUE_MOD + | kIF | kUNLESS | kWHILE | kUNTIL arg: lhs tEQL arg { @@ -925,7 +925,7 @@ rule | xstring | regexp | words - | awords + | qwords | var_ref | backref | tFID @@ -1063,7 +1063,7 @@ rule { result = new_case nil, val[3] } - | kFOR block_var kIN + | kFOR for_var kIN { lexer.cond.push true } @@ -1203,12 +1203,71 @@ rule result = val[1] } - block_var: lhs + for_var: lhs | mlhs { val[0].delete_at 1 if val[0][1].nil? 
# HACK } + block_par: mlhs_item + { + result = s(:array, val[0]) + } + | block_par tCOMMA mlhs_item + { + result = self.list_append val[0], val[2] + } + + block_var: block_par + { + result = block_var val[0], nil, nil + } + | block_par tCOMMA + { + result = block_var val[0], nil, nil + } + | block_par tCOMMA tAMPER lhs + { + result = block_var val[0], nil, val[3] + } + | block_par tCOMMA tSTAR lhs tCOMMA tAMPER lhs + { + result = block_var val[0], val[3], val[6] + } + | block_par tCOMMA tSTAR tCOMMA tAMPER lhs + { + result = block_var val[0], s(:splat), val[5] + } + | block_par tCOMMA tSTAR lhs + { + result = block_var val[0], val[3], nil + } + | block_par tCOMMA tSTAR + { + result = block_var val[0], s(:splat), nil + } + | tSTAR lhs tCOMMA tAMPER lhs + { + result = block_var nil, val[1], val[4] + } + | tSTAR tCOMMA tAMPER lhs + { + result = block_var nil, s(:splat), val[3] + } + | tSTAR lhs + { + result = block_var nil, val[1], nil + } + | tSTAR + { + result = block_var nil, s(:splat), nil + } + | tAMPER lhs + { + result = block_var nil, nil, val[1] + } + ; + opt_block_var: none | tPIPE tPIPE { @@ -1299,7 +1358,7 @@ rule lambda: lambda_body { call = s(:call, nil, :lambda, s(:arglist)) - result = s(:iter, call, nil, val[0]) + result = s(:iter, call, 0, val[0]) } | f_larglist lambda_body { @@ -1492,11 +1551,11 @@ rule result = self.literal_concat val[0], val[1] } - awords: tAWORDS_BEG tSPACE tSTRING_END + qwords: tQWORDS_BEG tSPACE tSTRING_END { result = s(:array) } - | tAWORDS_BEG qword_list tSTRING_END + | tQWORDS_BEG qword_list tSTRING_END { result = val[1] } diff --git a/lib/ruby_lexer.rb b/lib/ruby_lexer.rb index e6d722a2..2399615c 100644 --- a/lib/ruby_lexer.rb +++ b/lib/ruby_lexer.rb @@ -42,7 +42,7 @@ class RubyLexer STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP STR_FUNC_EXPAND = 0x02 STR_FUNC_REGEXP = 0x04 - STR_FUNC_AWORDS = 0x08 + STR_FUNC_QWORDS = 0x08 STR_FUNC_SYMBOL = 0x10 STR_FUNC_INDENT = 0x20 # <<-HEREDOC @@ -314,10 +314,10 @@ def parse_quote # 58 lines [:tSTRING_BEG, STR_SQUOTE] when 'W' then src.scan(/\s*/) - [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS] + [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS] when 'w' then src.scan(/\s*/) - [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS] + [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS] when 'x' then [:tXSTRING_BEG, STR_XQUOTE] when 'r' then @@ -343,7 +343,7 @@ def parse_string(quote) # 65 lines paren = open term_re = Regexp.escape term - awords = (func & STR_FUNC_AWORDS) != 0 + qwords = (func & STR_FUNC_QWORDS) != 0 regexp = (func & STR_FUNC_REGEXP) != 0 expand = (func & STR_FUNC_EXPAND) != 0 @@ -352,10 +352,10 @@ def parse_string(quote) # 65 lines return :tSTRING_END end - space = true if awords and src.scan(/\s+/) + space = true if qwords and src.scan(/\s+/) if self.nest == 0 && src.scan(/#{term_re}/) then - if awords then + if qwords then quote[1] = nil return :tSPACE elsif regexp then @@ -508,7 +508,7 @@ def tokadd_escape term # 20 lines end def tokadd_string(func, term, paren) # 105 lines - awords = (func & STR_FUNC_AWORDS) != 0 + qwords = (func & STR_FUNC_QWORDS) != 0 escape = (func & STR_FUNC_ESCAPE) != 0 expand = (func & STR_FUNC_EXPAND) != 0 regexp = (func & STR_FUNC_REGEXP) != 0 @@ -528,7 +528,7 @@ def tokadd_string(func, term, paren) # 105 lines self.nest += 1 when src.scan(term_re) then self.nest -= 1 - when awords && src.scan(/\s/) then + when qwords && src.scan(/\s/) then src.pos -= 1 break when expand && src.scan(/#(?=[\$\@\{])/) then @@ -538,10 +538,10 @@ def tokadd_string(func, term, paren) # 105 lines # do 
nothing when src.check(/\\/) then case - when awords && src.scan(/\\\n/) then + when qwords && src.scan(/\\\n/) then string_buffer << "\n" next - when awords && src.scan(/\\\s/) then + when qwords && src.scan(/\\\s/) then c = ' ' when expand && src.scan(/\\\n/) then next @@ -570,7 +570,7 @@ def tokadd_string(func, term, paren) # 105 lines t = Regexp.escape term x = Regexp.escape(paren) if paren && paren != "\000" - re = if awords then + re = if qwords then /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever else /[^#{t}#{x}\#\0\\]+|./ @@ -710,24 +710,11 @@ def yylex # 826 lines return :tDOT end elsif src.scan(/\(/) then - result = :tLPAREN2 - - self.command_start = true if ruby18 - - if lex_state == :expr_beg || lex_state == :expr_mid then - result = :tLPAREN - elsif space_seen then - if lex_state == :expr_cmdarg then - result = :tLPAREN_ARG - elsif lex_state == :expr_arg then - self.tern.push false - warning("don't put space before argument parentheses") - - result = :tLPAREN2 - end - else - self.tern.push false - end + result = if ruby18 then + yylex_paren18 space_seen + else + yylex_paren19 space_seen + end self.expr_beg_push "(" @@ -1244,6 +1231,53 @@ def yylex # 826 lines end end + def yylex_paren18 space_seen + self.command_start = true + result = :tLPAREN2 + + if lex_state == :expr_beg || lex_state == :expr_mid then + result = :tLPAREN + elsif space_seen then + if lex_state == :expr_cmdarg then + result = :tLPAREN_ARG + elsif lex_state == :expr_arg then + self.tern.push false + warning "don't put space before argument parentheses" + end + else + self.tern.push false + end + + result + end + + def yylex_paren19 space_seen + if (lex_state == :expr_beg || lex_state == :expr_mid || + lex_state == :expr_value || lex_state == :expr_class) then + result = :tLPAREN + elsif ((lex_state == :expr_arg || lex_state == :expr_cmdarg) and + space_seen) then + result = :tLPAREN_ARG + else + self.tern.push false + result = :tLPAREN2 + end + # HACK paren_nest++; + + # HACK: this is a mess, but it makes the tests pass, so suck it + # (stolen from the 1.8 side) + if lex_state == :expr_beg || lex_state == :expr_mid then + # do nothing + elsif space_seen then + if lex_state == :expr_arg then + self.tern.push false + end + else + self.tern.push false + end + result + end + def process_token(command_state) token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/) @@ -1251,7 +1285,6 @@ def process_token(command_state) result = nil last_state = lex_state - case token when /^\$/ then self.lex_state, result = :expr_end, :tGVAR diff --git a/lib/ruby_parser_extras.rb b/lib/ruby_parser_extras.rb index 4fb3e8e6..4f539d4b 100644 --- a/lib/ruby_parser_extras.rb +++ b/lib/ruby_parser_extras.rb @@ -3,6 +3,10 @@ require 'sexp' require 'strscan' +def d o + $stderr.puts o.inspect +end + # WHY do I have to do this?!? class Regexp ONCE = 0 unless defined? 
ONCE # FIX: remove this - it makes no sense @@ -66,7 +70,7 @@ def getch alias :old_scan :scan def scan re s = old_scan re - p :scan => [s, caller.first] if s + d :scan => [s, caller.first] if s s end end @@ -137,6 +141,31 @@ def arg_concat node1, node2 # TODO: nuke node1 end + def block_var ary, splat, block + ary ||= s(:array) + + if splat then + if splat == s(:splat) then + ary << splat + else + ary << s(:splat, splat) + end + end + + if block then + block[-1] = :"&#{block[-1]}" + ary << block + end + + result = if ary.length > 2 or ary.splat then + s(:masgn, ary) + else + ary.last + end + + result + end + def args arg, optarg, rest_arg, block_arg, post_arg = nil arg ||= s(:args) diff --git a/test/test_ruby_lexer.rb b/test/test_ruby_lexer.rb index 8470b6f2..e7fe9bdf 100755 --- a/test/test_ruby_lexer.rb +++ b/test/test_ruby_lexer.rb @@ -1693,7 +1693,7 @@ def test_yylex_string_pct_other def test_yylex_string_pct_w util_bad_token("%w[s1 s2 ", - :tAWORDS_BEG, "%w[", + :tQWORDS_BEG, "%w[", :tSTRING_CONTENT, "s1", :tSPACE, nil, :tSTRING_CONTENT, "s2", @@ -1702,7 +1702,7 @@ def test_yylex_string_pct_w def test_yylex_string_pct_w_bs_nl util_lex_token("%w[s1 \\\ns2]", - :tAWORDS_BEG, "%w[", + :tQWORDS_BEG, "%w[", :tSTRING_CONTENT, "s1", :tSPACE, nil, :tSTRING_CONTENT, "\ns2", @@ -1712,7 +1712,7 @@ def test_yylex_string_pct_w_bs_nl def test_yylex_string_pct_w_bs_sp util_lex_token("%w[s\\ 1 s\\ 2]", - :tAWORDS_BEG, "%w[", + :tQWORDS_BEG, "%w[", :tSTRING_CONTENT, "s 1", :tSPACE, nil, :tSTRING_CONTENT, "s 2", @@ -1722,7 +1722,7 @@ def test_yylex_string_pct_w_bs_sp def test_yylex_string_pct_w_tab util_lex_token("%w[abc\tdef]", - :tAWORDS_BEG, "%w[", + :tQWORDS_BEG, "%w[", :tSTRING_CONTENT, "abc\tdef", :tSPACE, nil, :tSTRING_END, nil) @@ -1899,7 +1899,7 @@ def util_lex_token input, *args token = args.shift value = args.shift assert @lex.advance, "no more tokens" - assert_equal [token, value], [@lex.token, [@lex.yacc_value].flatten.first] + assert_equal [token, value], [@lex.token, [@lex.yacc_value].flatten.first], input end deny @lex.advance, "must be empty, but had #{[@lex.token, @lex.yacc_value].inspect}"
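
For reference, the surface syntax behind the reworked block_par/block_var rules. This is an illustrative sketch, not part of the patch; the shapes are written so the snippet runs under a current Ruby, and each comment names the grammar alternative it corresponds to.

  # Block-parameter shapes matched by the new block_var rule.
  blk = proc { |a|         }  # block_par
  blk = proc { |a,|        }  # block_par tCOMMA
  blk = proc { |a, &b|     }  # block_par tCOMMA tAMPER lhs
  blk = proc { |a, *r, &b| }  # block_par tCOMMA tSTAR lhs tCOMMA tAMPER lhs
  blk = proc { |a, *, &b|  }  # block_par tCOMMA tSTAR tCOMMA tAMPER lhs
  blk = proc { |a, *r|     }  # block_par tCOMMA tSTAR lhs
  blk = proc { |a, *|      }  # block_par tCOMMA tSTAR
  blk = proc { |*r, &b|    }  # tSTAR lhs tCOMMA tAMPER lhs
  blk = proc { |*, &b|     }  # tSTAR tCOMMA tAMPER lhs
  blk = proc { |*r|        }  # tSTAR lhs
  blk = proc { |*|         }  # tSTAR
  blk = proc { |&b|        }  # tAMPER lhs
  blk                         # reference the last proc so the variable is used

Plain for loops go through the separate for_var rule instead (for a, b in pairs ... end), which keeps the old lhs | mlhs behavior.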
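
The new block_var helper in ruby_parser_extras.rb turns those pieces into either a bare lhs node or a :masgn wrapper. Below is a minimal, self-contained sketch of that decision using plain Ruby arrays in place of Sexps; toy_block_var and the array literals are illustrative only, not the library's API.

  # Mirrors the shape decision in the patch's block_var helper: one plain
  # parameter collapses to its lhs node; two or more entries, a splat, or a
  # block argument get wrapped in a :masgn around the :array.
  def toy_block_var params, splat, block
    ary = [:array, *params]
    ary << (splat == [:splat] ? splat : [:splat, splat]) if splat  # bare * vs *name
    ary << [:lasgn, :"&#{block}"] if block                         # &name

    splatted = ary.any? { |e| e.is_a?(Array) && e.first == :splat }

    if ary.length > 2 || splatted then
      [:masgn, ary]
    else
      ary.last
    end
  end

  p toy_block_var([[:lasgn, :a]], nil, nil)
  # => [:lasgn, :a]                                          { |a| ... }
  p toy_block_var([[:lasgn, :a], [:lasgn, :b]], nil, nil)
  # => [:masgn, [:array, [:lasgn, :a], [:lasgn, :b]]]        { |a, b| ... }
  p toy_block_var([[:lasgn, :a]], [:lasgn, :r], :b)
  # => [:masgn, [:array, [:lasgn, :a], [:splat, [:lasgn, :r]], [:lasgn, :"&b"]]]
  #                                                          { |a, *r, &b| ... }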
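
On the awords -> qwords rename: MRI's parse.y calls the non-interpolating %w form qwords and the interpolating %W form words, which is what the tQWORDS_BEG / STR_FUNC_QWORDS names now line up with. The difference in plain Ruby:

  name = "world"

  p %w[hello #{name} a\ b]  # qwords (%w): no interpolation -> ["hello", "\#{name}", "a b"]
  p %W[hello #{name} a\ b]  # words  (%W): interpolates     -> ["hello", "world", "a b"]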
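
The f_norm_arg change fills in the missing global-variable case, so every non-local formal argument form now gets a specific error. A quick way to see all four, assuming the Ruby18Parser class generated from this grammar and that parse failures surface as Racc::ParseError or a SyntaxError subclass (both assumptions; adjust to your build):

  require "ruby_parser"

  ["def m(Foo) end",   # formal argument cannot be a constant
   "def m(@a) end",    # formal argument cannot be an instance variable
   "def m($a) end",    # formal argument cannot be a global variable (the new case)
   "def m(@@a) end",   # formal argument cannot be a class variable
  ].each do |src|
    begin
      Ruby18Parser.new.parse src
    rescue Racc::ParseError, SyntaxError => e
      puts "#{src.inspect}: #{e.message}"
    end
  end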