Fix process_continue (renamed to should_continue?) and check_code_block (renamed to check_code_syntax) (#611)

tompng · web-flow · commit b7f4bfaaa459 · 2023-06-25T14:12:12.000+09:00
diff --git a/lib/irb/cmd/show_source.rb b/lib/irb/cmd/show_source.rb
@@ -58,9 +58,9 @@ def find_end(file, first_line, irb_context)
           tokens.chunk { |tok| tok.pos[0] }.each do |lnum, chunk|
             code = lines[0..lnum].join
             prev_tokens.concat chunk
-            continue = lex.process_continue(prev_tokens)
-            code_block_open = lex.check_code_block(code, prev_tokens)
-            if !continue && !code_block_open
+            continue = lex.should_continue?(prev_tokens)
+            syntax = lex.check_code_syntax(code)
+            if !continue && syntax == :valid
               return first_line + lnum
             end
           end
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
@@ -85,7 +85,7 @@ def configure_io(io)
             # Avoid appending duplicated token. Tokens that include "\n" like multiline tstring_content can exist in multiple lines.
             tokens_until_line << token if token != tokens_until_line.last
           end
-          continue = process_continue(tokens_until_line)
+          continue = should_continue?(tokens_until_line)
           prompt(next_opens, continue, line_num_offset)
         end
       end
@@ -196,7 +196,16 @@ def check_code_state(code)
   end
 
   def code_terminated?(code, tokens, opens)
-    opens.empty? && !process_continue(tokens) && !check_code_block(code, tokens)
+    case check_code_syntax(code)
+    when :unrecoverable_error
+      true
+    when :recoverable_error
+      false
+    when :other_error
+      opens.empty? && !should_continue?(tokens)
+    when :valid
+      !should_continue?(tokens)
+    end
   end
 
   def save_prompt_to_context_io(opens, continue, line_num_offset)
@@ -227,7 +236,7 @@ def readmultiline
       return code if terminated
 
       line_offset += 1
-      continue = process_continue(tokens)
+      continue = should_continue?(tokens)
       save_prompt_to_context_io(opens, continue, line_offset)
     end
   end
@@ -246,29 +255,33 @@ def each_top_level_statement
     end
   end
 
-  def process_continue(tokens)
-    # last token is always newline
-    if tokens.size >= 2 and tokens[-2].event == :on_regexp_end
-      # end of regexp literal
-      return false
-    elsif tokens.size >= 2 and tokens[-2].event == :on_semicolon
-      return false
-    elsif tokens.size >= 2 and tokens[-2].event == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2].tok)
-      return false
-    elsif !tokens.empty? and tokens.last.tok == "\\\n"
-      return true
-    elsif tokens.size >= 1 and tokens[-1].event == :on_heredoc_end # "EOH\n"
-      return false
-    elsif tokens.size >= 2 and tokens[-2].state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2].tok !~ /\A\.\.\.?\z/
-      # end of literal except for regexp
-      # endless range at end of line is not a continue
-      return true
+  def should_continue?(tokens)
+    # Look at the last token and check if IRB needs to continue reading the next line.
+    # Example code that should continue: `a\` `a +` `a.`
+    # Trailing spaces, newlines, and comments are skipped.
+    return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n"
+
+    tokens.reverse_each do |token|
+      case token.event
+      when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end
+        # Skip
+      when :on_regexp_end, :on_heredoc_end, :on_semicolon
+        # State is EXPR_BEG but should not continue
+        return false
+      else
+        # Endless range should not continue
+        return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/)
+
+        # EXPR_DOT and most of the EXPR_BEG should continue
+        return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
+      end
     end
     false
   end
 
-  def check_code_block(code, tokens)
-    return true if tokens.empty?
+  def check_code_syntax(code)
+    lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables)
+    code = "#{lvars_code}\n#{code}"
 
     begin # check if parser error are available
       verbose, $VERBOSE = $VERBOSE, nil
@@ -287,6 +300,7 @@ def check_code_block(code, tokens)
       end
     rescue EncodingError
       # This is for a hash with invalid encoding symbol, {"\xAE": 1}
+      :unrecoverable_error
     rescue SyntaxError => e
       case e.message
       when /unterminated (?:string|regexp) meets end of file/
@@ -299,7 +313,7 @@ def check_code_block(code, tokens)
         #
         #   example:
         #     '
-        return true
+        return :recoverable_error
       when /syntax error, unexpected end-of-input/
         # "syntax error, unexpected end-of-input, expecting keyword_end"
         #
@@ -309,7 +323,7 @@ def check_code_block(code, tokens)
         #       if false
         #         fuga
         #       end
-        return true
+        return :recoverable_error
       when /syntax error, unexpected keyword_end/
         # "syntax error, unexpected keyword_end"
         #
@@ -319,41 +333,26 @@ def check_code_block(code, tokens)
         #
         #   example:
         #     end
-        return false
+        return :unrecoverable_error
       when /syntax error, unexpected '\.'/
         # "syntax error, unexpected '.'"
         #
         #   example:
         #     .
-        return false
+        return :unrecoverable_error
       when /unexpected tREGEXP_BEG/
         # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
         #
         #   example:
         #     method / f /
-        return false
+        return :unrecoverable_error
+      else
+        return :other_error
       end
     ensure
       $VERBOSE = verbose
     end
-
-    last_lex_state = tokens.last.state
-
-    if last_lex_state.allbits?(Ripper::EXPR_BEG)
-      return false
-    elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
-      return false
-    end
-
-    false
+    :valid
   end
 
   def calc_indent_level(opens)
diff --git a/test/irb/test_ruby_lex.rb b/test/irb/test_ruby_lex.rb
@@ -82,25 +82,33 @@ def assert_row_indenting(lines, row)
     end
 
     def assert_indent_level(lines, expected, local_variables: [])
-      indent_level, _code_block_open = check_state(lines, local_variables: local_variables)
+      indent_level, _continue, _code_block_open = check_state(lines, local_variables: local_variables)
       error_message = "Calculated the wrong number of indent level for:\n #{lines.join("\n")}"
       assert_equal(expected, indent_level, error_message)
     end
 
+    def assert_should_continue(lines, expected, local_variables: [])
+      _indent_level, continue, _code_block_open = check_state(lines, local_variables: local_variables)
+      error_message = "Wrong result of should_continue for:\n #{lines.join("\n")}"
+      assert_equal(expected, continue, error_message)
+    end
+
     def assert_code_block_open(lines, expected, local_variables: [])
-      _indent_level, code_block_open = check_state(lines, local_variables: local_variables)
+      _indent_level, _continue, code_block_open = check_state(lines, local_variables: local_variables)
       error_message = "Wrong result of code_block_open for:\n #{lines.join("\n")}"
       assert_equal(expected, code_block_open, error_message)
     end
 
     def check_state(lines, local_variables: [])
       context = build_context(local_variables)
-      tokens = RubyLex.ripper_lex_without_warning(lines.join("\n"), context: context)
+      code = lines.join("\n")
+      tokens = RubyLex.ripper_lex_without_warning(code, context: context)
       opens = IRB::NestingParser.open_tokens(tokens)
       ruby_lex = RubyLex.new(context)
       indent_level = ruby_lex.calc_indent_level(opens)
-      code_block_open = !opens.empty? || ruby_lex.process_continue(tokens)
-      [indent_level, code_block_open]
+      continue = ruby_lex.should_continue?(tokens)
+      terminated = ruby_lex.code_terminated?(code, tokens, opens)
+      [indent_level, continue, !terminated]
     end
 
     def test_interpolate_token_with_heredoc_and_unclosed_embexpr
@@ -235,7 +243,7 @@ def test_symbols
     def test_endless_range_at_end_of_line
       input_with_prompt = [
         PromptRow.new('001:0: :> ', %q(a = 3..)),
-        PromptRow.new('002:0: :* ', %q()),
+        PromptRow.new('002:0: :> ', %q()),
       ]
 
       lines = input_with_prompt.map(&:content)
@@ -256,7 +264,7 @@ def test_heredoc_with_embexpr
         PromptRow.new('009:0:]:* ', %q(B)),
         PromptRow.new('010:0:]:* ', %q(})),
         PromptRow.new('011:0: :> ', %q(])),
-        PromptRow.new('012:0: :* ', %q()),
+        PromptRow.new('012:0: :> ', %q()),
       ]
 
       lines = input_with_prompt.map(&:content)
@@ -285,9 +293,9 @@ def test_heredoc_prompt_with_quotes
     def test_backtick_method
       input_with_prompt = [
         PromptRow.new('001:0: :> ', %q(self.`(arg))),
-        PromptRow.new('002:0: :* ', %q()),
+        PromptRow.new('002:0: :> ', %q()),
         PromptRow.new('003:0: :> ', %q(def `(); end)),
-        PromptRow.new('004:0: :* ', %q()),
+        PromptRow.new('004:0: :> ', %q()),
       ]
 
       lines = input_with_prompt.map(&:content)
@@ -777,6 +785,36 @@ def test_dynamic_prompt_with_blank_line
       assert_dynamic_prompt(lines, expected_prompt_list)
     end
 
+    def test_should_continue
+      assert_should_continue(['a'], false)
+      assert_should_continue(['/a/'], false)
+      assert_should_continue(['a;'], false)
+      assert_should_continue(['<<A', 'A'], false)
+      assert_should_continue(['a...'], false)
+      assert_should_continue(['a\\', ''], true)
+      assert_should_continue(['a.'], true)
+      assert_should_continue(['a+'], true)
+      assert_should_continue(['a; #comment', '', '=begin', 'embdoc', '=end', ''], false)
+      assert_should_continue(['a+ #comment', '', '=begin', 'embdoc', '=end', ''], true)
+    end
+
+    def test_code_block_open_with_should_continue
+      # syntax ok
+      assert_code_block_open(['a'], false) # continue: false
+      assert_code_block_open(['a\\', ''], true) # continue: true
+
+      # recoverable syntax error code is not terminated
+      assert_code_block_open(['a+', ''], true)
+
+      # unrecoverable syntax error code is terminated
+      assert_code_block_open(['.; a+', ''], false)
+
+      # other syntax errors, where it could not be determined whether the error is recoverable
+      assert_code_block_open(['@; a'], false)
+      assert_code_block_open(['@; a+'], true)
+      assert_code_block_open(['@; (a'], true)
+    end
+
     def test_broken_percent_literal
       tokens = RubyLex.ripper_lex_without_warning('%wwww')
       pos_to_index = {}