Fix up some minor parser incompatibilities

kddnewton · kddnewton · commit c6c771d1fa4e · 2024-03-04T09:27:18.000-05:00
diff --git a/bin/prism b/bin/prism
@@ -224,14 +224,21 @@ module Prism
 
     # bin/prism parser [source]
     def parser(argv)
-      require "parser/current"
+      require "parser/ruby33"
       source, filepath = read_source(argv)
 
+      buffer = Parser::Source::Buffer.new(filepath, 1)
+      buffer.source = source
+
       puts "Parser:"
-      pp Parser::CurrentRuby.parse(source, filepath)
+      parser_ast, _, parser_tokens = Parser::Ruby33.new.tokenize(buffer)
+      pp parser_ast
+      pp parser_tokens
 
       puts "Prism:"
-      pp Translation::Parser.parse(source, filepath)
+      prism_ast, _, prism_tokens = Prism::Translation::Parser33.new.tokenize(buffer)
+      pp prism_ast
+      pp prism_tokens
     end
 
     # bin/prism ripper [source]
diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb
@@ -5,6 +5,8 @@ module Prism
   # syntax trees.
   module Translation # steep:ignore
     autoload :Parser, "prism/translation/parser"
+    autoload :Parser33, "prism/translation/parser33"
+    autoload :Parser34, "prism/translation/parser34"
     autoload :Ripper, "prism/translation/ripper"
     autoload :RubyParser, "prism/translation/ruby_parser"
   end
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
@@ -168,7 +168,7 @@ def build_comments(comments, offset_cache)
 
       # Build the parser gem tokens from the prism tokens.
       def build_tokens(tokens, offset_cache)
-        Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
+        Lexer.new(source_buffer, tokens, offset_cache).to_a
       end
 
       # Build a range from a prism location.
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
@@ -177,12 +177,23 @@ class Lexer
           WORDS_SEP: :tSPACE
         }
 
-        private_constant :TYPES
+        # These constants represent flags in our lex state. We really, really
+        # don't want to be using them and we really, really don't want to be
+        # exposing them as part of our public API. Unfortunately, we don't have
+        # another way of matching the exact tokens that the parser gem expects
+        # without them. We should find another way to do this, but in the
+        # meantime we'll hide them from the documentation and mark them as
+        # private constants.
+        EXPR_BEG = 0x1 # :nodoc:
+        EXPR_LABEL = 0x400 # :nodoc:
+
+        private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
 
         # The Parser::Source::Buffer that the tokens were lexed from.
         attr_reader :source_buffer
 
-        # An array of prism tokens that we lexed.
+        # An array of tuples that contain prism tokens and their associated lex
+        # state when they were lexed.
         attr_reader :lexed
 
         # A hash that maps offsets in bytes to offsets in characters.
@@ -205,9 +216,9 @@ def to_a
           index = 0
 
           while index < lexed.length
-            token, = lexed[index]
+            token, state = lexed[index]
             index += 1
-            next if token.type == :IGNORED_NEWLINE || token.type == :EOF
+            next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
 
             type = TYPES.fetch(token.type)
             value = token.value
@@ -218,13 +229,13 @@ def to_a
               value.delete_prefix!("?")
             when :tCOMMENT
               if token.type == :EMBDOC_BEGIN
-                until (next_token = lexed[index]) && next_token.type == :EMBDOC_END
+                until (next_token = lexed[index][0]) && next_token.type == :EMBDOC_END
                   value += next_token.value
                   index += 1
                 end
 
                 value += next_token.value
-                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset])
+                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
                 index += 1
               else
                 value.chomp!
@@ -247,6 +258,8 @@ def to_a
               value.chomp!(":")
             when :tLABEL_END
               value.chomp!(":")
+            when :tLCURLY
+              type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
             when :tNTH_REF
               value = Integer(value.delete_prefix("$"))
             when :tOP_ASGN
@@ -256,13 +269,13 @@ def to_a
             when :tSPACE
               value = nil
             when :tSTRING_BEG
-              if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
+              if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
                 next_location = token.location.join(next_token.location)
                 type = :tSTRING
                 value = ""
                 location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                 index += 1
-              elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
+              elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
                 next_location = token.location.join(next_next_token.location)
                 type = :tSTRING
                 value = next_token.value
@@ -280,7 +293,7 @@ def to_a
                 location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
               end
             when :tSYMBEG
-              if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
+              if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
                 next_location = token.location.join(next_token.location)
                 type = :tSYMBOL
                 value = next_token.value
diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb
@@ -1,4 +1,4 @@
-require_relative "parser"
+# frozen_string_literal: true
 
 module Prism
   module Translation
diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb
@@ -1,4 +1,4 @@
-require_relative "parser"
+# frozen_string_literal: true
 
 module Prism
   module Translation
diff --git a/test/prism/parser_test.rb b/test/prism/parser_test.rb
@@ -101,9 +101,11 @@ def test_warnings
 
       parser = Prism::Translation::Parser33.new
       parser.diagnostics.all_errors_are_fatal = false
+
       warning = nil
       parser.diagnostics.consumer = ->(received) { warning = received }
       parser.parse(buffer)
+
       assert_equal :warning, warning.level
       assert_includes warning.message, "has been interpreted as"
     end

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-require_relative "parser"` |
|  | `1` | `+# frozen_string_literal: true` |
| `2` | `2` |  |
| `3` | `3` | `module Prism` |
| `4` | `4` | `  module Translation` |