Optimize array inclusion checks in the parser translator

Earlopain · Earlopain · commit ca9500a3fc4e · 2025-03-12T21:08:06.000+01:00
I see `Array#include?` at 2.4% of runtime. This is probably because of `LPAREN_CONVERSION_TOKEN_TYPES`,
but the other checks will be faster as well.

Also remove some inline array checks. They are specifically optimized in Ruby since 3.4, but for now prism supports Ruby >= 2.7.
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
@@ -1,5 +1,6 @@
 # frozen_string_literal: true
 
+require "set"
 require "strscan"
 require_relative "../../polyfill/append_as_bytes"
 
@@ -9,16 +10,17 @@ class Parser
       # Accepts a list of prism tokens and converts them into the expected
       # format for the parser gem.
       class Lexer
+        # These tokens are always skipped
+        TYPES_ALWAYS_SKIP = %i[IGNORED_NEWLINE __END__ EOF].to_set
+        private_constant :TYPES_ALWAYS_SKIP
+
         # The direct translating of types between the two lexers.
         TYPES = {
           # These tokens should never appear in the output of the lexer.
-          EOF: nil,
           MISSING: nil,
           NOT_PROVIDED: nil,
-          IGNORED_NEWLINE: nil,
           EMBDOC_END: nil,
           EMBDOC_LINE: nil,
-          __END__: nil,
 
           # These tokens have more or less direct mappings.
           AMPERSAND: :tAMPER2,
@@ -194,18 +196,18 @@ class Lexer
         #
         # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
         # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
-        LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]
+        LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG].to_set
 
         # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
         # The following token types are listed as those classified as `tLPAREN`.
         LPAREN_CONVERSION_TOKEN_TYPES = [
           :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
           :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
-        ]
+        ].to_set
 
         # Types of tokens that are allowed to continue a method call with comments in-between.
         # For these, the parser gem doesn't emit a newline token after the last comment.
-        COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT]
+        COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].to_set
         private_constant :COMMENT_CONTINUATION_TYPES
 
         # Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -252,7 +254,7 @@ def to_a
           while index < length
             token, state = lexed[index]
             index += 1
-            next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
+            next if TYPES_ALWAYS_SKIP.include?(token.type)
 
             type = TYPES.fetch(token.type)
             value = token.value
@@ -344,7 +346,7 @@ def to_a
             when :tSTRING_BEG
               next_token = lexed[index][0]
               next_next_token = lexed[index + 1][0]
-              basic_quotes = ["\"", "'"].include?(value)
+              basic_quotes = value == '"' || value == "'"
 
               if basic_quotes && next_token&.type == :STRING_END
                 next_location = token.location.join(next_token.location)