1
1
# frozen_string_literal: true
2
2
3
+ require "set"
3
4
require "strscan"
4
5
require_relative "../../polyfill/append_as_bytes"
5
6
@@ -9,16 +10,17 @@ class Parser
9
10
# Accepts a list of prism tokens and converts them into the expected
10
11
# format for the parser gem.
11
12
class Lexer
13
+ # These tokens are always skipped
14
+ TYPES_ALWAYS_SKIP = %i[ IGNORED_NEWLINE __END__ EOF ] . to_set
15
+ private_constant :TYPES_ALWAYS_SKIP
16
+
12
17
# The direct translation of types between the two lexers.
13
18
TYPES = {
14
19
# These tokens should never appear in the output of the lexer.
15
- EOF : nil ,
16
20
MISSING : nil ,
17
21
NOT_PROVIDED : nil ,
18
- IGNORED_NEWLINE : nil ,
19
22
EMBDOC_END : nil ,
20
23
EMBDOC_LINE : nil ,
21
- __END__ : nil ,
22
24
23
25
# These tokens have more or less direct mappings.
24
26
AMPERSAND : :tAMPER2 ,
@@ -194,18 +196,18 @@ class Lexer
194
196
#
195
197
# NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, `kDO` is still returned
196
198
# instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
197
- LAMBDA_TOKEN_TYPES = [ :kDO_LAMBDA , :tLAMBDA , :tLAMBEG ]
199
+ LAMBDA_TOKEN_TYPES = [ :kDO_LAMBDA , :tLAMBDA , :tLAMBEG ] . to_set
198
200
199
201
# The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
200
202
# The following token types are listed as those classified as `tLPAREN`.
201
203
LPAREN_CONVERSION_TOKEN_TYPES = [
202
204
:kBREAK , :kCASE , :tDIVIDE , :kFOR , :kIF , :kNEXT , :kRETURN , :kUNTIL , :kWHILE , :tAMPER , :tANDOP , :tBANG , :tCOMMA , :tDOT2 , :tDOT3 ,
203
205
:tEQL , :tLPAREN , :tLPAREN2 , :tLPAREN_ARG , :tLSHFT , :tNL , :tOP_ASGN , :tOROP , :tPIPE , :tSEMI , :tSTRING_DBEG , :tUMINUS , :tUPLUS
204
- ]
206
+ ] . to_set
205
207
206
208
# Types of tokens that are allowed to continue a method call with comments in-between.
207
209
# For these, the parser gem doesn't emit a newline token after the last comment.
208
- COMMENT_CONTINUATION_TYPES = [ :COMMENT , :AMPERSAND_DOT , :DOT ]
210
+ COMMENT_CONTINUATION_TYPES = [ :COMMENT , :AMPERSAND_DOT , :DOT ] . to_set
209
211
private_constant :COMMENT_CONTINUATION_TYPES
210
212
211
213
# Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -252,7 +254,7 @@ def to_a
252
254
while index < length
253
255
token , state = lexed [ index ]
254
256
index += 1
255
- next if %i[ IGNORED_NEWLINE __END__ EOF ] . include? ( token . type )
257
+ next if TYPES_ALWAYS_SKIP . include? ( token . type )
256
258
257
259
type = TYPES . fetch ( token . type )
258
260
value = token . value
@@ -344,7 +346,7 @@ def to_a
344
346
when :tSTRING_BEG
345
347
next_token = lexed [ index ] [ 0 ]
346
348
next_next_token = lexed [ index + 1 ] [ 0 ]
347
- basic_quotes = [ " \" " , "'" ] . include? ( value )
349
+ basic_quotes = value == '"' || value == "'"
348
350
349
351
if basic_quotes && next_token &.type == :STRING_END
350
352
next_location = token . location . join ( next_token . location )
0 commit comments