Skip to content

Commit

Permalink
[ruby/prism] Fix up some minor parser incompatibilities
Browse files Browse the repository at this point in the history
  • Loading branch information
kddnewton authored and matzbot committed Mar 4, 2024
1 parent 2c787bf commit 5856ea3
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 12 deletions.
2 changes: 2 additions & 0 deletions lib/prism/translation.rb
Expand Up @@ -5,6 +5,8 @@ module Prism
# syntax trees.
module Translation # steep:ignore
# Every translator is registered through autoload, so the file backing a
# constant is only required the first time that constant is referenced.
autoload :Parser, "prism/translation/parser"
# Parser33 and Parser34 are defined in their own files and therefore need
# their own autoload entries in addition to Parser.
autoload :Parser33, "prism/translation/parser33"
autoload :Parser34, "prism/translation/parser34"
autoload :Ripper, "prism/translation/ripper"
autoload :RubyParser, "prism/translation/ruby_parser"
end
Expand Down
2 changes: 1 addition & 1 deletion lib/prism/translation/parser.rb
Expand Up @@ -168,7 +168,7 @@ def build_comments(comments, offset_cache)

# Build the parser gem tokens from the prism tokens.
def build_tokens(tokens, offset_cache)
Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
Lexer.new(source_buffer, tokens, offset_cache).to_a
end

# Build a range from a prism location.
Expand Down
31 changes: 22 additions & 9 deletions lib/prism/translation/parser/lexer.rb
Expand Up @@ -177,12 +177,23 @@ class Lexer
WORDS_SEP: :tSPACE
}

private_constant :TYPES
# These constants represent flags in our lex state. We really, really
# don't want to be using them and we really, really don't want to be
# exposing them as part of our public API. Unfortunately, we don't have
# another way of matching the exact tokens that the parser gem expects
# without them. We should find another way to do this, but in the
# meantime we'll hide them from the documentation and mark them as
# private constants.
EXPR_BEG = 0x1 # :nodoc:
EXPR_LABEL = 0x400 # :nodoc:

private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL

# The Parser::Source::Buffer that the tokens were lexed from.
attr_reader :source_buffer

# An array of prism tokens that we lexed.
# An array of tuples that contain prism tokens and their associated lex
# state when they were lexed.
attr_reader :lexed

# A hash that maps offsets in bytes to offsets in characters.
Expand All @@ -205,9 +216,9 @@ def to_a
index = 0

while index < lexed.length
token, = lexed[index]
token, state = lexed[index]
index += 1
next if token.type == :IGNORED_NEWLINE || token.type == :EOF
next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)

type = TYPES.fetch(token.type)
value = token.value
Expand All @@ -218,13 +229,13 @@ def to_a
value.delete_prefix!("?")
when :tCOMMENT
if token.type == :EMBDOC_BEGIN
until (next_token = lexed[index]) && next_token.type == :EMBDOC_END
until (next_token = lexed[index][0]) && next_token.type == :EMBDOC_END
value += next_token.value
index += 1
end

value += next_token.value
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset])
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
index += 1
else
value.chomp!
Expand All @@ -247,6 +258,8 @@ def to_a
value.chomp!(":")
when :tLABEL_END
value.chomp!(":")
when :tLCURLY
type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
when :tNTH_REF
value = Integer(value.delete_prefix("$"))
when :tOP_ASGN
Expand All @@ -256,13 +269,13 @@ def to_a
when :tSPACE
value = nil
when :tSTRING_BEG
if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
next_location = token.location.join(next_token.location)
type = :tSTRING
value = ""
location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
index += 1
elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
next_location = token.location.join(next_next_token.location)
type = :tSTRING
value = next_token.value
Expand All @@ -280,7 +293,7 @@ def to_a
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
end
when :tSYMBEG
if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
next_location = token.location.join(next_token.location)
type = :tSYMBOL
value = next_token.value
Expand Down
2 changes: 1 addition & 1 deletion lib/prism/translation/parser33.rb
@@ -1,4 +1,4 @@
require_relative "parser"
# frozen_string_literal: true

module Prism
module Translation
Expand Down
2 changes: 1 addition & 1 deletion lib/prism/translation/parser34.rb
@@ -1,4 +1,4 @@
require_relative "parser"
# frozen_string_literal: true

module Prism
module Translation
Expand Down
2 changes: 2 additions & 0 deletions test/prism/parser_test.rb
Expand Up @@ -101,9 +101,11 @@ def test_warnings

parser = Prism::Translation::Parser33.new
parser.diagnostics.all_errors_are_fatal = false

warning = nil
parser.diagnostics.consumer = ->(received) { warning = received }
parser.parse(buffer)

assert_equal :warning, warning.level
assert_includes warning.message, "has been interpreted as"
end
Expand Down

0 comments on commit 5856ea3

Please sign in to comment.