Skip to content

Commit cfd4f28

Browse files
committed
Handle more aliases. Better testing of prism ripper CLI and a test for it.
1 parent 0adb234 commit cfd4f28

File tree

3 files changed

+229
-43
lines changed

3 files changed

+229
-43
lines changed

bin/prism

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ module Prism
220220
source, filepath = read_source(argv)
221221

222222
ripper = Ripper.sexp_raw(source)
223-
prism = Prism::RipperCompat.sexp_raw(source)
223+
prism = Prism::Translation::Ripper.sexp_raw(source) rescue :parse_error
224224

225225
puts "Ripper:"
226226
pp ripper

lib/prism/translation/ripper.rb

Lines changed: 144 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,44 @@ def visit_break_node(node)
222222
on_break(on_args_add_block(args_val, false))
223223
end
224224

225+
# Visit an AliasMethodNode.
226+
def visit_alias_method_node(node)
227+
# For both the old and new name, if there is a colon in the symbol
228+
# name (e.g. 'alias :foo :bar') then we do *not* emit the [:symbol] wrapper around
229+
# the lexer token (e.g. :@ident) inside [:symbol_literal]. But if there
230+
# is no colon (e.g. 'alias foo bar') then we *do* still emit the [:symbol] wrapper.
231+
232+
if node.new_name.is_a?(SymbolNode) && !node.new_name.opening
233+
new_name_val = visit_symbol_literal_node(node.new_name, no_symbol_wrapper: true)
234+
else
235+
new_name_val = visit(node.new_name)
236+
end
237+
if node.old_name.is_a?(SymbolNode) && !node.old_name.opening
238+
old_name_val = visit_symbol_literal_node(node.old_name, no_symbol_wrapper: true)
239+
else
240+
old_name_val = visit(node.old_name)
241+
end
242+
243+
on_alias(new_name_val, old_name_val)
244+
end
245+
246+
# Visit an AliasGlobalVariableNode.
247+
def visit_alias_global_variable_node(node)
248+
on_var_alias(visit(node.new_name), visit(node.old_name))
249+
end
250+
251+
# Visit a GlobalVariableReadNode.
252+
def visit_global_variable_read_node(node)
253+
bounds(node.location)
254+
on_gvar(node.name.to_s)
255+
end
256+
257+
# Visit a BackReferenceReadNode.
258+
def visit_back_reference_read_node(node)
259+
bounds(node.location)
260+
on_backref(node.name.to_s)
261+
end
262+
225263
# Visit an AndNode.
226264
def visit_and_node(node)
227265
visit_binary_operator(node)
@@ -326,23 +364,7 @@ def visit_x_string_node(node)
326364

327365
# Visit an InterpolatedStringNode node.
328366
def visit_interpolated_string_node(node)
329-
parts = node.parts.map do |part|
330-
case part
331-
when StringNode
332-
bounds(part.content_loc)
333-
on_tstring_content(part.content)
334-
when EmbeddedStatementsNode
335-
on_string_embexpr(visit(part))
336-
else
337-
raise NotImplementedError, "Unexpected node type in InterpolatedStringNode"
338-
end
339-
end
340-
341-
string_list = parts.inject(on_string_content) do |items, item|
342-
on_string_add(items, item)
343-
end
344-
345-
on_string_literal(string_list)
367+
on_string_literal(visit_enumerated_node(node))
346368
end
347369

348370
# Visit an EmbeddedStatementsNode node.
@@ -352,15 +374,12 @@ def visit_embedded_statements_node(node)
352374

353375
# Visit a SymbolNode node.
354376
def visit_symbol_node(node)
355-
if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s"))
356-
bounds(node.value_loc)
357-
tstring_val = on_tstring_content(node.value.to_s)
358-
return on_dyna_symbol(on_string_add(on_string_content, tstring_val))
359-
end
377+
visit_symbol_literal_node(node)
378+
end
360379

361-
bounds(node.value_loc)
362-
ident_val = on_ident(node.value.to_s)
363-
on_symbol_literal(on_symbol(ident_val))
380+
# Visit an InterpolatedSymbolNode node.
381+
def visit_interpolated_symbol_node(node)
382+
on_dyna_symbol(visit_enumerated_node(node))
364383
end
365384

366385
# Visit a StatementsNode node.
@@ -459,6 +478,25 @@ def visit_elements(elements)
459478
end
460479
end
461480

481+
# Visit an InterpolatedStringNode or an InterpolatedSymbolNode node.
482+
def visit_enumerated_node(node)
483+
parts = node.parts.map do |part|
484+
case part
485+
when StringNode
486+
bounds(part.content_loc)
487+
on_tstring_content(part.content)
488+
when EmbeddedStatementsNode
489+
on_string_embexpr(visit(part))
490+
else
491+
raise NotImplementedError, "Unexpected node type in visit_enumerated_node"
492+
end
493+
end
494+
495+
parts.inject(on_string_content) do |items, item|
496+
on_string_add(items, item)
497+
end
498+
end
499+
462500
# Visit an operation-and-assign node, such as +=.
463501
def visit_binary_op_assign(node, operator: node.operator)
464502
bounds(node.name_loc)
@@ -487,6 +525,87 @@ def visit_aref_field_node(node)
487525
on_assign(on_aref_field(visit(node.receiver), args_val), assign_val)
488526
end
489527

528+
# In an alias statement Ripper will emit @kw instead of @ident if the object
529+
# being aliased is a Ruby keyword. For instance, in the line "alias :foo :if",
530+
# the :if is treated as a lexer keyword. So we need to know what symbols are
531+
# also keywords.
532+
RUBY_KEYWORDS = [
533+
"alias",
534+
"and",
535+
"begin",
536+
"BEGIN",
537+
"break",
538+
"case",
539+
"class",
540+
"def",
541+
"defined?",
542+
"do",
543+
"else",
544+
"elsif",
545+
"end",
546+
"END",
547+
"ensure",
548+
"false",
549+
"for",
550+
"if",
551+
"in",
552+
"module",
553+
"next",
554+
"nil",
555+
"not",
556+
"or",
557+
"redo",
558+
"rescue",
559+
"retry",
560+
"return",
561+
"self",
562+
"super",
563+
"then",
564+
"true",
565+
"undef",
566+
"unless",
567+
"until",
568+
"when",
569+
"while",
570+
"yield",
571+
"__ENCODING__",
572+
"__FILE__",
573+
"__LINE__",
574+
]
575+
576+
# Ripper has several methods of emitting a symbol literal. Inside an alias
577+
# sometimes it suppresses the [:symbol] wrapper around ident. If the symbol
578+
# is also the name of a keyword (e.g. :if) it will emit a :@kw wrapper, not
579+
# an :@ident wrapper, with similar treatment for constants and operators.
580+
def visit_symbol_literal_node(node, no_symbol_wrapper: false)
581+
if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s"))
582+
bounds(node.value_loc)
583+
str_val = node.value.to_s
584+
if str_val == ""
585+
return on_dyna_symbol(on_string_content)
586+
else
587+
tstring_val = on_tstring_content(str_val)
588+
return on_dyna_symbol(on_string_add(on_string_content, tstring_val))
589+
end
590+
end
591+
592+
bounds(node.value_loc)
593+
node_name = node.value.to_s
594+
if RUBY_KEYWORDS.include?(node_name)
595+
token_val = on_kw(node_name)
596+
elsif node_name.length == 0
597+
raise NotImplementedError
598+
elsif /[[:upper:]]/.match(node_name[0])
599+
token_val = on_const(node_name)
600+
elsif /[[:punct:]]/.match(node_name[0])
601+
token_val = on_op(node_name)
602+
else
603+
token_val = on_ident(node_name)
604+
end
605+
sym_val = no_symbol_wrapper ? token_val : on_symbol(token_val)
606+
on_symbol_literal(sym_val)
607+
end
608+
490609
# Visit a node that represents a number. We need to explicitly handle the
491610
# unary - operator.
492611
def visit_number(node)

test/prism/ripper_test.rb

Lines changed: 84 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,50 @@
44

55
module Prism
66
class RipperTest < TestCase
7+
def truffleruby?
8+
RUBY_ENGINE == "truffleruby"
9+
end
10+
11+
def windows?
12+
Gem.win_platform?
13+
end
14+
15+
# Ripper produces certain ambiguous structures. For instance, it often
16+
# adds an :args_add_block with "false" as the block meaning there is
17+
# no block call. It can be hard to tell which of multiple equivalent
18+
# structures it will produce. This method attempts to return a normalized
19+
# comparable structure.
20+
def normalized_sexp(parsed)
21+
if parsed.is_a?(Array)
22+
# For args_add_block, if the third entry is nil or false, remove it.
23+
# Note that CRuby Ripper uses false for no block, while older JRuby
24+
# uses nil. We need to do this for both.
25+
return normalized_sexp(parsed[1]) if parsed[0] == :args_add_block && !parsed[2]
26+
27+
parsed.each.with_index do |item, idx|
28+
if item.is_a?(Array)
29+
parsed[idx] = normalized_sexp(parsed[idx])
30+
end
31+
end
32+
end
33+
34+
parsed
35+
end
36+
37+
def assert_ripper_equivalent(source, path: "inline source code")
38+
expected = Ripper.sexp_raw(source)
39+
40+
refute_nil expected, "Could not parse #{path} with Ripper!"
41+
expected = normalized_sexp(expected)
42+
actual = Prism::Translation::Ripper.sexp_raw(source)
43+
refute_nil actual, "Could not parse #{path} with Prism!"
44+
actual = normalized_sexp(actual)
45+
assert_equal expected, actual, "Expected Ripper and Prism to give equivalent output for #{path}!"
46+
end
47+
48+
end
49+
50+
class RipperShortSourceTest < RipperTest
751
def test_binary
852
assert_equivalent("1 + 2")
953
assert_equivalent("3 - 4 * 5")
@@ -36,7 +80,7 @@ def test_method_calls_with_variable_names
3680
assert_equivalent("foo.bar")
3781

3882
# TruffleRuby prints emoji symbols differently in a way that breaks here.
39-
if RUBY_ENGINE != "truffleruby"
83+
unless truffleruby?
4084
assert_equivalent("🗻")
4185
assert_equivalent("🗻.location")
4286
assert_equivalent("foo.🗻")
@@ -57,9 +101,9 @@ def test_method_calls_with_variable_names
57101
def test_method_call_blocks
58102
assert_equivalent("foo { |a| a }")
59103

60-
# assert_equivalent("foo(bar 1)")
61-
# assert_equivalent("foo bar 1")
62-
# assert_equivalent("foo(bar 1) { 7 }")
104+
assert_equivalent("foo(bar 1)")
105+
assert_equivalent("foo bar 1")
106+
assert_equivalent("foo(bar 1) { 7 }")
63107
end
64108

65109
def test_method_calls_on_immediate_values
@@ -69,7 +113,7 @@ def test_method_calls_on_immediate_values
69113
assert_equivalent("7 and 7")
70114
assert_equivalent("7 || 7")
71115
assert_equivalent("7 or 7")
72-
#assert_equivalent("'racecar'.reverse")
116+
assert_equivalent("'racecar'.reverse")
73117
end
74118

75119
def test_range
@@ -142,20 +186,49 @@ def test_assign
142186
assert_equivalent("a = 1")
143187
end
144188

189+
def test_alias
190+
assert_equivalent("alias :foo :bar")
191+
assert_equivalent("alias $a $b")
192+
assert_equivalent("alias $a $'")
193+
assert_equivalent("alias foo bar")
194+
assert_equivalent("alias foo if")
195+
assert_equivalent("alias :'def' :\"abc\#{1}\"")
196+
assert_equivalent("alias :\"abc\#{1}\" :'def'")
197+
198+
unless truffleruby?
199+
assert_equivalent("alias :foo :Ę") # Uppercase Unicode character is a constant
200+
assert_equivalent("alias :Ę :foo")
201+
end
202+
203+
assert_equivalent("alias foo +")
204+
assert_equivalent("alias foo :+")
205+
assert_equivalent("alias :foo :''")
206+
assert_equivalent("alias :'' :foo")
207+
end
208+
209+
# This is *exactly* the kind of thing where Ripper would have a weird
210+
# special case we didn't handle correctly. We're still testing with
211+
# a leading colon since putting random keywords there will often get
212+
# parse errors. Mostly we want to know that Ripper will use :@kw
213+
# instead of :@ident for the lexer symbol for all of these.
214+
def test_keyword_aliases
215+
Prism::Translation::Ripper::RUBY_KEYWORDS.each do |keyword|
216+
assert_equivalent("alias :foo :#{keyword}")
217+
end
218+
end
219+
145220
private
146221

147222
def assert_equivalent(source)
148-
expected = Ripper.sexp_raw(source)
149-
150-
refute_nil expected
151-
assert_equal expected, Prism::Translation::Ripper.sexp_raw(source)
223+
assert_ripper_equivalent(source)
152224
end
153225
end
154226

155-
class RipperFixturesTest < TestCase
227+
class RipperFixturesTest < RipperTest
156228
#base = File.join(__dir__, "fixtures")
157229
#relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
158230
relatives = [
231+
"alias.txt",
159232
"arithmetic.txt",
160233
"booleans.txt",
161234
"boolean_operators.txt",
@@ -172,14 +245,8 @@ class RipperFixturesTest < TestCase
172245
# and explicitly set the external encoding to UTF-8 to override the binmode default.
173246
source = File.read(path, binmode: true, external_encoding: Encoding::UTF_8)
174247

175-
expected = Ripper.sexp_raw(source)
176-
if expected.nil?
177-
puts "Could not parse #{path.inspect}!"
178-
end
179-
refute_nil expected
180-
assert_equal expected, Translation::Ripper.sexp_raw(source)
248+
assert_ripper_equivalent(source, path: path)
181249
end
182250
end
183-
184251
end
185252
end

0 commit comments

Comments
 (0)