Skip to content

Commit 207f579

Browse files
committed
More rescue and ensure; constant handling
1 parent 44c4dc5 commit 207f579

File tree

3 files changed

+227
-8
lines changed

3 files changed

+227
-8
lines changed

lib/prism/translation/ripper/ripper_compiler.rb

Lines changed: 174 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,37 @@ def initialize(source)
2020
@result = nil
2121
@lineno = nil
2222
@column = nil
23+
24+
@offset_cache = build_offset_cache(source)
25+
@void_stmt_val = on_stmts_add(on_stmts_new, on_void_stmt)
26+
end
27+
28+
# Excerpt a chunk of the source.
#
# Both arguments are translated through @offset_cache before @source is
# indexed, and the resulting range is inclusive on both ends. A range whose
# end precedes its start yields an empty string.
def source_range(start_c, end_c)
  first_char = @offset_cache[start_c]
  last_char = @offset_cache[end_c]
  @source[first_char..last_char]
end
32+
33+
# Prism deals with offsets in bytes, while Ripper deals with
# offsets in characters. We need to handle this conversion in order to
# excerpt the right characters from the source string.
#
# If the bytesize of the source is the same as the length, then we can
# just use the offset directly, so an identity lambda is returned.
# Otherwise, we build an array where the index is the byte offset and the
# value is the character offset. Both return values answer to `[]`, which
# is how the cache is consulted.
def build_offset_cache(source)
  return ->(offset) { offset } if source.bytesize == source.length

  offset_cache = []
  source.each_char.with_index do |char, char_offset|
    # Every byte of a multibyte character maps to that character's index.
    offset_cache.concat(Array.new(char.bytesize, char_offset))
  end

  # Trailing entry so that the offset one past the last byte resolves too.
  offset_cache << source.length
end
2455

2556
############################################################################
@@ -108,9 +139,29 @@ def visit_local_variable_read_node(node)
108139
# Visit a BlockNode.
#
# Emits on_brace_block for `{ ... }` blocks and on_do_block (with the body
# wrapped by on_bodystmt) for `do ... end` blocks. Raises
# NotImplementedError for any other opening, or when the visited body is
# not a :stmts_add structure in the semicolon case.
def visit_block_node(node)
  params_val = node.parameters.nil? ? nil : visit(node.parameters)

  # If the body is empty, we use a void statement. If there is
  # a semicolon after the opening delimiter, we append a void
  # statement, unless the body is also empty. So we should never
  # get a double void statement.
  body_val =
    if node.body.nil?
      @void_stmt_val
    elsif node_has_semicolon?(node)
      stmts = visit(node.body)
      raise(NotImplementedError, "Unexpected statement structure #{stmts.inspect}") if stmts[0] != :stmts_add

      # The statement list is left-nested, so the leading void statement
      # belongs at the bottom of the chain. Replacing stmts[1] directly
      # would discard every statement except the last one.
      innermost = stmts
      loop do
        inner = innermost[1]
        break unless inner.is_a?(Array) && inner[0] == :stmts_add
        # Never descend into (and then mutate) the shared void statement.
        break if inner.equal?(@void_stmt_val)

        innermost = inner
      end
      innermost[1] = @void_stmt_val
      stmts
    else
      visit(node.body)
    end

  case node.opening
  when "{"
    on_brace_block(params_val, body_val)
  when "do"
    on_do_block(params_val, on_bodystmt(body_val, nil, nil, nil))
  else
    raise NotImplementedError, "Unexpected Block opening character!"
  end
end
115166

116167
# Visit a BlockParametersNode.
@@ -218,7 +269,7 @@ def visit_integer_node(node)
218269
def visit_parentheses_node(node)
219270
body =
220271
if node.body.nil?
221-
on_stmts_add(on_stmts_new, on_void_stmt)
272+
@void_stmt_val
222273
else
223274
visit(node.body)
224275
end
@@ -228,16 +279,80 @@ def visit_parentheses_node(node)
228279
end
229280

230281
# Visit a BeginNode node.
#
# The rescue and ensure clauses are visited first, then the statements.
# The third on_bodystmt slot is always nil here (presumably the else
# clause, which is not translated yet -- confirm against Ripper).
def visit_begin_node(node)
  rescue_val = node.rescue_clause ? visit(node.rescue_clause) : nil
  ensure_val = node.ensure_clause ? visit(node.ensure_clause) : nil

  if node.statements
    stmts_val = visit(node.statements)
    if node_has_semicolon?(node)
      # If there's a semicolon, we need to replace the innermost
      # [:stmts_new] with [:stmts_add, [:stmts_new], [:void_stmt]].
      # The statement list is left-nested, so walk to the bottom of the
      # chain; overwriting stmts_val[1] directly would drop every
      # statement except the last when there are several.
      innermost = stmts_val
      loop do
        inner = innermost[1]
        break unless inner.is_a?(Array) && inner[0] == :stmts_add
        # Never descend into (and then mutate) the shared void statement.
        break if inner.equal?(@void_stmt_val)

        innermost = inner
      end
      innermost[1] = @void_stmt_val
    end
  else
    stmts_val = @void_stmt_val
  end

  on_begin(on_bodystmt(stmts_val, rescue_val, nil, ensure_val))
end
299+
300+
# Visit an EnsureNode node.
#
# An ensure clause without statements is reported as a lone void statement.
def visit_ensure_node(node)
  if node.statements
    # If there are any statements, we need to see if there's a semicolon
    # between the ensure and the start of the first statement.
    stmts_val = visit(node.statements)
    if node_has_semicolon?(node)
      # If there's a semicolon, we need to replace the innermost
      # [:stmts_new] with [:stmts_add, [:stmts_new], [:void_stmt]].
      # The statement list is left-nested, so walk to the bottom of the
      # chain; overwriting stmts_val[1] directly would drop every
      # statement except the last when there are several.
      innermost = stmts_val
      loop do
        inner = innermost[1]
        break unless inner.is_a?(Array) && inner[0] == :stmts_add
        # Never descend into (and then mutate) the shared void statement.
        break if inner.equal?(@void_stmt_val)

        innermost = inner
      end
      innermost[1] = @void_stmt_val
    end
  else
    stmts_val = @void_stmt_val
  end

  on_ensure(stmts_val)
end
237317

238318
# Visit a RescueNode node.
#
# Visits, in order: the following rescue clause (node.consequent), the
# statements, the exception variable, and the exception list. Raises
# NotImplementedError when the exception variable is anything other than a
# LocalVariableTargetNode.
def visit_rescue_node(node)
  consequent_val = node.consequent ? visit(node.consequent) : nil
  stmts_val = node.statements ? visit(node.statements) : @void_stmt_val

  ref_val =
    if node.reference
      raise NotImplementedError unless node.reference.is_a?(LocalVariableTargetNode)

      bounds(node.reference.location)
      on_var_field(on_ident(node.reference.name.to_s))
    end

  # No exception(s)
  exceptions = node.exceptions
  return on_rescue(nil, ref_val, stmts_val, consequent_val) if !exceptions || exceptions.empty?

  exc_vals = exceptions.map { |exc| visit(exc) }

  # A single exception is passed along as a one-element array. Several
  # exceptions are folded into an mrhs: all but the last go through
  # args_add, and the last is attached with mrhs_add.
  if exc_vals.length > 1
    *leading_vals, last_val = exc_vals
    args_val = leading_vals.inject(on_args_new) do |output, exc_val|
      on_args_add(output, exc_val)
    end
    exc_vals = on_mrhs_add(on_mrhs_new_from_args(args_val), last_val)
  end

  on_rescue(exc_vals, ref_val, stmts_val, consequent_val)
end
242357

243358
# Visit a ProgramNode node.
@@ -284,6 +399,20 @@ def visit_interpolated_string_node(node)
284399
on_string_literal(visit_enumerated_node(node))
285400
end
286401

402+
# Visit a ConstantReadNode node.
#
# Updates the current position from the node's location, then wraps the
# constant's name in a const event inside a var_ref event.
def visit_constant_read_node(node)
  bounds(node.location)

  const_val = on_const(node.name.to_s)
  on_var_ref(const_val)
end
407+
408+
# Visit a ConstantWriteNode node.
#
# The assignment target (a const event wrapped in var_field) is built
# before the assigned value is visited.
def visit_constant_write_node(node)
  bounds(node.location)
  target_val = on_var_field(on_const(node.name.to_s))
  value_val = visit(node.value)

  on_assign(target_val, value_val)
end
415+
287416
# Visit an EmbeddedStatementsNode node.
288417
def visit_embedded_statements_node(node)
289418
visit(node.statements)
@@ -558,6 +687,43 @@ def visit_binary_operator(node)
558687
on_binary(left_val, node.operator.to_sym, right_val)
559688
end
560689

690+
# Some nodes, such as `begin`, `ensure` and `do` may have a semicolon
# after the keyword and before the first statement. This affects
# Ripper's return values.
#
# Supports BeginNode, EnsureNode and BlockNode; any other node raises
# NotImplementedError. The check is a plain substring search on the source
# text, so a `;` inside a comment in that gap would also match.
def node_has_semicolon?(node)
  first_field, second_field =
    case node
    when BeginNode
      [:begin_keyword_loc, :statements]
    when EnsureNode
      [:ensure_keyword_loc, :statements]
    when BlockNode
      [:opening_loc, :body]
    else
      raise NotImplementedError
    end
  first_offs, second_offs = delimiter_offsets_for(node, first_field, second_field)

  # Block parameters sit between the opening delimiter and the body, and
  # block-local variables are introduced with a semicolon (`|a; b|`).
  # Start the search after the parameters so that semicolon is not
  # mistaken for a statement separator.
  if node.is_a?(BlockNode) && node.parameters
    params_loc = node.parameters.location
    first_offs = params_loc.start_offset + params_loc.length
  end

  # We need to know if there's a semicolon after the keyword (or the
  # block parameters), but before the start of the first statement.
  range_has_string?(first_offs, second_offs, ";")
end
710+
711+
# For a given node, grab the offsets for the end of the first field
# and the beginning of the second field.
#
# `first` names a reader returning a location (start_offset and length are
# read from it). `second` names a reader whose result answers `body` with
# a list of statements. The second offset is the position just before the
# first statement, so the pair can be used as an inclusive range.
def delimiter_offsets_for(node, first, second)
  first_loc = node.public_send(first)
  first_end_loc = first_loc.start_offset + first_loc.length

  first_stmt = node.public_send(second).body.first
  second_begin_loc = first_stmt.location.start_offset - 1

  [first_end_loc, second_begin_loc]
end
719+
720+
# Check whether the source code contains the given substring between the
# specified offsets. The excerpt comes from source_range(first, last).
def range_has_string?(first, last, token)
  source_range(first, last).include?(token)
end
726+
561727
# This method is responsible for updating lineno and column information
562728
# to reflect the current node.
563729
#

prism.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Gem::Specification.new do |spec|
9393
"lib/prism/translation/parser/lexer.rb",
9494
"lib/prism/translation/parser/rubocop.rb",
9595
"lib/prism/translation/ripper.rb",
96+
"lib/prism/translation/ripper/ripper_compiler.rb",
9697
"lib/prism/translation/ruby_parser.rb",
9798
"lib/prism/visitor.rb",
9899
"src/diagnostic.c",

test/prism/ripper_test.rb

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@ def test_method_call_blocks
104104
assert_equivalent("foo(bar 1)")
105105
assert_equivalent("foo bar 1")
106106
assert_equivalent("foo(bar 1) { 7 }")
107+
assert_equivalent("foo(bar 1) {; 7 }")
108+
assert_equivalent("foo(bar 1) {;}")
109+
110+
assert_equivalent("foo do\n bar\nend")
111+
assert_equivalent("foo do\nend")
112+
assert_equivalent("foo do; end")
113+
assert_equivalent("foo do bar; end")
114+
assert_equivalent("foo do bar end")
115+
assert_equivalent("foo do; bar; end")
107116
end
108117

109118
def test_method_calls_on_immediate_values
@@ -137,8 +146,45 @@ def test_numbers
137146
assert_equivalent("[1ri, -1ri, +1ri, 1.5ri, -1.5ri, +1.5ri]")
138147
end
139148

149+
def test_begin_end
  # Empty begin
  assert_equivalent("begin; end")
  assert_equivalent("begin end")
  assert_equivalent("begin; rescue; end")

  # No whitespace between the keyword and the first statement
  assert_equivalent("begin:s.l end")
end
157+
140158
def test_begin_rescue
  # Rescue with exception(s)
  assert_equivalent("begin a; rescue Exception => ex; c; end")
  assert_equivalent("begin a; rescue RuntimeError => ex; c; rescue Exception => ex; d; end")
  assert_equivalent("begin a; rescue RuntimeError => ex; c; rescue Exception => ex; end")
  assert_equivalent("begin a; rescue RuntimeError,FakeError,Exception => ex; c; end")
  assert_equivalent("begin a; rescue RuntimeError,FakeError,Exception; c; end")

  # Empty rescue
  assert_equivalent("begin a; rescue; ensure b; end")
  assert_equivalent("begin a; rescue; end")

  # Semicolon after both `begin` and `ensure`
  assert_equivalent("begin; a; ensure; b; end")
end
172+
173+
def test_begin_ensure
  # Empty ensure
  assert_equivalent("begin a; rescue; c; ensure; end")
  assert_equivalent("begin a; ensure; end")
  assert_equivalent("begin; ensure; end")

  # Ripper treats statements differently, depending whether there's
  # a semicolon after the keyword.
  assert_equivalent("begin a; rescue; c; ensure b; end")
  assert_equivalent("begin a; rescue c; ensure b; end")
  assert_equivalent("begin a; rescue; c; ensure; b; end")

  # Need to make sure we're handling multibyte characters correctly for source offsets
  assert_equivalent("begin 🗻; rescue; c; ensure;🗻🗻🗻🗻🗻; end")
  assert_equivalent("begin 🗻; rescue; c; ensure 🗻🗻🗻🗻🗻; end")
end
143189

144190
def test_break
@@ -147,6 +193,12 @@ def test_break
147193
assert_equivalent("foo { break [1, 2, 3] }")
148194
end
149195

196+
def test_constants
  assert_equivalent("Foo")
  # Constant name containing a multibyte character
  assert_equivalent("Foo + F🗻")
  assert_equivalent("Foo = 'soda'")
end
201+
150202
def test_op_assign
151203
assert_equivalent("a += b")
152204
assert_equivalent("a -= b")

0 commit comments

Comments
(0)