@@ -20,6 +20,37 @@ def initialize(source)
20
20
@result = nil
21
21
@lineno = nil
22
22
@column = nil
23
+
24
+ @offset_cache = build_offset_cache ( source )
25
+ @void_stmt_val = on_stmts_add ( on_stmts_new , on_void_stmt )
26
+ end
27
+
28
# Excerpt a chunk of the source.
#
# Both offsets are translated through @offset_cache before @source is
# indexed. The range is inclusive at both ends, so the character at the
# translated end_c is part of the returned string.
def source_range(start_c, end_c)
  first = @offset_cache[start_c]
  last = @offset_cache[end_c]
  @source[first..last]
end
32
+
33
# Prism deals with offsets in bytes, while Ripper deals with offsets in
# characters. We need to handle this conversion in order to build the
# parser gem AST.
#
# If the bytesize of the source is the same as its length, every character
# is one byte wide and an offset can be used directly; in that case an
# identity lambda is returned. Otherwise an array is returned where the
# index is the byte offset and the value is the character offset, with one
# extra trailing entry for the offset just past the end of the source.
#
# Both return values respond to #[] with a single offset argument, which
# is how the result is consumed.
def build_offset_cache(source)
  return ->(offset) { offset } if source.bytesize == source.length

  offset_cache = []

  # Every byte belonging to a character maps to that character's index.
  source.each_char.with_index do |char, char_offset|
    char.bytesize.times { offset_cache << char_offset }
  end

  # Entry for the end-of-source byte offset.
  offset_cache << source.length
end
24
55
25
56
############################################################################
@@ -108,9 +139,29 @@ def visit_local_variable_read_node(node)
108
139
# Visit a BlockNode.
#
# Builds the parameter and body values and dispatches to on_brace_block
# for `{ ... }` blocks or on_do_block (with the body wrapped by
# on_bodystmt) for `do ... end` blocks.
#
# If the body is empty, we use a void statement. If there is a semicolon
# after the opening delimiter, a void statement is placed at the very
# front of the statement list, unless the body is also empty. So we should
# never get a double void statement.
#
# Raises NotImplementedError when the visited body is not a :stmts_add
# list or when the opening delimiter is neither "{" nor "do".
def visit_block_node(node)
  params_val = node.parameters.nil? ? nil : visit(node.parameters)

  body_val =
    if node.body.nil?
      @void_stmt_val
    elsif node_has_semicolon?(node)
      stmts = visit(node.body)
      unless stmts[0] == :stmts_add
        raise NotImplementedError, "Unexpected statement structure #{stmts.inspect}"
      end

      # The list is left-nested: [:stmts_add, [:stmts_add, [:stmts_new], a], b].
      # Walk down to the innermost :stmts_add and replace only its
      # [:stmts_new] operand, so that earlier statements of a
      # multi-statement body are kept.
      innermost = stmts
      innermost = innermost[1] while innermost[1].is_a?(Array) && innermost[1].first == :stmts_add
      innermost[1] = @void_stmt_val
      stmts
    else
      visit(node.body)
    end

  case node.opening
  when "{"
    on_brace_block(params_val, body_val)
  when "do"
    on_do_block(params_val, on_bodystmt(body_val, nil, nil, nil))
  else
    raise NotImplementedError, "Unexpected Block opening character!"
  end
end
115
166
116
167
# Visit a BlockParametersNode.
@@ -218,7 +269,7 @@ def visit_integer_node(node)
218
269
def visit_parentheses_node ( node )
219
270
body =
220
271
if node . body . nil?
221
- on_stmts_add ( on_stmts_new , on_void_stmt )
272
+ @void_stmt_val
222
273
else
223
274
visit ( node . body )
224
275
end
@@ -228,16 +279,80 @@ def visit_parentheses_node(node)
228
279
end
229
280
230
281
# Visit a BeginNode node.
#
# Visits the rescue and ensure clauses when present, builds the statement
# list for the body, and wraps everything with on_bodystmt and on_begin.
# An absent body is represented by the void-statement list.
def visit_begin_node(node)
  rescue_val = node.rescue_clause ? visit(node.rescue_clause) : nil
  ensure_val = node.ensure_clause ? visit(node.ensure_clause) : nil

  stmts_val =
    if node.statements
      stmts = visit(node.statements)
      if node_has_semicolon?(node)
        # If there's a semicolon, we need to replace [:stmts_new] with
        # [:stmts_add, [:stmts_new], [:void_stmt]]. The list is
        # left-nested, so descend to the innermost :stmts_add first;
        # replacing the top-level operand would drop every statement
        # but the last one.
        innermost = stmts
        innermost = innermost[1] while innermost[1].is_a?(Array) && innermost[1].first == :stmts_add
        innermost[1] = @void_stmt_val
      end
      stmts
    else
      @void_stmt_val
    end

  on_begin(on_bodystmt(stmts_val, rescue_val, nil, ensure_val))
end
299
+
300
# Visit an EnsureNode node.
#
# Builds the statement list for the ensure body and passes it to
# on_ensure. An absent body is represented by the void-statement list.
def visit_ensure_node(node)
  stmts_val =
    if node.statements
      # If there are any statements, we need to see if there's a semicolon
      # between the ensure and the start of the first statement.
      stmts = visit(node.statements)
      if node_has_semicolon?(node)
        # If there's a semicolon, we need to replace [:stmts_new] with
        # [:stmts_add, [:stmts_new], [:void_stmt]]. The list is
        # left-nested, so descend to the innermost :stmts_add first;
        # replacing the top-level operand would drop every statement
        # but the last one.
        innermost = stmts
        innermost = innermost[1] while innermost[1].is_a?(Array) && innermost[1].first == :stmts_add
        innermost[1] = @void_stmt_val
      end
      stmts
    else
      @void_stmt_val
    end

  on_ensure(stmts_val)
end
237
317
238
318
# Visit a RescueNode node.
#
# Produces an on_rescue value from four parts, visited in this order:
# the chained rescue clause (node.consequent), the clause body, the
# exception variable, and the list of exception classes.
#
# The exception list is passed to on_rescue as:
# * nil when there are no exceptions,
# * a one-element array when there is exactly one,
# * an on_mrhs_add value (all but the last exception gathered into an
#   args list, then the last one appended) when there are several.
#
# Raises NotImplementedError when the exception variable is anything other
# than a LocalVariableTargetNode.
def visit_rescue_node(node)
  consequent_val = node.consequent ? visit(node.consequent) : nil
  stmts_val = node.statements ? visit(node.statements) : @void_stmt_val

  ref_val =
    if node.reference
      raise NotImplementedError unless node.reference.is_a?(LocalVariableTargetNode)

      # Location must be set before the identifier event is emitted.
      bounds(node.reference.location)
      on_var_field(on_ident(node.reference.name.to_s))
    end

  exc_vals =
    if node.exceptions && !node.exceptions.empty?
      visited = node.exceptions.map { |exc| visit(exc) }
      if visited.length == 1
        visited
      else
        *leading, final = visited
        args_val = leading.inject(on_args_new) { |output, exc_val| on_args_add(output, exc_val) }
        on_mrhs_add(on_mrhs_new_from_args(args_val), final)
      end
    end

  on_rescue(exc_vals, ref_val, stmts_val, consequent_val)
end
242
357
243
358
# Visit a ProgramNode node.
@@ -284,6 +399,20 @@ def visit_interpolated_string_node(node)
284
399
on_string_literal ( visit_enumerated_node ( node ) )
285
400
end
286
401
402
# Visit a ConstantReadNode node.
#
# Records the node's location via bounds, then emits a constant token
# wrapped in a variable reference.
def visit_constant_read_node(node)
  bounds(node.location)
  on_var_ref(on_const(node.name.to_s))
end
407
+
408
# Visit a ConstantWriteNode node.
#
# Records the node's location via bounds, emits the constant as an
# assignment target, then visits the assigned value and combines both
# with on_assign. The target is built before the value is visited.
def visit_constant_write_node(node)
  bounds(node.location)
  const_val = on_var_field(on_const(node.name.to_s))

  on_assign(const_val, visit(node.value))
end
415
+
287
416
# Visit an EmbeddedStatementsNode node.
288
417
def visit_embedded_statements_node ( node )
289
418
visit ( node . statements )
@@ -558,6 +687,43 @@ def visit_binary_operator(node)
558
687
on_binary ( left_val , node . operator . to_sym , right_val )
559
688
end
560
689
690
# Some nodes, such as `begin`, `ensure` and `do` may have a semicolon
# after the keyword and before the first statement. This affects
# Ripper's return values.
#
# Supported nodes are BeginNode, EnsureNode and BlockNode; anything else
# raises NotImplementedError. Returns whether ";" appears in the source
# between the end of the node's opening keyword/delimiter and the start of
# its first statement.
def node_has_semicolon?(node)
  # Pick the accessor for the opening keyword location and the accessor
  # for the statements that follow it.
  first_field, second_field =
    case node
    when BeginNode then [:begin_keyword_loc, :statements]
    when EnsureNode then [:ensure_keyword_loc, :statements]
    when BlockNode then [:opening_loc, :body]
    else raise NotImplementedError
    end

  first_offs, second_offs = delimiter_offsets_for(node, first_field, second_field)

  range_has_string?(first_offs, second_offs, ";")
end
710
+
711
# For a given node, grab the offsets for the end of the first field
# and the beginning of the second field.
#
# first names an accessor returning a location (start_offset, length);
# second names an accessor returning an object whose body is a list of
# nodes. Returns [offset just past the first field, offset of the last
# position before the first node of the second field].
def delimiter_offsets_for(node, first, second)
  first_field = node.public_send(first)
  first_end_loc = first_field.start_offset + first_field.length

  # One before the first statement, so that an inclusive range ending
  # here does not cover the statement's own first character.
  second_begin_loc = node.public_send(second).body.first.location.start_offset - 1

  [first_end_loc, second_begin_loc]
end
719
+
720
# Check whether the source code contains the given substring between the
# specified offsets.
#
# The offsets are handed to source_range unchanged; token is searched for
# as a plain substring of the excerpt.
# NOTE(review): a plain substring check would presumably also match a
# token inside a comment within the range -- confirm whether callers care.
def range_has_string?(first, last, token)
  source_range(first, last).include?(token)
end
726
+
561
727
# This method is responsible for updating lineno and column information
562
728
# to reflect the current node.
563
729
#
0 commit comments