Skip to content

Commit ddc6991

Browse files
vinistock and kddnewton committed
Add ParseResult#attach_comments! to tie comments to their locations
Co-authored-by: Kevin Newton <kddnewton@users.noreply.github.com>
1 parent 754bc88 commit ddc6991

File tree

2 files changed

+191
-0
lines changed

2 files changed

+191
-0
lines changed

lib/yarp.rb

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ def line(value)
2020
offsets.bsearch_index { |offset| offset > value } || offsets.length
2121
end
2222

23+
# Returns the entry of `offsets` that belongs to the line reported by
# `line(value)` for the given offset. `line` is 1-based here (it is used as
# `line(value) - 1` to index `offsets`), hence the subtraction.
# NOTE(review): `offsets` presumably holds the starting byte offset of each
# line, which would make this the offset where the containing line begins.
def line_offset(value)
  offsets[line(value) - 1]
end
26+
2327
# Returns the distance between the given offset and the `offsets` entry of
# the line it falls on (as reported by `line`).
def column(value)
  value - offsets[line(value) - 1]
end
@@ -46,10 +50,14 @@ class Location
4650
# The length of this location in bytes.
4751
attr_reader :length
4852

53+
# The list of comments attached to this location
54+
attr_reader :comments
55+
4956
# Stores the source, start offset and length of this location, and starts
# it off with an empty list of attached comments.
def initialize(source, start_offset, length)
  @source = source
  @start_offset = start_offset
  @length = length
  @comments = []
end
5462

5563
# Create a new location object with the given options.
@@ -81,6 +89,12 @@ def start_line
8189
source.line(start_offset)
8290
end
8391

92+
# The content of the line where this location starts, up to (but not
# including) the start of this location.
def start_line_slice
  # Offset reported by the source for the line containing start_offset.
  offset = source.line_offset(start_offset)
  source.slice(offset, start_offset - offset)
end
97+
8498
# The line number where this location ends.
8599
def end_line
86100
source.line(end_offset - 1)
@@ -142,6 +156,11 @@ def deconstruct_keys(keys)
142156
{ type: type, location: location }
143157
end
144158

159+
# Returns true if the comment happens on the same line as other code and
# false if the comment is by itself. Only comments of type :inline can be
# trailing; the check looks for any non-whitespace content on the line
# before the comment starts.
def trailing?
  type == :inline && !location.start_line_slice.strip.empty?
end
163+
145164
# Returns a debugging representation of this comment that shows its type
# and location.
def inspect
  "#<YARP::Comment @type=#{@type.inspect} @location=#{@location.inspect}>"
end
@@ -230,6 +249,154 @@ def failure?
230249
!success?
231250
end
232251

252+
# CommentAttacher is a utility class to attach comments to locations in the AST
class CommentAttacher
  attr_reader :parse_result

  def initialize(parse_result)
    @parse_result = parse_result
  end

  # Walks every comment of the parse result and appends it to the comments
  # list of the nearest target found by #nearest_targets.
  def attach!
    parse_result.comments.each do |comment|
      preceding, enclosing, following = nearest_targets(parse_result.value, comment)

      # `enclosing` is never nil (nearest_targets always returns a NodeTarget
      # in that slot), so it doubles as the final fallback.
      target =
        if comment.trailing?
          preceding || following || enclosing
        else
          # If a comment exists on its own line, prefer a leading comment.
          following || preceding || enclosing
        end

      target << comment
    end
  end

  # A target for attaching comments that is based on a specific node
  class NodeTarget
    attr_reader :node

    def initialize(node)
      @node = node
    end

    def start_offset
      node.location.start_offset
    end

    def end_offset
      node.location.end_offset
    end

    # True when the comment's range lies entirely within the node's range.
    def encloses?(comment)
      start_offset <= comment.location.start_offset && comment.location.end_offset <= end_offset
    end

    def <<(comment)
      node.location.comments << comment
    end
  end

  # A target for attaching comments that is based on a location, which could be a part of a node. For example, the
  # `end` token of a ClassNode
  class LocationTarget
    attr_reader :location

    def initialize(location)
      @location = location
    end

    def start_offset
      location.start_offset
    end

    def end_offset
      location.end_offset
    end

    # A bare location has no children to descend into, so it never reports
    # that it encloses a comment.
    def encloses?(_comment)
      false
    end

    def <<(comment)
      location.comments << comment
    end
  end

  private

  # Responsible for finding the nearest targets to the given comment within the context of the given encapsulating
  # node. Returns a three element array: the closest preceding target (or nil), a NodeTarget for the innermost node
  # that was searched, and the closest following target (or nil).
  def nearest_targets(node, comment)
    comment_start = comment.location.start_offset
    comment_end = comment.location.end_offset

    targets = child_targets(node)
    preceding = nil
    following = nil

    left = 0
    right = targets.length

    # This is a custom binary search that finds the nearest nodes to the given comment. When it finds a node that
    # completely encapsulates the comment, it recurses downward into the tree.
    while left < right
      middle = (left + right) / 2
      target = targets[middle]

      # The comment is completely contained by this target. Abandon the binary search at this level.
      return nearest_targets(target.node, comment) if target.encloses?(comment)

      if target.end_offset <= comment_start
        # This target falls completely before the comment. Because we will never consider this target or any targets
        # before it again, this target must be the closest preceding target we have encountered so far.
        preceding = target
        left = middle + 1
      elsif comment_end <= target.start_offset
        # This target falls completely after the comment. Because we will never consider this target or any targets
        # after it again, this target must be the closest following target we have encountered so far.
        following = target
        right = middle
      else
        # This should only happen if there is a bug in this parser.
        raise "Comment location overlaps with target location"
      end
    end

    [preceding, NodeTarget.new(node), following]
  end

  # Collects the attachable targets directly below the given node, sorted by their start offset. The node's own
  # :location entry is skipped; statements are flattened into their body.
  def child_targets(node)
    targets = []

    node.deconstruct_keys(nil).each do |key, value|
      next if key == :location

      case value
      when StatementsNode
        targets.concat(value.body.map { |child| NodeTarget.new(child) })
      when Node
        targets << NodeTarget.new(value)
      when Location
        targets << LocationTarget.new(value)
      when Array
        # Only arrays of nodes are considered; the first element decides.
        targets.concat(value.map { |child| NodeTarget.new(child) }) if value.first.is_a?(Node)
      end
    end

    targets.sort_by!(&:start_offset)
  end
end
394+
395+
# Attach the list of comments to their respective locations in the AST.
# Delegates to CommentAttacher and returns whatever its #attach! returns.
def attach_comments!
  CommentAttacher.new(self).attach!
end
399+
233400
# Keep in sync with Java MarkNewlinesVisitor
234401
class MarkNewlinesVisitor < YARP::Visitor
235402
def initialize(newline_marked)

test/yarp/comments_test.rb

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,30 @@ def test_comment_embedded_document_with_content_on_same_line
5555
assert_comment source, :embdoc, 0..24
5656
end
5757

58+
# Parses a small class with leading and trailing comments at three nesting
# levels and checks which node location each comment ends up attached to.
def test_attaching_comments
  source = <<~RUBY
    # Foo class
    class Foo
      # bar method
      def bar
        # baz invocation
        baz
      end # bar end
    end # Foo end
  RUBY

  result = YARP.parse(source)
  result.attach_comments!
  tree = result.value
  class_node = tree.statements.body.first
  method_node = class_node.body.body.first
  call_node = method_node.body.body.first

  # Leading and trailing comments are both expected on the node they surround.
  assert_equal("# Foo class\n# Foo end\n", class_node.location.comments.map { |c| c.location.slice }.join)
  assert_equal("# bar method\n# bar end\n", method_node.location.comments.map { |c| c.location.slice }.join)
  assert_equal("# baz invocation\n", call_node.location.comments.map { |c| c.location.slice }.join)
end
81+
5882
private
5983

6084
def assert_comment(source, type, location)

0 commit comments

Comments
 (0)