/
duplicate_regexp_character_class_element.rb
121 lines (96 loc) · 3.49 KB
/
duplicate_regexp_character_class_element.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# frozen_string_literal: true
module RuboCop
module Cop
module Lint
# Checks for duplicate elements in Regexp character classes.
#
# @example
#
# # bad
# r = /[xyx]/
#
# # bad
# r = /[0-9x0-9]/
#
# # good
# r = /[xy]/
#
# # good
# r = /[0-9x]/
class DuplicateRegexpCharacterClassElement < Base
include RangeHelp
extend AutoCorrector
MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class'
OCTAL_DIGITS_AFTER_ESCAPE = 2
# Registers an offense for every duplicated character class element that
# `each_repeated_character_class_element_loc` reports for `node`, and
# autocorrects it by removing the duplicate's source range.
def on_regexp(node)
  each_repeated_character_class_element_loc(node) do |duplicate_range|
    add_offense(duplicate_range, message: MSG_REPEATED_ELEMENT) do |corrector|
      corrector.remove(duplicate_range)
    end
  end
end
# Walks the parsed regexp tree of `node` and yields the source range of
# every element that repeats an earlier element of the same character
# class. Does nothing (and returns nil) when `node.parsed_tree` is nil.
#
# Elements are compared by their source text, so the first occurrence is
# kept and each later occurrence is yielded.
def each_repeated_character_class_element_loc(node)
  node.parsed_tree&.each_expression do |set_expr|
    next if skip_expression?(set_expr)

    # Source texts already encountered in this character class.
    texts_seen = Set.new

    group_expressions(node, set_expr.expressions) do |members|
      text = members.map(&:to_s).join

      # Set#add? returns nil when the text was already recorded.
      yield source_range(members) unless texts_seen.add?(text)
    end
  end
end
private
# Yields the elements of a character class one group at a time, where a
# group is an array of one or more consecutive expressions that together
# form a single element.
#
# `expressions` is left untouched: the method consumes a private copy.
# Expressions for which `within_interpolation?` is true are skipped and
# never yielded.
def group_expressions(node, expressions)
  # Array#to_a returns the receiver itself, so without the dup the shifts
  # below would drain the caller's array.
  pending = expressions.to_a.dup

  until pending.empty?
    # We may need to compose a group out of multiple expressions.
    group = [pending.shift]
    next if within_interpolation?(node, group.first)

    # With regexp_parser < 2.7 escaped octal sequences may be up to 3
    # separate expressions ("\\0", "0", "1").
    pop_octal_digits(group, pending) if escaped_octal?(group.first.to_s)

    yield(group)
  end
end
# Moves up to OCTAL_DIGITS_AFTER_ESCAPE leading octal-digit expressions
# from the front of `expressions` onto the end of `current_child`.
# Stops at the first expression whose text is not an octal digit, or when
# `expressions` runs out (nil.to_s is "", which is not an octal digit).
# Both arrays are modified in place.
def pop_octal_digits(current_child, expressions)
  OCTAL_DIGITS_AFTER_ESCAPE.times do
    break unless octal?(expressions.first.to_s)

    current_child.push(expressions.shift)
  end
end
# Returns the source range covered by a group of expressions.
# A single-expression group simply uses that expression's own range;
# a longer group spans from the start of its first expression to the end
# of its last one (start position plus the length of its text).
def source_range(children)
  leading = children.first
  return leading.expression if children.size == 1

  trailing = children.last
  start_pos = leading.expression.begin_pos
  end_pos = trailing.expression.begin_pos + trailing.to_s.length

  range_between(start_pos, end_pos)
end
# True for expressions that should not be scanned for duplicates:
# anything whose type is not :set, and sets whose token is :intersection.
def skip_expression?(expr)
  return true unless expr.type == :set

  expr.token == :intersection
end
# Whether the parse-tree `child` overlaps any interpolation of `node`.
#
# Interpolations are blanked with one space per character of the
# interpolation, so unless regexp_parser nodes inside an interpolation are
# skipped, every one of those spaces except the first would be reported as
# a duplicate.
def within_interpolation?(node, child)
  child_range = child.expression
  interpolation_locs(node).any? { |interpolation| interpolation.overlaps?(child_range) }
end
# Whether `string` is exactly a backslash followed by one octal digit,
# e.g. "\\0" or "\\7".
def escaped_octal?(string)
  return false unless string.length == 2

  string.start_with?('\\') && octal?(string[1])
end
# Whether `char` is a single octal digit ("0" through "7").
#
# The range check alone is not sufficient: Range#cover? compares strings
# lexicographically, so multi-character strings such as "12", "0-5" or
# "6-9" sort between "0" and "7" and would otherwise be accepted.
# Non-string arguments (including nil) are rejected by cover? itself.
def octal?(char)
  ('0'..'7').cover?(char) && char.length == 1
end
# Returns the source ranges of the children of `node` for which
# `begin_type?` is true (the regexp's interpolations), memoised per node.
def interpolation_locs(node)
  @interpolation_locs ||= {}

  # The cache is keyed by location rather than by regexp content, because
  # the same content can appear in several patterns.
  @interpolation_locs[node.loc] ||=
    node.children.each_with_object([]) do |child, ranges|
      ranges << child.source_range if child.begin_type?
    end
end
end
end
end
end