Skip to content

Commit

Permalink
Support meta chars for StartWith and EndWith cops
Browse files Browse the repository at this point in the history
This PR supports regexp metacharacter `^` for `Performance/StartWith` cop
and regexp metacharacter `$` for `Performance/EndWith` cop.
  • Loading branch information
koic committed May 13, 2020
1 parent 204ea8d commit 56cb483
Show file tree
Hide file tree
Showing 11 changed files with 103 additions and 55 deletions.
1 change: 1 addition & 0 deletions .#CHANGELOG.md
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

* [#77](https://github.com/rubocop-hq/rubocop-performance/issues/77): Add new `Performance/BindCall` cop. ([@koic][])
* [#105](https://github.com/rubocop-hq/rubocop-performance/pull/105): Add new `Performance/DeletePrefix` and `Performance/DeleteSuffix` cops. ([@koic][])
* [#106](https://github.com/rubocop-hq/rubocop-performance/pull/106): Support regexp metacharacter `^` for `Performance/StartWith` cop and regexp metacharacter `$` for `Performance/EndWith` cop. ([@koic][])

### Changes

Expand Down
41 changes: 41 additions & 0 deletions lib/rubocop/cop/mixin/regexp_metacharacter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# frozen_string_literal: true

module RuboCop
module Cop
# Common functionality for handling regexp metacharacters.
module RegexpMetacharacter
def literal_at_start?(regex_str)
# is this regexp 'literal' in the sense of only matching literal
# chars, rather than using metachars like `.` and `*` and so on?
# also, is it anchored at the start of the string?
# (tricky: \s, \d, and so on are metacharacters, but other characters
# escaped with a slash are just literals. LITERAL_REGEX takes all
# that into account.)
regex_str =~ /\A(\\A|\^)(?:#{Util::LITERAL_REGEX})+\z/
end

def literal_at_end?(regex_str)
# is this regexp 'literal' in the sense of only matching literal
# chars, rather than using metachars like . and * and so on?
# also, is it anchored at the end of the string?
regex_str =~ /\A(?:#{Util::LITERAL_REGEX})+(\\z|\$)\z/
end

def drop_start_metacharacter(regexp_string)
if regexp_string.start_with?('\\A')
regexp_string[2..-1] # drop `\A` anchor
else
regexp_string[1..-1] # drop `^` anchor
end
end

def drop_end_metacharacter(regexp_string)
if regexp_string.end_with?('\\z')
regexp_string.chomp('\z') # drop `\z` anchor
else
regexp_string.chop # drop `$` anchor
end
end
end
end
end
20 changes: 2 additions & 18 deletions lib/rubocop/cop/performance/delete_prefix.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ module Performance
#
class DeletePrefix < Cop
extend TargetRubyVersion
include RegexpMetacharacter

minimum_target_ruby_version 2.5

Expand Down Expand Up @@ -55,12 +56,7 @@ def autocorrect(node)
gsub_method?(node) do |receiver, bad_method, regexp_str, _|
lambda do |corrector|
good_method = PREFERRED_METHODS[bad_method]

regexp_str = if regexp_str.start_with?('\\A')
regexp_str[2..-1] # drop `\A` anchor
else
regexp_str[1..-1] # drop `^` anchor
end
regexp_str = drop_start_metacharacter(regexp_str)
regexp_str = interpret_string_escapes(regexp_str)
string_literal = to_string_literal(regexp_str)

Expand All @@ -70,18 +66,6 @@ def autocorrect(node)
end
end
end

private

def literal_at_start?(regex_str)
# is this regexp 'literal' in the sense of only matching literal
# chars, rather than using metachars like `.` and `*` and so on?
# also, is it anchored at the start of the string?
# (tricky: \s, \d, and so on are metacharacters, but other characters
# escaped with a slash are just literals. LITERAL_REGEX takes all
# that into account.)
regex_str =~ /\A(\\A|\^)(?:#{LITERAL_REGEX})+\z/
end
end
end
end
Expand Down
20 changes: 2 additions & 18 deletions lib/rubocop/cop/performance/delete_suffix.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ module Performance
#
class DeleteSuffix < Cop
extend TargetRubyVersion
include RegexpMetacharacter

minimum_target_ruby_version 2.5

Expand Down Expand Up @@ -55,12 +56,7 @@ def autocorrect(node)
gsub_method?(node) do |receiver, bad_method, regexp_str, _|
lambda do |corrector|
good_method = PREFERRED_METHODS[bad_method]

regexp_str = if regexp_str.end_with?('\\z')
regexp_str.chomp('\z') # drop `\z` anchor
else
regexp_str.chop # drop `$` anchor
end
regexp_str = drop_end_metacharacter(regexp_str)
regexp_str = interpret_string_escapes(regexp_str)
string_literal = to_string_literal(regexp_str)

Expand All @@ -70,18 +66,6 @@ def autocorrect(node)
end
end
end

private

def literal_at_end?(regex_str)
# is this regexp 'literal' in the sense of only matching literal
# chars, rather than using metachars like `.` and `*` and so on?
# also, is it anchored at the start of the string?
# (tricky: \s, \d, and so on are metacharacters, but other characters
# escaped with a slash are just literals. LITERAL_REGEX takes all
# that into account.)
regex_str =~ /\A(?:#{LITERAL_REGEX})+(\\z|\$)\z/
end
end
end
end
Expand Down
18 changes: 10 additions & 8 deletions lib/rubocop/cop/performance/end_with.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,18 @@ module Performance
# 'abc'.match(/bc\Z/)
# /bc\Z/.match('abc')
#
# 'abc'.match?(/bc$/)
# /bc$/.match?('abc')
# 'abc' =~ /bc$/
# /bc$/ =~ 'abc'
# 'abc'.match(/bc$/)
# /bc$/.match('abc')
#
# # good
# 'abc'.end_with?('bc')
class EndWith < Cop
include RegexpMetacharacter

MSG = 'Use `String#end_with?` instead of a regex match anchored to ' \
'the end of the string.'
SINGLE_QUOTE = "'"
Expand All @@ -28,13 +37,6 @@ class EndWith < Cop
(match-with-lvasgn (regexp (str $#literal_at_end?) (regopt)) $_)}
PATTERN

def literal_at_end?(regex_str)
# is this regexp 'literal' in the sense of only matching literal
# chars, rather than using metachars like . and * and so on?
# also, is it anchored at the end of the string?
regex_str =~ /\A(?:#{LITERAL_REGEX})+\\z\z/
end

def on_send(node)
return unless redundant_regex?(node)

Expand All @@ -45,7 +47,7 @@ def on_send(node)
def autocorrect(node)
redundant_regex?(node) do |receiver, regex_str|
receiver, regex_str = regex_str, receiver if receiver.is_a?(String)
regex_str = regex_str[0..-3] # drop \Z anchor
regex_str = drop_end_metacharacter(regex_str)
regex_str = interpret_string_escapes(regex_str)

lambda do |corrector|
Expand Down
21 changes: 10 additions & 11 deletions lib/rubocop/cop/performance/start_with.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,18 @@ module Performance
# 'abc'.match(/\Aab/)
# /\Aab/.match('abc')
#
# 'abc'.match?(/^ab/)
# /^ab/.match?('abc')
# 'abc' =~ /^ab/
# /^ab/ =~ 'abc'
# 'abc'.match(/^ab/)
# /^ab/.match('abc')
#
# # good
# 'abc'.start_with?('ab')
class StartWith < Cop
include RegexpMetacharacter

MSG = 'Use `String#start_with?` instead of a regex match anchored to ' \
'the beginning of the string.'
SINGLE_QUOTE = "'"
Expand All @@ -28,16 +37,6 @@ class StartWith < Cop
(match-with-lvasgn (regexp (str $#literal_at_start?) (regopt)) $_)}
PATTERN

def literal_at_start?(regex_str)
# is this regexp 'literal' in the sense of only matching literal
# chars, rather than using metachars like `.` and `*` and so on?
# also, is it anchored at the start of the string?
# (tricky: \s, \d, and so on are metacharacters, but other characters
# escaped with a slash are just literals. LITERAL_REGEX takes all
# that into account.)
regex_str =~ /\A\\A(?:#{LITERAL_REGEX})+\z/
end

def on_send(node)
return unless redundant_regex?(node)

Expand All @@ -48,7 +47,7 @@ def on_send(node)
def autocorrect(node)
redundant_regex?(node) do |receiver, regex_str|
receiver, regex_str = regex_str, receiver if receiver.is_a?(String)
regex_str = regex_str[2..-1] # drop \A anchor
regex_str = drop_start_metacharacter(regex_str)
regex_str = interpret_string_escapes(regex_str)

lambda do |corrector|
Expand Down
2 changes: 2 additions & 0 deletions lib/rubocop/cop/performance_cops.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require_relative 'mixin/regexp_metacharacter'

require_relative 'performance/bind_call'
require_relative 'performance/caller'
require_relative 'performance/case_when_splat'
Expand Down
14 changes: 14 additions & 0 deletions manual/cops_performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,13 @@ would suffice.
'abc'.match(/bc\Z/)
/bc\Z/.match('abc')

'abc'.match?(/bc$/)
/bc$/.match?('abc')
'abc' =~ /bc$/
/bc$/ =~ 'abc'
'abc'.match(/bc$/)
/bc$/.match('abc')

# good
'abc'.end_with?('bc')
```
Expand Down Expand Up @@ -860,6 +867,13 @@ This cop identifies unnecessary use of a regex where
'abc'.match(/\Aab/)
/\Aab/.match('abc')

'abc'.match?(/^ab/)
/^ab/.match?('abc')
'abc' =~ /^ab/
/^ab/ =~ 'abc'
'abc'.match(/^ab/)
/^ab/.match('abc')

# good
'abc'.start_with?('ab')
```
Expand Down
10 changes: 10 additions & 0 deletions spec/rubocop/cop/performance/end_with_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
expect(new_source).to eq "str.end_with?('abc')"
end

it "autocorrects str#{method} /abc$/" do
new_source = autocorrect_source("str#{method} /abc$/")
expect(new_source).to eq "str.end_with?('abc')"
end

it "autocorrects /abc$/#{method} str" do
new_source = autocorrect_source("/abc$/#{method} str")
expect(new_source).to eq "str.end_with?('abc')"
end

it "autocorrects str#{method} /\\n\\z/" do
new_source = autocorrect_source("str#{method} /\\n\\z/")
expect(new_source).to eq 'str.end_with?("\n")'
Expand Down
10 changes: 10 additions & 0 deletions spec/rubocop/cop/performance/start_with_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
expect(new_source).to eq "str.start_with?('abc')"
end

it "autocorrects str#{method} /^abc/" do
new_source = autocorrect_source("str#{method} /^abc/")
expect(new_source).to eq "str.start_with?('abc')"
end

it "autocorrects /^abc/#{method} str" do
new_source = autocorrect_source("/^abc/#{method} str")
expect(new_source).to eq "str.start_with?('abc')"
end

# escapes like "\n"
# note that "\b" is a literal backspace char in a double-quoted string...
# but in a regex, it's an anchor on a word boundary
Expand Down

0 comments on commit 56cb483

Please sign in to comment.