Support meta chars for StartWith and EndWith cops

This PR supports regexp metacharacter `^` for `Performance/StartWith` cop and regexp metacharacter `$` for `Performance/EndWith` cop.
rubocop · May 13, 2020 · 56cb483 · 56cb483
1 parent 204ea8d
commit 56cb483
Show file tree

Hide file tree

Showing 11 changed files with 103 additions and 55 deletions.
diff --git a/.#CHANGELOG.md b/.#CHANGELOG.md
@@ -0,0 +1 @@
+koic@atelier.1968
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 * [#77](https://github.com/rubocop-hq/rubocop-performance/issues/77): Add new `Performance/BindCall` cop. ([@koic][])
 * [#105](https://github.com/rubocop-hq/rubocop-performance/pull/105): Add new `Performance/DeletePrefix` and `Performance/DeleteSuffix` cops. ([@koic][])
+* [#106](https://github.com/rubocop-hq/rubocop-performance/pull/106): Support regexp metacharacter `^` for `Performance/StartWith` cop and regexp metacharacter `$` for `Performance/EndWith` cop. ([@koic][])
 
 ### Changes
 

diff --git a/lib/rubocop/cop/mixin/regexp_metacharacter.rb b/lib/rubocop/cop/mixin/regexp_metacharacter.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module RuboCop
+  module Cop
+    # Common functionality for handling regexp metacharacters.
+    module RegexpMetacharacter
+      def literal_at_start?(regex_str)
+        # is this regexp 'literal' in the sense of only matching literal
+        # chars, rather than using metachars like `.` and `*` and so on?
+        # also, is it anchored at the start of the string?
+        # (tricky: \s, \d, and so on are metacharacters, but other characters
+        #  escaped with a slash are just literals. LITERAL_REGEX takes all
+        #  that into account.)
+        regex_str =~ /\A(\\A|\^)(?:#{Util::LITERAL_REGEX})+\z/
+      end
+
+      def literal_at_end?(regex_str)
+        # is this regexp 'literal' in the sense of only matching literal
+        # chars, rather than using metachars like . and * and so on?
+        # also, is it anchored at the end of the string?
+        regex_str =~ /\A(?:#{Util::LITERAL_REGEX})+(\\z|\$)\z/
+      end
+
+      def drop_start_metacharacter(regexp_string)
+        if regexp_string.start_with?('\\A')
+          regexp_string[2..-1] # drop `\A` anchor
+        else
+          regexp_string[1..-1] # drop `^` anchor
+        end
+      end
+
+      def drop_end_metacharacter(regexp_string)
+        if regexp_string.end_with?('\\z')
+          regexp_string.chomp('\z') # drop `\z` anchor
+        else
+          regexp_string.chop # drop `$` anchor
+        end
+      end
+    end
+  end
+end
diff --git a/lib/rubocop/cop/performance/delete_prefix.rb b/lib/rubocop/cop/performance/delete_prefix.rb
@@ -25,6 +25,7 @@ module Performance
       #
       class DeletePrefix < Cop
         extend TargetRubyVersion
+        include RegexpMetacharacter
 
         minimum_target_ruby_version 2.5
 
@@ -55,12 +56,7 @@ def autocorrect(node)
           gsub_method?(node) do |receiver, bad_method, regexp_str, _|
             lambda do |corrector|
               good_method = PREFERRED_METHODS[bad_method]
-
-              regexp_str = if regexp_str.start_with?('\\A')
-                             regexp_str[2..-1] # drop `\A` anchor
-                           else
-                             regexp_str[1..-1] # drop `^` anchor
-                           end
+              regexp_str = drop_start_metacharacter(regexp_str)
               regexp_str = interpret_string_escapes(regexp_str)
               string_literal = to_string_literal(regexp_str)
 
@@ -70,18 +66,6 @@ def autocorrect(node)
             end
           end
         end
-
-        private
-
-        def literal_at_start?(regex_str)
-          # is this regexp 'literal' in the sense of only matching literal
-          # chars, rather than using metachars like `.` and `*` and so on?
-          # also, is it anchored at the start of the string?
-          # (tricky: \s, \d, and so on are metacharacters, but other characters
-          #  escaped with a slash are just literals. LITERAL_REGEX takes all
-          #  that into account.)
-          regex_str =~ /\A(\\A|\^)(?:#{LITERAL_REGEX})+\z/
-        end
       end
     end
   end

diff --git a/lib/rubocop/cop/performance/delete_suffix.rb b/lib/rubocop/cop/performance/delete_suffix.rb
@@ -25,6 +25,7 @@ module Performance
       #
       class DeleteSuffix < Cop
         extend TargetRubyVersion
+        include RegexpMetacharacter
 
         minimum_target_ruby_version 2.5
 
@@ -55,12 +56,7 @@ def autocorrect(node)
           gsub_method?(node) do |receiver, bad_method, regexp_str, _|
             lambda do |corrector|
               good_method = PREFERRED_METHODS[bad_method]
-
-              regexp_str = if regexp_str.end_with?('\\z')
-                             regexp_str.chomp('\z') # drop `\z` anchor
-                           else
-                             regexp_str.chop # drop `$` anchor
-                           end
+              regexp_str = drop_end_metacharacter(regexp_str)
               regexp_str = interpret_string_escapes(regexp_str)
               string_literal = to_string_literal(regexp_str)
 
@@ -70,18 +66,6 @@ def autocorrect(node)
             end
           end
         end
-
-        private
-
-        def literal_at_end?(regex_str)
-          # is this regexp 'literal' in the sense of only matching literal
-          # chars, rather than using metachars like `.` and `*` and so on?
-          # also, is it anchored at the start of the string?
-          # (tricky: \s, \d, and so on are metacharacters, but other characters
-          #  escaped with a slash are just literals. LITERAL_REGEX takes all
-          #  that into account.)
-          regex_str =~ /\A(?:#{LITERAL_REGEX})+(\\z|\$)\z/
-        end
       end
     end
   end

diff --git a/lib/rubocop/cop/performance/end_with.rb b/lib/rubocop/cop/performance/end_with.rb
@@ -15,9 +15,18 @@ module Performance
       #   'abc'.match(/bc\Z/)
       #   /bc\Z/.match('abc')
       #
+      #   'abc'.match?(/bc$/)
+      #   /bc$/.match?('abc')
+      #   'abc' =~ /bc$/
+      #   /bc$/ =~ 'abc'
+      #   'abc'.match(/bc$/)
+      #   /bc$/.match('abc')
+      #
       #   # good
       #   'abc'.end_with?('bc')
       class EndWith < Cop
+        include RegexpMetacharacter
+
         MSG = 'Use `String#end_with?` instead of a regex match anchored to ' \
               'the end of the string.'
         SINGLE_QUOTE = "'"
@@ -28,13 +37,6 @@ class EndWith < Cop
            (match-with-lvasgn (regexp (str $#literal_at_end?) (regopt)) $_)}
         PATTERN
 
-        def literal_at_end?(regex_str)
-          # is this regexp 'literal' in the sense of only matching literal
-          # chars, rather than using metachars like . and * and so on?
-          # also, is it anchored at the end of the string?
-          regex_str =~ /\A(?:#{LITERAL_REGEX})+\\z\z/
-        end
-
         def on_send(node)
           return unless redundant_regex?(node)
 
@@ -45,7 +47,7 @@ def on_send(node)
         def autocorrect(node)
           redundant_regex?(node) do |receiver, regex_str|
             receiver, regex_str = regex_str, receiver if receiver.is_a?(String)
-            regex_str = regex_str[0..-3] # drop \Z anchor
+            regex_str = drop_end_metacharacter(regex_str)
             regex_str = interpret_string_escapes(regex_str)
 
             lambda do |corrector|

diff --git a/lib/rubocop/cop/performance/start_with.rb b/lib/rubocop/cop/performance/start_with.rb
@@ -15,9 +15,18 @@ module Performance
       #   'abc'.match(/\Aab/)
       #   /\Aab/.match('abc')
       #
+      #   'abc'.match?(/^ab/)
+      #   /^ab/.match?('abc')
+      #   'abc' =~ /^ab/
+      #   /^ab/ =~ 'abc'
+      #   'abc'.match(/^ab/)
+      #   /^ab/.match('abc')
+      #
       #   # good
       #   'abc'.start_with?('ab')
       class StartWith < Cop
+        include RegexpMetacharacter
+
         MSG = 'Use `String#start_with?` instead of a regex match anchored to ' \
               'the beginning of the string.'
         SINGLE_QUOTE = "'"
@@ -28,16 +37,6 @@ class StartWith < Cop
            (match-with-lvasgn (regexp (str $#literal_at_start?) (regopt)) $_)}
         PATTERN
 
-        def literal_at_start?(regex_str)
-          # is this regexp 'literal' in the sense of only matching literal
-          # chars, rather than using metachars like `.` and `*` and so on?
-          # also, is it anchored at the start of the string?
-          # (tricky: \s, \d, and so on are metacharacters, but other characters
-          #  escaped with a slash are just literals. LITERAL_REGEX takes all
-          #  that into account.)
-          regex_str =~ /\A\\A(?:#{LITERAL_REGEX})+\z/
-        end
-
         def on_send(node)
           return unless redundant_regex?(node)
 
@@ -48,7 +47,7 @@ def on_send(node)
         def autocorrect(node)
           redundant_regex?(node) do |receiver, regex_str|
             receiver, regex_str = regex_str, receiver if receiver.is_a?(String)
-            regex_str = regex_str[2..-1] # drop \A anchor
+            regex_str = drop_start_metacharacter(regex_str)
             regex_str = interpret_string_escapes(regex_str)
 
             lambda do |corrector|

diff --git a/lib/rubocop/cop/performance_cops.rb b/lib/rubocop/cop/performance_cops.rb
@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require_relative 'mixin/regexp_metacharacter'
+
 require_relative 'performance/bind_call'
 require_relative 'performance/caller'
 require_relative 'performance/case_when_splat'

diff --git a/manual/cops_performance.md b/manual/cops_performance.md
@@ -390,6 +390,13 @@ would suffice.
 'abc'.match(/bc\Z/)
 /bc\Z/.match('abc')
 
+'abc'.match?(/bc$/)
+/bc$/.match?('abc')
+'abc' =~ /bc$/
+/bc$/ =~ 'abc'
+'abc'.match(/bc$/)
+/bc$/.match('abc')
+
 # good
 'abc'.end_with?('bc')
 ```
@@ -860,6 +867,13 @@ This cop identifies unnecessary use of a regex where
 'abc'.match(/\Aab/)
 /\Aab/.match('abc')
 
+'abc'.match?(/^ab/)
+/^ab/.match?('abc')
+'abc' =~ /^ab/
+/^ab/ =~ 'abc'
+'abc'.match(/^ab/)
+/^ab/.match('abc')
+
 # good
 'abc'.start_with?('ab')
 ```

diff --git a/spec/rubocop/cop/performance/end_with_spec.rb b/spec/rubocop/cop/performance/end_with_spec.rb
@@ -14,6 +14,16 @@
       expect(new_source).to eq "str.end_with?('abc')"
     end
 
+    it "autocorrects str#{method} /abc$/" do
+      new_source = autocorrect_source("str#{method} /abc$/")
+      expect(new_source).to eq "str.end_with?('abc')"
+    end
+
+    it "autocorrects /abc$/#{method} str" do
+      new_source = autocorrect_source("/abc$/#{method} str")
+      expect(new_source).to eq "str.end_with?('abc')"
+    end
+
     it "autocorrects str#{method} /\\n\\z/" do
       new_source = autocorrect_source("str#{method} /\\n\\z/")
       expect(new_source).to eq 'str.end_with?("\n")'

diff --git a/spec/rubocop/cop/performance/start_with_spec.rb b/spec/rubocop/cop/performance/start_with_spec.rb
@@ -14,6 +14,16 @@
       expect(new_source).to eq "str.start_with?('abc')"
     end
 
+    it "autocorrects str#{method} /^abc/" do
+      new_source = autocorrect_source("str#{method} /^abc/")
+      expect(new_source).to eq "str.start_with?('abc')"
+    end
+
+    it "autocorrects /^abc/#{method} str" do
+      new_source = autocorrect_source("/^abc/#{method} str")
+      expect(new_source).to eq "str.start_with?('abc')"
+    end
+
     # escapes like "\n"
     # note that "\b" is a literal backspace char in a double-quoted string...
     # but in a regex, it's an anchor on a word boundary