Fix UrlMatcher (and UrlLikeMatcher) to exclude characters not allowed …

…at the end of a URL
kaboc · Dec 3, 2023 · fb56445 · fb56445
1 parent 1c6b3eb
commit fb56445
Show file tree

Hide file tree

Showing 2 changed files with 56 additions and 12 deletions.
diff --git a/lib/src/preset_matchers.dart b/lib/src/preset_matchers.dart
@@ -5,8 +5,17 @@ const _kUrlPattern = r'(?:'
     r'|\d{1,3}(?:\.\d{1,3}){3}'
     r'|localhost'
     r')'
+    // Port number
     r'(?::\d{1,5})?'
-    r"(?:[/?#](?:(?:[\w\-.~%!#$&'()*+,/:;=?@\[\]]+/?)*[^\s.\\])?)?";
+    // Delimiter in front of the path
+    r'(?:[/?#]'
+    r'(?:'
+    // Characters that the path can contain
+    r"(?:[\w\-.~%!#$&'()*+,/:;=?@\[\]]+/?)*"
+    // Characters allowed at the end of the path
+    r'[\w\-~/]'
+    r')?'
+    r')?';
 
 /// A variant of [TextMatcher] for parsing URLs that start with http(s).
 class UrlMatcher extends TextMatcher {

diff --git a/test/matchers/url_matcher_test.dart b/test/matchers/url_matcher_test.dart
@@ -75,17 +75,52 @@ void main() {
     expect(matches, isEmpty);
   });
 
-  test('dot at the end is excluded', () {
-    const url = 'https://example.com/';
-    const input1 = '$url.';
-    final matches1 = regExp.allMatches(input1).toList();
-    final found1 = input1.substring(matches1[0].start, matches1[0].end);
-    expect(found1, equals(url));
-
-    const input2 = '$url.\n';
-    final matches2 = regExp.allMatches(input2).toList();
-    final found2 = input2.substring(matches2[0].start, matches2[0].end);
-    expect(found2, equals(url));
+  test('only very limited characters are allowed at the end of path', () {
+    const urls1 = {
+      'https://example.com/111',
+      'https://example.com/aaa',
+      'https://example.com/AAA',
+      r'https://example.com/111_',
+      r'https://example.com/111-',
+      'https://example.com/111~',
+    };
+
+    for (final input in urls1) {
+      final match = regExp.firstMatch(input)!;
+      final found = input.substring(match.start, match.end);
+      expect(found, input);
+    }
+
+    const urls2 = {
+      'https://example.com/aaa ',
+      r'https://example.com/aaa.',
+      r'https://example.com/aaa\',
+      'https://example.com/aaa!',
+      'https://example.com/aaa#',
+      r'https://example.com/aaa$',
+      'https://example.com/aaa&',
+      "https://example.com/aaa'",
+      'https://example.com/aaa(',
+      'https://example.com/aaa)',
+      'https://example.com/aaa*',
+      'https://example.com/aaa+',
+      'https://example.com/aaa,',
+      'https://example.com/aaa:',
+      'https://example.com/aaa;',
+      'https://example.com/aaa=',
+      'https://example.com/aaa?',
+      'https://example.com/aaa@',
+      'https://example.com/aaa[',
+      'https://example.com/aaa]',
+      'https://example.com/aaa）',
+      'https://example.com/aaaあ',
+    };
+
+    for (final input in urls2) {
+      final match = regExp.firstMatch(input)!;
+      final found = input.substring(match.start, match.end);
+      expect(found, 'https://example.com/aaa');
+    }
   });
 
   test('path can contain dots', () {