From f745e264d2af5cf776ae327e04c1900eb847c548 Mon Sep 17 00:00:00 2001 From: Brian Birtles Date: Mon, 14 Aug 2023 11:21:00 +0900 Subject: [PATCH] fix: don't match a trailing period after the path Fixes #17. This also extends `trailingPeriod: false` (the default) to exclude matching a trailing question mark (?) or exclamation mark (!). That is if you want "Check out example.com/project." to NOT match the trailing period, you typically also want it to NOT match the trailing exclamation mark in "Check out example.com/project!". Likewise for "Have you seen example.com/project?". I guess technically `trailingPeriod` should be renamed `trailingPunctuation` but maybe it's fine as-is? --- src/index.js | 27 ++++++++++------- test/test.js | 86 ++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 89 insertions(+), 24 deletions(-) diff --git a/src/index.js b/src/index.js index b9a9f42..9092cc0 100644 --- a/src/index.js +++ b/src/index.js @@ -45,17 +45,22 @@ module.exports = (options) => { })${options.trailingPeriod ? '\\.?' : ''}`; const port = '(?::\\d{2,5})?'; - // Not accept closing parenthesis - // - // Don't allow apostrophes - // - const path = options.parens - ? options.apostrophes - ? '(?:[/?#][^\\s"]*)?' - : '(?:[/?#][^\\s"\']*)?' - : options.apostrophes - ? '(?:[/?#][^\\s"\\)]*)?' - : '(?:[/?#][^\\s"\\)\']*)?'; + let disallowedChars = '\\s"'; + if (!options.parens) { + // Not accept closing parenthesis + // + disallowedChars += '\\)'; + } + + if (!options.apostrophes) { + // Don't allow apostrophes + // + disallowedChars += "'"; + } + + const path = options.trailingPeriod + ? `(?:[/?#][^${disallowedChars}]*)?` + : `(?:(?:[/?#][^${disallowedChars}]*[^${disallowedChars}.?!])|[/])?`; // Added IPv6 support // diff --git a/test/test.js b/test/test.js index bd25ef9..75e622f 100644 --- a/test/test.js +++ b/test/test.js @@ -500,17 +500,77 @@ test('localhost', (t) => { ); }); -test('trailing period', (t) => { - t.deepEqual( - 'background example.com. foobar.com'.match( - urlRegex({ trailingPeriod: true }) - ), - ['example.com.', 'foobar.com'] - ); - t.deepEqual( - 'background example.com. foobar.com'.match( - urlRegex({ trailingPeriod: false }) - ), +for (const [source, withTrailingPeriod, withoutTrailingPeriod] of [ + [ + 'background example.com. foobar.com', + ['example.com.', 'foobar.com'], ['example.com', 'foobar.com'] - ); -}); + ], + [ + 'https://example.com/dir.', + ['https://example.com/dir.'], + ['https://example.com/dir'] + ], + [ + 'https://example.com/dir. ', + ['https://example.com/dir.'], + ['https://example.com/dir'] + ], + [ + 'https://example.com/dir.\n', + ['https://example.com/dir.'], + ['https://example.com/dir'] + ], + [ + 'https://example.com/index.html', + ['https://example.com/index.html'], + ['https://example.com/index.html'] + ], + [ + 'https://example.com/index.html.', + ['https://example.com/index.html.'], + ['https://example.com/index.html'] + ], + [ + 'https://example.com/dir.with.dot/.', + ['https://example.com/dir.with.dot/.'], + ['https://example.com/dir.with.dot/'] + ], + // Question marks + ['Have you ever visited example.com?', ['example.com?'], ['example.com']], + ['example.com/?', ['example.com/?'], ['example.com/']], + [ + 'https://example.com/dir?', + ['https://example.com/dir?'], + ['https://example.com/dir'] + ], + // Exclamation marks + ['You should check out example.com!', ['example.com'], ['example.com']], + ['Here is example.com/!', ['example.com/!'], ['example.com/']], + [ + 'https://example.com/dir/!', + ['https://example.com/dir/!'], + ['https://example.com/dir/'] + ], + [ + 'https://example.com/dir!', + ['https://example.com/dir!'], + ['https://example.com/dir'] + ] +]) { + const sourceTitle = source.replace('\n', '\\n'); + + test(`trailingPeriod: true (${sourceTitle})`, (t) => { + t.deepEqual( + source.match(urlRegex({ trailingPeriod: true })), + withTrailingPeriod + ); + }); + + test(`trailingPeriod: false (${sourceTitle})`, (t) => { + t.deepEqual( + source.match(urlRegex({ trailingPeriod: false })), + withoutTrailingPeriod + ); + }); +}