From 61ee8af1ce4629552043ff7a19579b9fe75026dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Tamargo?= Date: Thu, 12 Aug 2021 17:40:28 +0300 Subject: [PATCH] MBS-11854: Recognize unicode hyphen in guess case (#2199) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Words with a unicode hyphen were not being recognized as split words: the hyphen was just being considered one more character. This changes that, and then ensures that guess case recognizes the hyphen as such and does the same as with a hyphen-minus. Sadly, part of my fix for MBS-10156 (for re‐mode with unicode hyphen) will no longer work now since it is no longer considered one single word, but I think this makes a lot more sense for now. The current code already didn't do anything with re-mode with a hyphen-minus, so this actually adds consistency. --- .../guess-case/MB/GuessCase/Handler/Base.js | 4 ++-- .../scripts/guess-case/MB/GuessCase/Input.js | 2 +- root/static/scripts/guess-case/utils.js | 1 - root/static/scripts/tests/GuessCase.js | 14 +++++++++++--- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/root/static/scripts/guess-case/MB/GuessCase/Handler/Base.js b/root/static/scripts/guess-case/MB/GuessCase/Handler/Base.js index 9cf7a3c11a7..c7442878051 100644 --- a/root/static/scripts/guess-case/MB/GuessCase/Handler/Base.js +++ b/root/static/scripts/guess-case/MB/GuessCase/Handler/Base.js @@ -117,7 +117,7 @@ MB.GuessCase.Handler.Base = function (gc) { */ var handled = false; if (!gc.regexes.SPECIALCASES) { - gc.regexes.SPECIALCASES = /(&|¿|¡|\?|\!|;|:|'|‘|’|‹|›|"|“|”|„|“|«|»|\-|\+|,|\*|\.|#|%|\/|\(|\)|\{|\}|\[|\])/; + gc.regexes.SPECIALCASES = /(&|¿|¡|\?|\!|;|:|'|‘|’|‹|›|"|“|”|„|“|«|»|\-|‐|\+|,|\*|\.|#|%|\/|\(|\)|\{|\}|\[|\])/; } if (input.matchCurrentWord(gc.regexes.SPECIALCASES)) { handled = !!( @@ -332,7 +332,7 @@ MB.GuessCase.Handler.Base = function (gc) { */ self.doHyphen = function () { if (!gc.regexes.HYPHEN) { - gc.regexes.HYPHEN = '-'; + gc.regexes.HYPHEN = /^[\-‐]$/; } if (input.matchCurrentWord(gc.regexes.HYPHEN)) { output.appendWordPreserveWhiteSpace(true); diff --git a/root/static/scripts/guess-case/MB/GuessCase/Input.js b/root/static/scripts/guess-case/MB/GuessCase/Input.js index 6b53bd3fb77..67d078dde16 100644 --- a/root/static/scripts/guess-case/MB/GuessCase/Input.js +++ b/root/static/scripts/guess-case/MB/GuessCase/Input.js @@ -182,7 +182,7 @@ class GuessCaseInput { const splitwords = []; let word = []; if (!gc.regexes.SPLITWORDSANDPUNCTUATION) { - gc.regexes.SPLITWORDSANDPUNCTUATION = /[^!¿¡\"%&'´`‘’‹›“”„“«»()\[\]\{\}\*\+,-\.\/:;<=>\?\s#]/; + gc.regexes.SPLITWORDSANDPUNCTUATION = /[^!¿¡\"%&'´`‘’‹›“”„“«»()\[\]\{\}\*\+‐\-,\.\/:;<=>\?\s#]/; } for (let i = 0; i < chars.length; i++) { if (chars[i].match(gc.regexes.SPLITWORDSANDPUNCTUATION)) { diff --git a/root/static/scripts/guess-case/utils.js b/root/static/scripts/guess-case/utils.js index 950ec6cbe87..088caee0853 100644 --- a/root/static/scripts/guess-case/utils.js +++ b/root/static/scripts/guess-case/utils.js @@ -61,7 +61,6 @@ const preBracketSingleWordsList = [ 'rehearsal', 'remixed', 'remode', - 're‐mode', 'rework', 'reworked', 'session', diff --git a/root/static/scripts/tests/GuessCase.js b/root/static/scripts/tests/GuessCase.js index 4eda1819631..5e95861d182 100644 --- a/root/static/scripts/tests/GuessCase.js +++ b/root/static/scripts/tests/GuessCase.js @@ -175,7 +175,7 @@ test('Recording', function (t) { }); test('Work', function (t) { - t.plan(23); + t.plan(24); const tests = [ { @@ -344,6 +344,14 @@ test('Work', function (t) { roman: false, keepuppercase: false, }, + { + input: 'hyphen-minus? hyphen‐maximus!', + expected: 'Hyphen-Minus? Hyphen‐Maximus!', + bug: 'MBS-11854', + mode: 'English', + roman: false, + keepuppercase: false, + }, ]; for (const test of tests) { @@ -457,8 +465,8 @@ test('BugFixes', function (t) { mode: 'French', }, { - input: 'We Love Techno (Re‐Mode)', - expected: 'We Love Techno (re‐mode)', + input: 'We Love Techno (Remode)', + expected: 'We Love Techno (remode)', bug: 'MBS-10156', mode: 'English', },