From 8702499c4bddad34fcc852f8ba15b3a129d4eafd Mon Sep 17 00:00:00 2001 From: agneskleinhans Date: Mon, 4 Dec 2023 16:16:38 +0100 Subject: [PATCH 1/4] [de] improve germanSpeller --- .../rules/de/GermanSpellerRule.java | 12 +++++ .../resource/de/words_no_infix_s.txt | 44 +++++++++++++++++-- .../rules/de/GermanSpellerRuleTest.java | 1 + 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java index 13a74044a69d..2f3769c42d71 100644 --- a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java @@ -2217,6 +2217,18 @@ protected boolean ignorePotentiallyMisspelledWord(String word) throws IOExceptio if (parts.size() == 1) { parts = nonStrictCompoundTokenizer.tokenize(wordNoDot); nonStrictMode = true; + String part2 = " "; + for (String w : wordsWithoutInfixS) { // wordsWithHyphen + if (word.startsWith(w)) { + part2 = word.substring(w.length()); + if (part2.startsWith("-")) { + part2 = part2.substring(1); + } else if (word.length() > w.length() && w.length() > 3) { + part2 = uppercaseFirstChar(part2.substring(0)); + } + return (!isMisspelled(part2) || ignorePotentiallyMisspelledWord(part2)) && isNoun(part2); + } + } } String part1; String part2; diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt index 9ce5f1b43792..57efddd43721 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt @@ -75,7 +75,6 @@ Teich Trail Villa Vinyl -Wacht Yacht Kirmes Gewächs @@ -1756,8 +1755,47 @@ Multizellen Aminosäuren Ausreißer Drehzahl -Absenk Gastro Ausdauer Anthropomorphismus -Charles-Bonnet-Syndrom \ No newline at end of file +Charles-Bonnet-Syndrom +Allergie +Trüffel +Fondue +Buddha +Zellulose +Angehörigen +Anthropomorphismus +Waffel +Black-Friday- +Anti- +All-in-One- +Remote-Work- +American-Football- +All-you-can-eat- +Double-Opt-In- +Cash-Conversion- +Al-Shifa- +Anti-Stokes– +Farm-to-Fork +Afro-Pop- +Backup-Restore- +Before-Class- +Make-up- +Always-on- +Fake- +Build-to-Order- +Air-Asia- +Airport- +Workshop- +Continuous-Improvement- +Above-the-Fold- +CDU- +SPD- +AfD- +E-Commerce-Conversion- +E-Commerce- +Black-Week- +Boho- +Charles-Bonnet- +Allergiker \ No newline at end of file diff --git a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java index 94fcd85d9f93..fd0c26950791 100644 --- a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java +++ b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java @@ -61,6 +61,7 @@ public class GermanSpellerRuleTest { @Test public void testIgnoreMisspelledWord() throws IOException { GermanSpellerRule rule = new GermanSpellerRule(TestTools.getMessages("de"), GERMAN_DE); + assertTrue(rule.ignorePotentiallyMisspelledWord("Black-Friday-Schnäppchen")); assertTrue(rule.ignorePotentiallyMisspelledWord("Atmosphärenkonzept")); assertTrue(rule.ignorePotentiallyMisspelledWord("Wölkchenbildung")); assertFalse(rule.ignorePotentiallyMisspelledWord("Abschlussgruße")); // probably "...grüße" From 686fcb17ddfbfbd9a1eb502e0500d33964647f0c Mon Sep 17 00:00:00 2001 From: agneskleinhans Date: Wed, 6 Dec 2023 13:16:16 +0100 Subject: [PATCH 2/4] [de] add test + improve GermanSpeller.java --- .../java/org/languagetool/rules/de/GermanSpellerRule.java | 6 +++--- .../org/languagetool/rules/de/GermanSpellerRuleTest.java | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java index 2f3769c42d71..1ba58c3c270d 100644 --- a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java @@ -2215,8 +2215,6 @@ protected boolean ignorePotentiallyMisspelledWord(String word) throws IOExceptio List parts = compoundTokenizer.tokenize(wordNoDot); boolean nonStrictMode = false; if (parts.size() == 1) { - parts = nonStrictCompoundTokenizer.tokenize(wordNoDot); - nonStrictMode = true; String part2 = " "; for (String w : wordsWithoutInfixS) { // wordsWithHyphen if (word.startsWith(w)) { @@ -2228,7 +2226,9 @@ protected boolean ignorePotentiallyMisspelledWord(String word) throws IOExceptio } return (!isMisspelled(part2) || ignorePotentiallyMisspelledWord(part2)) && isNoun(part2); } - } + } + parts = nonStrictCompoundTokenizer.tokenize(wordNoDot); + nonStrictMode = true; } String part1; String part2; diff --git a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java index fd0c26950791..11093288b5f4 100644 --- a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java +++ b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java @@ -61,6 +61,8 @@ public class GermanSpellerRuleTest { @Test public void testIgnoreMisspelledWord() throws IOException { GermanSpellerRule rule = new GermanSpellerRule(TestTools.getMessages("de"), GERMAN_DE); + assertTrue(rule.ignorePotentiallyMisspelledWord("Farm-to-Fork-Bereich")); + assertFalse(rule.ignorePotentiallyMisspelledWord("Englisch-KenntnisseWeitere")); assertTrue(rule.ignorePotentiallyMisspelledWord("Black-Friday-Schnäppchen")); assertTrue(rule.ignorePotentiallyMisspelledWord("Atmosphärenkonzept")); assertTrue(rule.ignorePotentiallyMisspelledWord("Wölkchenbildung")); From c6a3cdd52609f794aba77990dabcc065de72beca Mon Sep 17 00:00:00 2001 From: agneskleinhans Date: Wed, 6 Dec 2023 13:16:42 +0100 Subject: [PATCH 3/4] [de] add words to spelling/infix-s/no-infix-s --- .../resource/de/hunspell/spelling.txt | 5 ++- .../resource/de/words_infix_s.txt | 4 ++- .../resource/de/words_no_infix_s.txt | 34 ++++++++++++++++++- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/hunspell/spelling.txt b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/hunspell/spelling.txt index cdea7ef727d3..898848d4061d 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/hunspell/spelling.txt +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/hunspell/spelling.txt @@ -61678,4 +61678,7 @@ Petruskreuze/SN Pileolus Pileoli Pentalpha/S -Römerkragen/S \ No newline at end of file +Römerkragen/S +Alexander-von-Humboldt-Realschule +Augsburg-Haunstetten-Siebenbrunn +Denavit-Hartenberg-Parameter/S \ No newline at end of file diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt index 73592c423c57..0d5b9d680258 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt @@ -311,4 +311,6 @@ Werbeanbringung Wohlgeformtheit Anomalität Dämmerung -Studiengang \ No newline at end of file +Studiengang +Karaoke +Ayurveda \ No newline at end of file diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt index 57efddd43721..e81fe676cd2f 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt @@ -1779,6 +1779,7 @@ Al-Shifa- Anti-Stokes– Farm-to-Fork Afro-Pop- +Afro- Backup-Restore- Before-Class- Make-up- @@ -1798,4 +1799,35 @@ E-Commerce- Black-Week- Boho- Charles-Bonnet- -Allergiker \ No newline at end of file +Allergiker +Vehicle-to-Grid- +Best-Practice- +Agri-Photovoltaik- +Face-to-Face- +CBD- +Hausstaubmilben +Cradle-to-Cradle- +Heavy-Metal- +Akkusativ +Dativ +Genitiv +Nominativ +Due-Diligence- +Deep-Learning- +Transformer- +Elektrolyse +Gebrauchshund +EU-Mercosur- +Go-To-Market +Feedback +Pop-Up- +Pop- +.com- +Business-to-Business- +B2B- +Geflüchteten +Coming-Home- +Gourmet +Betroffenen +E-Commerce +Adjektiv \ No newline at end of file From 7c00b449829dd256711d12549d8e180d1824c400 Mon Sep 17 00:00:00 2001 From: agneskleinhans Date: Wed, 6 Dec 2023 14:49:29 +0100 Subject: [PATCH 4/4] [de] remove/add words --- .../resources/org/languagetool/resource/de/words_infix_s.txt | 1 - .../org/languagetool/resource/de/words_no_infix_s.txt | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt index 0d5b9d680258..51b592c8b2f2 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_infix_s.txt @@ -312,5 +312,4 @@ Wohlgeformtheit Anomalität Dämmerung Studiengang -Karaoke Ayurveda \ No newline at end of file diff --git a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt index e81fe676cd2f..55f916b369ec 100644 --- a/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt +++ b/languagetool-language-modules/de/src/main/resources/org/languagetool/resource/de/words_no_infix_s.txt @@ -1830,4 +1830,5 @@ Coming-Home- Gourmet Betroffenen E-Commerce -Adjektiv \ No newline at end of file +Adjektiv +Karaoke \ No newline at end of file