From 20a9daf6bc0fcb10788bfa28d90c6c1f7f617222 Mon Sep 17 00:00:00 2001 From: Fred Kruse Date: Tue, 11 Sep 2018 20:51:59 +0200 Subject: [PATCH] AbstractFillerWordsRule: fixed single/double paragraph mark problem --- .../rules/AbstractFillerWordsRule.java | 54 ++++++++++--------- .../org/languagetool/language/German.java | 2 +- .../rules/de/GermanFillerWordsRule.java | 7 +-- .../rules/de/GermanFillerWordsRuleTest.java | 7 ++- .../org/languagetool/language/Portuguese.java | 2 +- .../rules/pt/PortugueseFillerWordsRule.java | 7 +-- 6 files changed, 43 insertions(+), 36 deletions(-) diff --git a/languagetool-core/src/main/java/org/languagetool/rules/AbstractFillerWordsRule.java b/languagetool-core/src/main/java/org/languagetool/rules/AbstractFillerWordsRule.java index 57d91cfca5a0..e7248f155fb0 100644 --- a/languagetool-core/src/main/java/org/languagetool/rules/AbstractFillerWordsRule.java +++ b/languagetool-core/src/main/java/org/languagetool/rules/AbstractFillerWordsRule.java @@ -26,6 +26,7 @@ import org.languagetool.AnalyzedSentence; import org.languagetool.AnalyzedTokenReadings; +import org.languagetool.Language; import org.languagetool.UserConfig; import org.languagetool.rules.Category.Location; @@ -48,16 +49,18 @@ public abstract class AbstractFillerWordsRule extends TextLevelRule { private static final boolean DEFAULT_ACTIVATION = false; private int minPercent = DEFAULT_MIN_PERCENT; + private final Language lang; /* * Override this to detect filler words in the specified language */ protected abstract boolean isFillerWord(String token); - public AbstractFillerWordsRule(ResourceBundle messages, UserConfig userConfig, boolean defaultActive) { + public AbstractFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig, boolean defaultActive) { super(messages); super.setCategory(new Category(new CategoryId("CREATIVE_WRITING"), messages.getString("category_creative_writing"), Location.INTERNAL, false)); + this.lang = lang; if (!defaultActive) { setDefaultOff(); } @@ -70,8 +73,8 @@ public AbstractFillerWordsRule(ResourceBundle messages, UserConfig userConfig, b setLocQualityIssueType(ITSIssueType.Style); } - public AbstractFillerWordsRule(ResourceBundle messages, UserConfig userConfig) { - this(messages, userConfig, DEFAULT_ACTIVATION); + public AbstractFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig) { + this(messages, lang, userConfig, DEFAULT_ACTIVATION); } @Override @@ -115,7 +118,7 @@ public String getMessage() { protected boolean isException(AnalyzedTokenReadings[] tokens, int num) { return false; } - + @Override public RuleMatch[] match(List sentences) throws IOException { List ruleMatches = new ArrayList<>(); @@ -128,42 +131,41 @@ public RuleMatch[] match(List sentences) throws IOException { int wordCount = 0; boolean isDirectSpeech = false; for (AnalyzedSentence sentence : sentences) { - AnalyzedTokenReadings[] tokens = sentence.getTokens(); - for (int n = 0; n < tokens.length; n++) { + AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace(); + for (int n = 1; n < tokens.length; n++) { AnalyzedTokenReadings token = tokens[n]; String sToken = token.getToken(); - if (OPENING_QUOTES.matcher(sToken).matches() && n < tokens.length -1 && !tokens[n + 1].isWhitespace()) { + if (OPENING_QUOTES.matcher(sToken).matches() && n < tokens.length -1 && !tokens[n + 1].isWhitespaceBefore()) { isDirectSpeech = true; } - else if (ENDING_QUOTES.matcher(sToken).matches() - && !tokens[n - 1].isWhitespace() && !tokens[n - 1].isSentenceStart()) { + else if (ENDING_QUOTES.matcher(sToken).matches() && n > 1 && !tokens[n].isWhitespaceBefore()) { isDirectSpeech = false; } - else if ((!isDirectSpeech || minPercent == 0) && !token.isWhitespace() && !token.isSentenceStart() - && !token.isSentenceEnd() && !NON_WORD_REGEX.matcher(sToken).matches()) { + else if ((!isDirectSpeech || minPercent == 0) && !token.isWhitespace() && !NON_WORD_REGEX.matcher(sToken).matches()) { wordCount++; if (isFillerWord(sToken) && !isException(tokens, n)) { startPos.add(token.getStartPos() + pos); endPos.add(token.getEndPos() + pos); relevantSentences.add(sentence); } - } else if ("\n".equals(sToken) || "\r\n".equals(sToken) || "\n\r".equals(sToken)) { - if(wordCount > 0) { - percent = startPos.size() * 100.0 / wordCount; - } else { - percent = 0; - } - if (percent > minPercent) { - for (int i = 0; i < startPos.size(); i++) { - RuleMatch ruleMatch = new RuleMatch(this, sentence, startPos.get(i), endPos.get(i), msg); - ruleMatches.add(ruleMatch); - } + } + } + if (sentence.hasParagraphEndMark(lang)) { + if(wordCount > 0) { + percent = startPos.size() * 100.0 / wordCount; + } else { + percent = 0; + } + if (percent > minPercent) { + for (int i = 0; i < startPos.size(); i++) { + RuleMatch ruleMatch = new RuleMatch(this, relevantSentences.get(i), startPos.get(i), endPos.get(i), msg); + ruleMatches.add(ruleMatch); } - wordCount = 0; - startPos = new ArrayList<>(); - endPos = new ArrayList<>(); - relevantSentences = new ArrayList<>(); } + wordCount = 0; + startPos = new ArrayList<>(); + endPos = new ArrayList<>(); + relevantSentences = new ArrayList<>(); } pos += sentence.getText().length(); } diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java b/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java index adfab4d769da..5791fbf6283c 100644 --- a/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/language/German.java @@ -192,7 +192,7 @@ public List getRelevantRules(ResourceBundle messages, UserConfig userConfi new CompoundCoherencyRule(messages), new LongSentenceRule(messages, userConfig), new LongParagraphRule(messages, this, userConfig), - new GermanFillerWordsRule(messages, userConfig), + new GermanFillerWordsRule(messages, this, userConfig), new GermanParagraphRepeatBeginningRule(messages, this), new PunctuationMarkAtParagraphEnd(messages, this), new DuUpperLowerCaseRule(messages), diff --git a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanFillerWordsRule.java b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanFillerWordsRule.java index 22d661f6c439..8ab5470791ae 100644 --- a/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanFillerWordsRule.java +++ b/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanFillerWordsRule.java @@ -26,6 +26,7 @@ import javax.swing.JOptionPane; import org.languagetool.AnalyzedTokenReadings; +import org.languagetool.Language; import org.languagetool.UserConfig; import org.languagetool.rules.AbstractFillerWordsRule; @@ -59,8 +60,8 @@ public class GermanFillerWordsRule extends AbstractFillerWordsRule { "womöglich","ziemlich","zudem","zugegeben","zumeist","zusehends","zuweilen","zweifellos","zweifelsfrei","zweifelsohne" )); - public GermanFillerWordsRule(ResourceBundle messages, UserConfig userConfig) { - super(messages, userConfig); + public GermanFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig) { + super(messages, lang, userConfig); } @Override @@ -75,7 +76,7 @@ protected boolean isFillerWord(String token) { @Override public boolean isException(AnalyzedTokenReadings[] tokens, int num) { - if ("aber".equals(tokens[num].getToken()) && num >= 2 && ",".equals(tokens[num - 2].getToken())) { + if ("aber".equals(tokens[num].getToken()) && num >= 1 && ",".equals(tokens[num - 1].getToken())) { return true; } return false; diff --git a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanFillerWordsRuleTest.java b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanFillerWordsRuleTest.java index 3a6a94c0e525..63e38b8c2f15 100644 --- a/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanFillerWordsRuleTest.java +++ b/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanFillerWordsRuleTest.java @@ -20,6 +20,7 @@ import org.junit.Test; import org.languagetool.JLanguageTool; +import org.languagetool.Language; import org.languagetool.TestTools; import org.languagetool.UserConfig; import org.languagetool.language.German; @@ -35,10 +36,12 @@ * @author Fred Kruse */ public class GermanFillerWordsRuleTest { + + private final Language lang = new German(); @Test public void testRule() throws IOException { - JLanguageTool lt = new JLanguageTool(new German()); + JLanguageTool lt = new JLanguageTool(lang); setUpRule(lt, null); // more than 8% filler words (default) @@ -63,7 +66,7 @@ private void setUpRule(JLanguageTool lt, UserConfig userConfig) { lt.disableRule(rule.getId()); } GermanFillerWordsRule rule = - new GermanFillerWordsRule(TestTools.getMessages(new German().getShortCode()), userConfig); + new GermanFillerWordsRule(TestTools.getMessages(lang.getShortCode()), lang, userConfig); lt.addRule(rule); lt.enableRule(rule.getId()); } diff --git a/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java b/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java index f0383897c883..d510dd286d6d 100644 --- a/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java +++ b/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java @@ -161,7 +161,7 @@ public List getRelevantRules(ResourceBundle messages, UserConfig userConfi new PortugueseReplaceRule(messages), new PortugueseBarbarismsRule(messages), new PortugueseClicheRule(messages), - new PortugueseFillerWordsRule(messages, userConfig), + new PortugueseFillerWordsRule(messages, this, userConfig), new PortugueseRedundancyRule(messages), new PortugueseWordinessRule(messages), new PortugueseWeaselWordsRule(messages), diff --git a/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/PortugueseFillerWordsRule.java b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/PortugueseFillerWordsRule.java index 9d5bbbc2b742..b8fbcda7a591 100644 --- a/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/PortugueseFillerWordsRule.java +++ b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/PortugueseFillerWordsRule.java @@ -24,6 +24,7 @@ import java.util.Set; import org.languagetool.AnalyzedTokenReadings; +import org.languagetool.Language; import org.languagetool.UserConfig; import org.languagetool.rules.AbstractFillerWordsRule; @@ -59,8 +60,8 @@ public class PortugueseFillerWordsRule extends AbstractFillerWordsRule { "toda", "todas", "todo", "todos", "tudo", "ultrajante", "velho", "verdade", "vez", "vezes", "volta" )); - public PortugueseFillerWordsRule(ResourceBundle messages, UserConfig userConfig) { - super(messages, userConfig); + public PortugueseFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig) { + super(messages, lang, userConfig); } @Override @@ -75,7 +76,7 @@ protected boolean isFillerWord(String token) { @Override public boolean isException(AnalyzedTokenReadings[] tokens, int num) { - if ("mas".equals(tokens[num].getToken()) && num >= 2 && ",".equals(tokens[num - 2].getToken())) { + if ("mas".equals(tokens[num].getToken()) && num >= 1 && ",".equals(tokens[num - 1].getToken())) { return true; } return false;