Skip to content

Commit

Permalink
AbstractFillerWordsRule: fixed single/double paragraph mark problem
Browse files: browse the repository at this point in the history
  • Loading branch information
FredKruse committed Sep 11, 2018
1 parent 47176b0 commit 20a9daf
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 36 deletions.
Expand Up @@ -26,6 +26,7 @@


import org.languagetool.AnalyzedSentence; import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings; import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.UserConfig; import org.languagetool.UserConfig;
import org.languagetool.rules.Category.Location; import org.languagetool.rules.Category.Location;


Expand All @@ -48,16 +49,18 @@ public abstract class AbstractFillerWordsRule extends TextLevelRule {
private static final boolean DEFAULT_ACTIVATION = false; private static final boolean DEFAULT_ACTIVATION = false;


private int minPercent = DEFAULT_MIN_PERCENT; private int minPercent = DEFAULT_MIN_PERCENT;
private final Language lang;


/* /*
* Override this to detect filler words in the specified language * Override this to detect filler words in the specified language
*/ */
protected abstract boolean isFillerWord(String token); protected abstract boolean isFillerWord(String token);


public AbstractFillerWordsRule(ResourceBundle messages, UserConfig userConfig, boolean defaultActive) { public AbstractFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig, boolean defaultActive) {
super(messages); super(messages);
super.setCategory(new Category(new CategoryId("CREATIVE_WRITING"), super.setCategory(new Category(new CategoryId("CREATIVE_WRITING"),
messages.getString("category_creative_writing"), Location.INTERNAL, false)); messages.getString("category_creative_writing"), Location.INTERNAL, false));
this.lang = lang;
if (!defaultActive) { if (!defaultActive) {
setDefaultOff(); setDefaultOff();
} }
Expand All @@ -70,8 +73,8 @@ public AbstractFillerWordsRule(ResourceBundle messages, UserConfig userConfig, b
setLocQualityIssueType(ITSIssueType.Style); setLocQualityIssueType(ITSIssueType.Style);
} }


public AbstractFillerWordsRule(ResourceBundle messages, UserConfig userConfig) { public AbstractFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig) {
this(messages, userConfig, DEFAULT_ACTIVATION); this(messages, lang, userConfig, DEFAULT_ACTIVATION);
} }


@Override @Override
Expand Down Expand Up @@ -115,7 +118,7 @@ public String getMessage() {
protected boolean isException(AnalyzedTokenReadings[] tokens, int num) { protected boolean isException(AnalyzedTokenReadings[] tokens, int num) {
return false; return false;
} }

@Override @Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException { public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
List<RuleMatch> ruleMatches = new ArrayList<>(); List<RuleMatch> ruleMatches = new ArrayList<>();
Expand All @@ -128,42 +131,41 @@ public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
int wordCount = 0; int wordCount = 0;
boolean isDirectSpeech = false; boolean isDirectSpeech = false;
for (AnalyzedSentence sentence : sentences) { for (AnalyzedSentence sentence : sentences) {
AnalyzedTokenReadings[] tokens = sentence.getTokens(); AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
for (int n = 0; n < tokens.length; n++) { for (int n = 1; n < tokens.length; n++) {
AnalyzedTokenReadings token = tokens[n]; AnalyzedTokenReadings token = tokens[n];
String sToken = token.getToken(); String sToken = token.getToken();
if (OPENING_QUOTES.matcher(sToken).matches() && n < tokens.length -1 && !tokens[n + 1].isWhitespace()) { if (OPENING_QUOTES.matcher(sToken).matches() && n < tokens.length -1 && !tokens[n + 1].isWhitespaceBefore()) {
isDirectSpeech = true; isDirectSpeech = true;
} }
else if (ENDING_QUOTES.matcher(sToken).matches() else if (ENDING_QUOTES.matcher(sToken).matches() && n > 1 && !tokens[n].isWhitespaceBefore()) {
&& !tokens[n - 1].isWhitespace() && !tokens[n - 1].isSentenceStart()) {
isDirectSpeech = false; isDirectSpeech = false;
} }
else if ((!isDirectSpeech || minPercent == 0) && !token.isWhitespace() && !token.isSentenceStart() else if ((!isDirectSpeech || minPercent == 0) && !token.isWhitespace() && !NON_WORD_REGEX.matcher(sToken).matches()) {
&& !token.isSentenceEnd() && !NON_WORD_REGEX.matcher(sToken).matches()) {
wordCount++; wordCount++;
if (isFillerWord(sToken) && !isException(tokens, n)) { if (isFillerWord(sToken) && !isException(tokens, n)) {
startPos.add(token.getStartPos() + pos); startPos.add(token.getStartPos() + pos);
endPos.add(token.getEndPos() + pos); endPos.add(token.getEndPos() + pos);
relevantSentences.add(sentence); relevantSentences.add(sentence);
} }
} else if ("\n".equals(sToken) || "\r\n".equals(sToken) || "\n\r".equals(sToken)) { }
if(wordCount > 0) { }
percent = startPos.size() * 100.0 / wordCount; if (sentence.hasParagraphEndMark(lang)) {
} else { if(wordCount > 0) {
percent = 0; percent = startPos.size() * 100.0 / wordCount;
} } else {
if (percent > minPercent) { percent = 0;
for (int i = 0; i < startPos.size(); i++) { }
RuleMatch ruleMatch = new RuleMatch(this, sentence, startPos.get(i), endPos.get(i), msg); if (percent > minPercent) {
ruleMatches.add(ruleMatch); for (int i = 0; i < startPos.size(); i++) {
} RuleMatch ruleMatch = new RuleMatch(this, relevantSentences.get(i), startPos.get(i), endPos.get(i), msg);
ruleMatches.add(ruleMatch);
} }
wordCount = 0;
startPos = new ArrayList<>();
endPos = new ArrayList<>();
relevantSentences = new ArrayList<>();
} }
wordCount = 0;
startPos = new ArrayList<>();
endPos = new ArrayList<>();
relevantSentences = new ArrayList<>();
} }
pos += sentence.getText().length(); pos += sentence.getText().length();
} }
Expand Down
Expand Up @@ -192,7 +192,7 @@ public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfi
new CompoundCoherencyRule(messages), new CompoundCoherencyRule(messages),
new LongSentenceRule(messages, userConfig), new LongSentenceRule(messages, userConfig),
new LongParagraphRule(messages, this, userConfig), new LongParagraphRule(messages, this, userConfig),
new GermanFillerWordsRule(messages, userConfig), new GermanFillerWordsRule(messages, this, userConfig),
new GermanParagraphRepeatBeginningRule(messages, this), new GermanParagraphRepeatBeginningRule(messages, this),
new PunctuationMarkAtParagraphEnd(messages, this), new PunctuationMarkAtParagraphEnd(messages, this),
new DuUpperLowerCaseRule(messages), new DuUpperLowerCaseRule(messages),
Expand Down
Expand Up @@ -26,6 +26,7 @@
import javax.swing.JOptionPane; import javax.swing.JOptionPane;


import org.languagetool.AnalyzedTokenReadings; import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.UserConfig; import org.languagetool.UserConfig;
import org.languagetool.rules.AbstractFillerWordsRule; import org.languagetool.rules.AbstractFillerWordsRule;


Expand Down Expand Up @@ -59,8 +60,8 @@ public class GermanFillerWordsRule extends AbstractFillerWordsRule {
"womöglich","ziemlich","zudem","zugegeben","zumeist","zusehends","zuweilen","zweifellos","zweifelsfrei","zweifelsohne" "womöglich","ziemlich","zudem","zugegeben","zumeist","zusehends","zuweilen","zweifellos","zweifelsfrei","zweifelsohne"
)); ));


public GermanFillerWordsRule(ResourceBundle messages, UserConfig userConfig) { public GermanFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig) {
super(messages, userConfig); super(messages, lang, userConfig);
} }


@Override @Override
Expand All @@ -75,7 +76,7 @@ protected boolean isFillerWord(String token) {


@Override @Override
public boolean isException(AnalyzedTokenReadings[] tokens, int num) { public boolean isException(AnalyzedTokenReadings[] tokens, int num) {
if ("aber".equals(tokens[num].getToken()) && num >= 2 && ",".equals(tokens[num - 2].getToken())) { if ("aber".equals(tokens[num].getToken()) && num >= 1 && ",".equals(tokens[num - 1].getToken())) {
return true; return true;
} }
return false; return false;
Expand Down
Expand Up @@ -20,6 +20,7 @@


import org.junit.Test; import org.junit.Test;
import org.languagetool.JLanguageTool; import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.TestTools; import org.languagetool.TestTools;
import org.languagetool.UserConfig; import org.languagetool.UserConfig;
import org.languagetool.language.German; import org.languagetool.language.German;
Expand All @@ -35,10 +36,12 @@
* @author Fred Kruse * @author Fred Kruse
*/ */
public class GermanFillerWordsRuleTest { public class GermanFillerWordsRuleTest {

private final Language lang = new German();


@Test @Test
public void testRule() throws IOException { public void testRule() throws IOException {
JLanguageTool lt = new JLanguageTool(new German()); JLanguageTool lt = new JLanguageTool(lang);
setUpRule(lt, null); setUpRule(lt, null);


// more than 8% filler words (default) // more than 8% filler words (default)
Expand All @@ -63,7 +66,7 @@ private void setUpRule(JLanguageTool lt, UserConfig userConfig) {
lt.disableRule(rule.getId()); lt.disableRule(rule.getId());
} }
GermanFillerWordsRule rule = GermanFillerWordsRule rule =
new GermanFillerWordsRule(TestTools.getMessages(new German().getShortCode()), userConfig); new GermanFillerWordsRule(TestTools.getMessages(lang.getShortCode()), lang, userConfig);
lt.addRule(rule); lt.addRule(rule);
lt.enableRule(rule.getId()); lt.enableRule(rule.getId());
} }
Expand Down
Expand Up @@ -161,7 +161,7 @@ public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfi
new PortugueseReplaceRule(messages), new PortugueseReplaceRule(messages),
new PortugueseBarbarismsRule(messages), new PortugueseBarbarismsRule(messages),
new PortugueseClicheRule(messages), new PortugueseClicheRule(messages),
new PortugueseFillerWordsRule(messages, userConfig), new PortugueseFillerWordsRule(messages, this, userConfig),
new PortugueseRedundancyRule(messages), new PortugueseRedundancyRule(messages),
new PortugueseWordinessRule(messages), new PortugueseWordinessRule(messages),
new PortugueseWeaselWordsRule(messages), new PortugueseWeaselWordsRule(messages),
Expand Down
Expand Up @@ -24,6 +24,7 @@
import java.util.Set; import java.util.Set;


import org.languagetool.AnalyzedTokenReadings; import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.UserConfig; import org.languagetool.UserConfig;
import org.languagetool.rules.AbstractFillerWordsRule; import org.languagetool.rules.AbstractFillerWordsRule;


Expand Down Expand Up @@ -59,8 +60,8 @@ public class PortugueseFillerWordsRule extends AbstractFillerWordsRule {
"toda", "todas", "todo", "todos", "tudo", "ultrajante", "velho", "verdade", "vez", "vezes", "volta" "toda", "todas", "todo", "todos", "tudo", "ultrajante", "velho", "verdade", "vez", "vezes", "volta"
)); ));


public PortugueseFillerWordsRule(ResourceBundle messages, UserConfig userConfig) { public PortugueseFillerWordsRule(ResourceBundle messages, Language lang, UserConfig userConfig) {
super(messages, userConfig); super(messages, lang, userConfig);
} }


@Override @Override
Expand All @@ -75,7 +76,7 @@ protected boolean isFillerWord(String token) {


@Override @Override
public boolean isException(AnalyzedTokenReadings[] tokens, int num) { public boolean isException(AnalyzedTokenReadings[] tokens, int num) {
if ("mas".equals(tokens[num].getToken()) && num >= 2 && ",".equals(tokens[num - 2].getToken())) { if ("mas".equals(tokens[num].getToken()) && num >= 1 && ",".equals(tokens[num - 1].getToken())) {
return true; return true;
} }
return false; return false;
Expand Down

0 comments on commit 20a9daf

Please sign in to comment.