Skip to content

Commit

Permalink
[de] prohibit compounds that are (probably) wrong; enable use of suff…
Browse files Browse the repository at this point in the history
…ixes for the prohibit.txt file
  • Loading branch information
danielnaber committed May 19, 2015
1 parent 4523b8b commit babcc4f
Show file tree
Hide file tree
Showing 4 changed files with 465 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,15 @@ protected void addIgnoreWords(String line, Set<String> wordsToBeIgnored) {
wordsToBeIgnored.add(line);
}

/**
* Expand suffixes in a line. By default, the line is not expanded: the
* returned list contains the given line as its only element.
* Implementations might e.g. turn {@code bicycle/S} into {@code [bicycle, bicycles]}.
* The result is added to the set of prohibited words when the prohibit file is loaded.
* @param line a single line as read from a word list file
* @return the word forms the line stands for; this default implementation
*         returns an immutable single-element list holding {@code line} itself
* @since 3.0
*/
protected List<String> expandLine(String line) {
return Collections.singletonList(line);
}

private void loadWordsToBeProhibited(String prohibitFile) throws IOException {
if (!JLanguageTool.getDataBroker().resourceExists(prohibitFile)) {
return;
Expand All @@ -285,7 +294,7 @@ private void loadWordsToBeProhibited(String prohibitFile) throws IOException {
continue;
}
failOnSpace(prohibitFile, line);
wordsToBeProhibited.add(line);
wordsToBeProhibited.addAll(expandLine(line));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ public class GermanSpellerRule extends CompoundAwareHunspellRule {
new Replacement("Ph", "F")
);

private final LineExpander lineExpander = new LineExpander();
private final GermanCompoundTokenizer compoundTokenizer;
private final GermanWordSplitter splitter;
private final Synthesizer synthesizer;
Expand Down Expand Up @@ -126,36 +127,9 @@ protected void addIgnoreWords(String origLine, Set<String> wordsToBeIgnored) {
wordsToBeIgnored.addAll(expandLine(line));
}

private static List<String> expandLine(String line) {
List<String> result = new ArrayList<>();
if (!line.startsWith("#") && line.contains("/")) {
String[] parts = line.split("/");
if (parts.length != 2) {
throw new RuntimeException("Unexpected line format, expected at most one slash: " + line);
}
String word = parts[0];
String suffix = parts[1];
result.add(word);
for (int i = 0; i < suffix.length(); i++) {
char c = suffix.charAt(i);
if (c == 'S') {
result.add(word + "s");
} else if (c == 'N') {
result.add(word + "n");
} else if (c == 'A') { // Adjektiv
result.add(word + "e");
result.add(word + "er");
result.add(word + "es");
result.add(word + "en");
result.add(word + "em");
} else {
throw new RuntimeException("Unknown suffix: " + suffix + " in line: " + line);
}
}
} else {
result.add(line);
}
return result;
/**
* Expands a line by delegating to this rule's {@code LineExpander} instance,
* so that suffix notation such as {@code foo/S} yields {@code [foo, foos]}.
* @param line a single line as read from a word list file
* @return whatever {@code LineExpander.expandLine(line)} returns for the line
*/
@Override
protected List<String> expandLine(String line) {
return lineExpander.expandLine(line);
}

@Nullable
Expand Down Expand Up @@ -359,6 +333,7 @@ private Replacement(String key, String value) {
static class ExpandingReader extends BufferedReader {

private final List<String> buffer = new ArrayList<>();
private final LineExpander lineExpander = new LineExpander();

ExpandingReader(Reader in) {
super(in);
Expand All @@ -373,7 +348,7 @@ public String readLine() throws IOException {
if (line == null) {
return null;
}
buffer.addAll(expandLine(line));
buffer.addAll(lineExpander.expandLine(line));
return buffer.remove(0);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/* LanguageTool, a natural language style checker
* Copyright (C) 2015 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.de;

import java.util.ArrayList;
import java.util.List;

/**
 * Expand lines according to their suffix flags, e.g. {@code foo/S} becomes {@code [foo, foos]}.
 *
 * <p>A line has the form {@code word/FLAGS}, where every character after the
 * single slash is one flag:
 * <ul>
 *   <li>{@code S} adds {@code word + "s"}</li>
 *   <li>{@code N} adds {@code word + "n"}</li>
 *   <li>{@code A} (adjective) adds {@code word} followed by
 *       {@code e}, {@code er}, {@code es}, {@code en} and {@code em}</li>
 * </ul>
 * Lines starting with {@code #} and lines without a slash are returned unchanged.
 * @since 3.0
 */
class LineExpander {

  /**
   * Expands one line into the list of word forms it stands for.
   *
   * @param line the line to expand, e.g. {@code foo/S}
   * @return a new mutable list; for an expandable line it starts with the base
   *         word followed by the forms for each flag in the order given,
   *         otherwise it contains just {@code line}
   * @throws IllegalArgumentException if the line contains more than one slash,
   *         has no flag after the slash, or uses an unknown flag
   */
  List<String> expandLine(String line) {
    List<String> result = new ArrayList<>();
    int slashPos = line.indexOf('/');
    if (line.startsWith("#") || slashPos == -1) {
      // comment lines and plain words are passed through untouched
      result.add(line);
      return result;
    }
    // Compare first and last slash position instead of using String.split():
    // split() drops trailing empty strings, so "foo/S/" used to be accepted.
    if (slashPos != line.lastIndexOf('/')) {
      throw new IllegalArgumentException("Unexpected line format, expected at most one slash: " + line);
    }
    String word = line.substring(0, slashPos);
    String suffix = line.substring(slashPos + 1);
    if (suffix.isEmpty()) {
      // "foo/" was always rejected, but with a message about too many slashes
      throw new IllegalArgumentException("Unexpected line format, expected a suffix after the slash: " + line);
    }
    result.add(word);
    for (int i = 0; i < suffix.length(); i++) {
      switch (suffix.charAt(i)) {
        case 'S':
          result.add(word + "s");
          break;
        case 'N':
          result.add(word + "n");
          break;
        case 'A': // Adjektiv (adjective endings)
          result.add(word + "e");
          result.add(word + "er");
          result.add(word + "es");
          result.add(word + "en");
          result.add(word + "em");
          break;
        default:
          throw new IllegalArgumentException("Unknown suffix: " + suffix + " in line: " + line);
      }
    }
    return result;
  }

}
Loading

0 comments on commit babcc4f

Please sign in to comment.