Skip to content

Commit

Permalink
[de] properly consider spelling.txt for Swiss German, i.e. don't sugg…
Browse files Browse the repository at this point in the history
…est words with "ß" even when listed in spelling.txt
  • Loading branch information
danielnaber committed Feb 10, 2015
1 parent 68c7bce commit 32b7e4c
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
Expand Up @@ -37,7 +37,9 @@ public abstract class CompoundAwareHunspellRule extends HunspellRule {

private final CompoundWordTokenizer wordSplitter;
private final MorfologikMultiSpeller morfoSpeller;


/**
 * Language-specific post-processing hook for spelling suggestions.
 * It returns nothing, so implementations adapt the given list in place.
 *
 * @param suggestions the suggestion list to adapt in place
 */
protected abstract void filterForLanguage(List<String> suggestions);

public CompoundAwareHunspellRule(ResourceBundle messages, Language language, CompoundWordTokenizer wordSplitter, MorfologikMultiSpeller morfoSpeller) {
super(messages, language);
this.wordSplitter = wordSplitter;
Expand All @@ -47,7 +49,7 @@ public CompoundAwareHunspellRule(ResourceBundle messages, Language language, Com
/**
* As a hunspell-based approach is too slow, we use Morfologik to create suggestions. As this
* won't work for compounds not in the dictionary, we split the word and also get suggestions
* on the compound parts. In the end, all candidates are filtered against Hunspell again (which
* supports compounds).
*/
@Override
Expand Down Expand Up @@ -95,6 +97,7 @@ public List<String> getSuggestions(String word) throws IOException {
suggestions.addAll(0, noSplitSuggestions);

filterDupes(suggestions);
filterForLanguage(suggestions);
final List<String> sortedSuggestions = sortSuggestionByQuality(word, suggestions);
return sortedSuggestions.subList(0, Math.min(MAX_SUGGESTIONS, sortedSuggestions.size()));
}
Expand Down
Expand Up @@ -101,6 +101,16 @@ private static MorfologikMultiSpeller getSpeller(Language language) {
}
}

/**
 * Adapts the suggestions to the language variant, modifying the list in place.
 * For Swiss German ("de-CH") every "ß" is replaced by "ss"; for any other
 * variant the list is left untouched.
 *
 * Replacing "ß" can turn two previously distinct suggestions into the same
 * string (e.g. one spelled with "ß" and one already spelled with "ss"), so a
 * suggestion that becomes equal to an earlier entry is dropped. The first
 * occurrence keeps its position, so the relative order is preserved.
 *
 * @param suggestions the mutable list of suggestions to adapt
 */
@Override
protected void filterForLanguage(List<String> suggestions) {
  if (!language.getShortNameWithCountryAndVariant().equals("de-CH")) {
    return;
  }
  int i = 0;
  while (i < suggestions.size()) {
    String swissSpelling = suggestions.get(i).replace("ß", "ss");
    // Entries before index i are already converted, so a match there is a true duplicate.
    if (suggestions.subList(0, i).contains(swissSpelling)) {
      suggestions.remove(i);  // do not advance: the next element moved into slot i
    } else {
      suggestions.set(i, swissSpelling);
      i++;
    }
  }
}

// Use hunspell-style replacements to get good suggestions for "heisse", namely "heiße" etc
// TODO: remove this when the Morfologik speller can do this directly during tree iteration:
@Override
Expand Down
Expand Up @@ -32,6 +32,7 @@
import java.util.List;
import java.util.ResourceBundle;

import static junit.framework.TestCase.assertFalse;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
Expand All @@ -40,6 +41,7 @@
public class GermanSpellerRuleTest {

private static final GermanyGerman GERMAN_DE = new GermanyGerman();
private static final SwissGerman GERMAN_CH = new SwissGerman();

@Test
public void testSortSuggestion() throws Exception {
Expand Down Expand Up @@ -71,15 +73,20 @@ public void testDashAndHyphen() throws Exception {

/**
 * Checks that suggestions coming from spelling.txt are offered for Germany German
 * as listed, and that the Swiss German rule offers them with "ss" instead of "ß".
 */
@Test
public void testGetSuggestionsFromSpellingTxt() throws Exception {
  MyGermanSpellerRule ruleGermany = new MyGermanSpellerRule(TestTools.getMessages("de"), GERMAN_DE);
  assertThat(ruleGermany.getSuggestions("Ligafußboll").toString(), is("[Ligafußball, Ligafußballs]")); // from spelling.txt
  // Same misspelling, Swiss variant: the expected suggestions contain "ss" instead of "ß".
  MyGermanSpellerRule ruleSwiss = new MyGermanSpellerRule(TestTools.getMessages("de"), GERMAN_CH);
  assertThat(ruleSwiss.getSuggestions("Ligafußboll").toString(), is("[Ligafussball, Ligafussballs]"));
}

/**
 * Checks which words the speller rule ignores: entries from ignore.txt are ignored
 * for both variants, while a spelling.txt entry containing "ß" is ignored for
 * Germany German but not for Swiss German.
 */
@Test
public void testIgnoreWord() throws Exception {
  MyGermanSpellerRule ruleGermany = new MyGermanSpellerRule(TestTools.getMessages("de"), GERMAN_DE);
  assertTrue(ruleGermany.doIgnoreWord("einPseudoWortFürLanguageToolTests")); // from ignore.txt
  assertTrue(ruleGermany.doIgnoreWord("Ligafußball")); // from spelling.txt
  MyGermanSpellerRule ruleSwiss = new MyGermanSpellerRule(TestTools.getMessages("de"), GERMAN_CH);
  assertTrue(ruleSwiss.doIgnoreWord("einPseudoWortFürLanguageToolTests"));
  assertFalse(ruleSwiss.doIgnoreWord("Ligafußball")); // 'ß' never accepted for Swiss
}

private class MyGermanSpellerRule extends GermanSpellerRule {
Expand Down

0 comments on commit 32b7e4c

Please sign in to comment.