Skip to content

Commit

Permalink
[de] use pre-compiled Patterns from case_rule_exceptions.txt in DE_CASE
Browse files Browse the repository at this point in the history
  • Loading branch information
f-knorr committed Jan 13, 2018
1 parent 81bca84 commit 7226277
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 19 deletions.
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -691,6 +691,7 @@ private static PatternToken posRegex(String posTag) {
} }


private static final Set<String> myExceptionPhrases = CaseRuleExceptions.getExceptions(); private static final Set<String> myExceptionPhrases = CaseRuleExceptions.getExceptions();
private static final Set<Pattern[]> exceptionPatterns = CaseRuleExceptions.getExceptionPatterns();


private static final Set<String> substVerbenExceptions = new HashSet<>(); private static final Set<String> substVerbenExceptions = new HashSet<>();
static { static {
Expand Down Expand Up @@ -1205,12 +1206,11 @@ private boolean isFollowedByRelativeOrSubordinateClause(int i, AnalyzedTokenRead
} }


private boolean isExceptionPhrase(int i, AnalyzedTokenReadings[] tokens) { private boolean isExceptionPhrase(int i, AnalyzedTokenReadings[] tokens) {
for (String phrase : myExceptionPhrases) { for (Pattern[] patterns : exceptionPatterns) {
String[] parts = phrase.split(" "); for (int j = 0; j < patterns.length; j++) {
for (int j = 0; j < parts.length; j++) { if (patterns[j].matcher(tokens[i].getToken()).matches()) {
if (tokens[i].getToken().matches(parts[j])) {
int startIndex = i-j; int startIndex = i-j;
if (compareLists(tokens, startIndex, startIndex+parts.length-1, parts)) { if (compareLists(tokens, startIndex, startIndex+patterns.length-1, patterns)) {
return true; return true;
} }
} }
Expand All @@ -1220,16 +1220,16 @@ private boolean isExceptionPhrase(int i, AnalyzedTokenReadings[] tokens) {
} }


// non-private for tests // non-private for tests
boolean compareLists(AnalyzedTokenReadings[] tokens, int startIndex, int endIndex, String[] parts) { boolean compareLists(AnalyzedTokenReadings[] tokens, int startIndex, int endIndex, Pattern[] patterns) {
if (startIndex < 0) { if (startIndex < 0) {
return false; return false;
} }
int i = 0; int i = 0;
for (int j = startIndex; j <= endIndex; j++) { for (int j = startIndex; j <= endIndex; j++) {
if (i >= parts.length || j >= tokens.length) { if (i >= patterns.length || j >= tokens.length) {
return false; return false;
} }
if (!tokens[j].getToken().matches(parts[i])) { if (!patterns[i].matcher(tokens[j].getToken()).matches()) {
return false; return false;
} }
i++; i++;
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import java.util.regex.Pattern;


/** /**
* @since 3.0 * @since 3.0
Expand All @@ -39,6 +40,19 @@ public static Set<String> getExceptions() {
return exceptions; return exceptions;
} }


/**
 * Returns the exception phrases in pre-compiled form.
 * <p>
 * Every phrase in {@code exceptions} is split on single spaces and each
 * resulting part is compiled as a regular expression, so one phrase yields
 * one {@code Pattern[]} whose elements are in the same order as the words
 * of the phrase. A new set is built on every call, so callers that need
 * the patterns repeatedly should keep the result.
 *
 * @return a newly created set with one {@code Pattern[]} per exception phrase
 */
public static Set<Pattern[]> getExceptionPatterns() {
  Set<Pattern[]> compiledPhrases = new HashSet<>(250);
  for (String phrase : exceptions) {
    String[] words = phrase.split(" ");
    Pattern[] compiledWords = new Pattern[words.length];
    int position = 0;
    for (String word : words) {
      compiledWords[position] = Pattern.compile(word);
      position++;
    }
    compiledPhrases.add(compiledWords);
  }
  return compiledPhrases;
}

private static Set<String> loadExceptions(String path) { private static Set<String> loadExceptions(String path) {
Set<String> result = new HashSet<>(); Set<String> result = new HashSet<>();
try ( try (
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@
*/ */
package org.languagetool.rules.de; package org.languagetool.rules.de;


import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.regex.Pattern;

import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.languagetool.AnalyzedSentence; import org.languagetool.AnalyzedSentence;
import org.languagetool.JLanguageTool; import org.languagetool.JLanguageTool;
import org.languagetool.TestTools; import org.languagetool.TestTools;
import org.languagetool.language.GermanyGerman; import org.languagetool.language.GermanyGerman;


import java.io.IOException;

import static org.junit.Assert.*;

public class CaseRuleTest { public class CaseRuleTest {


private CaseRule rule; private CaseRule rule;
Expand Down Expand Up @@ -155,7 +158,7 @@ public void testRule() throws IOException {
assertGood("Schon Le Monde schrieb das."); assertGood("Schon Le Monde schrieb das.");
// unknown word: // unknown word:
assertGood("In Blubberdorf macht man das so."); assertGood("In Blubberdorf macht man das so.");
// Exception definied in case_rule_exceptions.txt: // Exception defined in case_rule_exceptions.txt:
assertGood("Der Thriller spielt zur Zeit des Zweiten Weltkriegs"); assertGood("Der Thriller spielt zur Zeit des Zweiten Weltkriegs");


assertGood("Anders als physikalische Konstanten werden mathematische Konstanten unabhängig von jedem physikalischen Maß definiert."); assertGood("Anders als physikalische Konstanten werden mathematische Konstanten unabhängig von jedem physikalischen Maß definiert.");
Expand Down Expand Up @@ -355,13 +358,13 @@ public void testPhraseExceptions() throws IOException {
@Test @Test
public void testCompareLists() throws IOException { public void testCompareLists() throws IOException {
AnalyzedSentence sentence1 = lt.getAnalyzedSentence("Hier ein Test"); AnalyzedSentence sentence1 = lt.getAnalyzedSentence("Hier ein Test");
assertTrue(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 0, 2, new String[]{"", "Hier", "ein"})); assertTrue(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 0, 2, new Pattern[]{Pattern.compile(""), Pattern.compile("Hier"), Pattern.compile("ein")}));
assertTrue(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 1, 2, new String[]{"Hier", "ein"})); assertTrue(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 1, 2, new Pattern[]{Pattern.compile("Hier"), Pattern.compile("ein")}));
assertTrue(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 0, 3, new String[]{"", "Hier", "ein", "Test"})); assertTrue(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 0, 3, new Pattern[]{Pattern.compile(""), Pattern.compile("Hier"), Pattern.compile("ein"), Pattern.compile("Test")}));
assertFalse(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 0, 4, new String[]{"", "Hier", "ein", "Test"})); assertFalse(rule.compareLists(sentence1.getTokensWithoutWhitespace(), 0, 4, new Pattern[]{Pattern.compile(""), Pattern.compile("Hier"), Pattern.compile("ein"), Pattern.compile("Test")}));


AnalyzedSentence sentence2 = lt.getAnalyzedSentence("das Heilige Römische Reich"); AnalyzedSentence sentence2 = lt.getAnalyzedSentence("das Heilige Römische Reich");
assertTrue(rule.compareLists(sentence2.getTokensWithoutWhitespace(), 0, 4, new String[]{"", "das", "Heilige", "Römische", "Reich"})); assertTrue(rule.compareLists(sentence2.getTokensWithoutWhitespace(), 0, 4, new Pattern[]{Pattern.compile(""), Pattern.compile("das"), Pattern.compile("Heilige"), Pattern.compile("Römische"), Pattern.compile("Reich")}));
assertFalse(rule.compareLists(sentence2.getTokensWithoutWhitespace(), 8, 11, new String[]{"", "das", "Heilige", "Römische", "Reich"})); assertFalse(rule.compareLists(sentence2.getTokensWithoutWhitespace(), 8, 11, new Pattern[]{Pattern.compile(""), Pattern.compile("das"), Pattern.compile("Heilige"), Pattern.compile("Römische"), Pattern.compile("Reich")}));
} }
} }

0 comments on commit 7226277

Please sign in to comment.