Skip to content

Commit

Permalink
small code cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
danielnaber committed Mar 11, 2015
1 parent 96273bb commit 44fc613
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 62 deletions.
Expand Up @@ -39,32 +39,26 @@
public class MultiWordChunker implements Disambiguator { public class MultiWordChunker implements Disambiguator {


private final String filename; private final String filename;
private final boolean allowFirstCapitalized;


private Map<String, Integer> mStartSpace; private Map<String, Integer> mStartSpace;
private Map<String, Integer> mStartNoSpace; private Map<String, Integer> mStartNoSpace;
private Map<String, String> mFull; private Map<String, String> mFull;


private boolean bAllowFirstCapitalized=false;

/** /**
* @param filename * @param filename file text with multiwords and tags
* file text with multiwords and tags
*/ */
public MultiWordChunker(final String filename) { public MultiWordChunker(final String filename) {
super(); this(filename, false);
this.filename = filename;
} }


/** /**
* @param filename * @param filename file text with multiwords and tags
* file text with multiwords and tags * @param allowFirstCapitalized if set to {@code true}, first word of the multiword can be capitalized
* @param bAllowFirstUpperCase
* if set to {@code true}, first word of the multiword can be capitalized
*/ */
public MultiWordChunker(final String filename, boolean allowFirstCapitalized) { public MultiWordChunker(final String filename, boolean allowFirstCapitalized) {
super();
this.filename = filename; this.filename = filename;
bAllowFirstCapitalized = allowFirstCapitalized; this.allowFirstCapitalized = allowFirstCapitalized;
} }


/* /*
Expand Down Expand Up @@ -144,7 +138,7 @@ public final AnalyzedSentence disambiguate(final AnalyzedSentence input) {
} }
// If the second token is not whitespace, concatenate it // If the second token is not whitespace, concatenate it
if (i + 1 < anTokens.length && !anTokens[i+1].isWhitespace()) { if (i + 1 < anTokens.length && !anTokens[i+1].isWhitespace()) {
tok=tok.concat(output[i+1].getToken()); tok = tok.concat(output[i+1].getToken());
} }
// If it is a capitalized word, the second time try with lowercase word. // If it is a capitalized word, the second time try with lowercase word.
int myCount = 0; int myCount = 0;
Expand All @@ -169,7 +163,7 @@ public final AnalyzedSentence disambiguate(final AnalyzedSentence input) {
anTokens[finalLen].getToken(), output[finalLen], true); anTokens[finalLen].getToken(), output[finalLen], true);
} }
} else { } else {
if (j>1 && !anTokens[j-1].isWhitespace()) { //avoid multiple whitespaces if (j > 1 && !anTokens[j-1].isWhitespace()) { //avoid multiple whitespaces
tokens.append(' '); tokens.append(' ');
lenCounter++; lenCounter++;
} }
Expand Down Expand Up @@ -202,7 +196,7 @@ public final AnalyzedSentence disambiguate(final AnalyzedSentence input) {
} }
// If it is a capitalized word, try with lowercase word. // If it is a capitalized word, try with lowercase word.
myCount++; myCount++;
if (bAllowFirstCapitalized && StringTools.isCapitalizedWord(tok) if (allowFirstCapitalized && StringTools.isCapitalizedWord(tok)
&& myCount == 1) { && myCount == 1) {
tok = tok.toLowerCase(); tok = tok.toLowerCase();
} else { } else {
Expand Down
Expand Up @@ -22,8 +22,6 @@
import java.util.List; import java.util.List;


import org.languagetool.rules.patterns.XMLRuleHandler; import org.languagetool.rules.patterns.XMLRuleHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;


/** /**
* XML rule handler that loads disambiguation rules from XML and throws * XML rule handler that loads disambiguation rules from XML and throws
Expand All @@ -35,20 +33,8 @@ class DisambXMLRuleHandler extends XMLRuleHandler {


final List<DisambiguationPatternRule> rules = new ArrayList<>(); final List<DisambiguationPatternRule> rules = new ArrayList<>();


boolean inDisambiguation;

List<DisambiguationPatternRule> getDisambRules() { List<DisambiguationPatternRule> getDisambRules() {
return rules; return rules;
} }


@Override
public void warning(final SAXParseException e) throws SAXException {
throw e;
}

@Override
public void error(final SAXParseException e) throws SAXException {
throw e;
}

} }
Expand Up @@ -19,7 +19,10 @@
package org.languagetool.tagging.disambiguation.rules; package org.languagetool.tagging.disambiguation.rules;


import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Objects;


import org.languagetool.AnalyzedSentence; import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken; import org.languagetool.AnalyzedToken;
Expand All @@ -46,8 +49,8 @@ public enum DisambiguatorAction {
private final DisambiguatorAction disAction; private final DisambiguatorAction disAction;


private AnalyzedToken[] newTokenReadings; private AnalyzedToken[] newTokenReadings;
private List<DisambiguatedExample> examples; private List<DisambiguatedExample> examples = new ArrayList<>();
private List<String> untouchedExamples; private List<String> untouchedExamples = new ArrayList<>();


/** /**
* @param id Id of the Rule * @param id Id of the Rule
Expand Down Expand Up @@ -106,35 +109,35 @@ public final AnalyzedSentence replace(final AnalyzedSentence sentence) throws IO
* @param examples the examples to set * @param examples the examples to set
*/ */
public void setExamples(final List<DisambiguatedExample> examples) { public void setExamples(final List<DisambiguatedExample> examples) {
this.examples = examples; this.examples = Objects.requireNonNull(examples);
} }


/** /**
* @return the examples * @return the examples
*/ */
public List<DisambiguatedExample> getExamples() { public List<DisambiguatedExample> getExamples() {
return examples; return Collections.unmodifiableList(examples);
} }


/** /**
* @param untouchedExamples the untouchedExamples to set * @param untouchedExamples the untouchedExamples to set
*/ */
public void setUntouchedExamples(final List<String> untouchedExamples) { public void setUntouchedExamples(final List<String> untouchedExamples) {
this.untouchedExamples = untouchedExamples; this.untouchedExamples = Objects.requireNonNull(untouchedExamples);
} }


/** /**
* @return the untouchedExamples * @return the untouchedExamples
*/ */
public List<String> getUntouchedExamples() { public List<String> getUntouchedExamples() {
return untouchedExamples; return Collections.unmodifiableList(untouchedExamples);
} }


/** /**
* For testing only. * For testing only.
*/ */
public final List<Element> getElements() { public final List<Element> getElements() {
return patternElements; return Collections.unmodifiableList(patternElements);
} }


/** /**
Expand Down
Expand Up @@ -37,7 +37,7 @@
*/ */
class DisambiguationPatternRuleReplacer extends AbstractPatternRulePerformer { class DisambiguationPatternRuleReplacer extends AbstractPatternRulePerformer {


List<Boolean> elementsMatched; private final List<Boolean> elementsMatched;


public DisambiguationPatternRuleReplacer(DisambiguationPatternRule rule) { public DisambiguationPatternRuleReplacer(DisambiguationPatternRule rule) {
super(rule, rule.getLanguage().getDisambiguationUnifier()); super(rule, rule.getLanguage().getDisambiguationUnifier());
Expand Down
Expand Up @@ -69,6 +69,7 @@ class DisambiguationRuleHandler extends DisambXMLRuleHandler {
private static final String ACTION = "action"; private static final String ACTION = "action";
private static final String DISAMBIG = "disambig"; private static final String DISAMBIG = "disambig";


private boolean inDisambiguation;
private int subId; private int subId;
private String name; private String name;
private String ruleGroupId; private String ruleGroupId;
Expand Down
Expand Up @@ -41,26 +41,23 @@
public class XmlRuleDisambiguator implements Disambiguator { public class XmlRuleDisambiguator implements Disambiguator {


private static final String DISAMBIGUATION_FILE = "disambiguation.xml"; private static final String DISAMBIGUATION_FILE = "disambiguation.xml";


private List<DisambiguationPatternRule> disambiguationRules; private final List<DisambiguationPatternRule> disambiguationRules;
private final Language language;


public XmlRuleDisambiguator(final Language language) { public XmlRuleDisambiguator(final Language language) {
this.language = Objects.requireNonNull(language); Objects.requireNonNull(language);
final String disambiguationFile =
JLanguageTool.getDataBroker().getResourceDir() + "/" + language.getShortName() + "/" + DISAMBIGUATION_FILE;
try {
disambiguationRules = loadPatternRules(disambiguationFile);
} catch (Exception e) {
throw new RuntimeException("Problems with loading disambiguation file: " + disambiguationFile, e);
}
} }


@Override @Override
public AnalyzedSentence disambiguate(final AnalyzedSentence input) throws IOException { public AnalyzedSentence disambiguate(final AnalyzedSentence input) throws IOException {
AnalyzedSentence sentence = input; AnalyzedSentence sentence = input;
if (disambiguationRules == null) {
final String disambiguationFile =
JLanguageTool.getDataBroker().getResourceDir() + "/" + language.getShortName() + "/" + DISAMBIGUATION_FILE;
try {
disambiguationRules = loadPatternRules(disambiguationFile);
} catch (final Exception e) {
throw new RuntimeException("Problems with loading disambiguation file: " + disambiguationFile, e);
}
}
for (final DisambiguationPatternRule patternRule : disambiguationRules) { for (final DisambiguationPatternRule patternRule : disambiguationRules) {
sentence = patternRule.replace(sentence); sentence = patternRule.replace(sentence);
} }
Expand Down
Expand Up @@ -20,7 +20,9 @@


import net.sourceforge.segment.srx.SrxDocument; import net.sourceforge.segment.srx.SrxDocument;
import org.languagetool.Language; import org.languagetool.Language;
import org.languagetool.tools.Tools;


import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
Expand All @@ -44,11 +46,12 @@ public class LocalSRXSentenceTokenizer implements SentenceTokenizer {
*/ */
public LocalSRXSentenceTokenizer(Language language, String srxInClassPath) { public LocalSRXSentenceTokenizer(Language language, String srxInClassPath) {
this.language = Objects.requireNonNull(language); this.language = Objects.requireNonNull(language);
InputStream stream = this.getClass().getResourceAsStream(srxInClassPath); try {
if (stream == null) { InputStream stream = Tools.getStream(srxInClassPath);
throw new RuntimeException("Could not find SRX file in classpath: " + srxInClassPath); this.srxDocument = SrxTools.createSrxDocument(stream); // will close the stream on its own
} catch (IOException e) {
throw new RuntimeException(e);
} }
this.srxDocument = SrxTools.createSrxDocument(stream); // will close the stream on its own
setSingleLineBreaksMarksParagraph(false); setSingleLineBreaksMarksParagraph(false);
} }


Expand Down
Expand Up @@ -28,7 +28,7 @@


/** /**
* A very simple sentence tokenizer that splits on {@code [.!?…]} followed by whitespace * A very simple sentence tokenizer that splits on {@code [.!?…]} followed by whitespace
* or an uppercase letter. You probably want ot use an adapted {@link LocalSRXSentenceTokenizer} instead. * or an uppercase letter. You probably want to use an adapted {@link LocalSRXSentenceTokenizer} instead.
* @since 2.6 * @since 2.6
*/ */
public class SimpleSentenceTokenizer extends LocalSRXSentenceTokenizer { public class SimpleSentenceTokenizer extends LocalSRXSentenceTokenizer {
Expand Down
Expand Up @@ -242,16 +242,16 @@ public static String getFullStackTrace(final Throwable e) {


/** /**
* Load a file from the classpath using {@link Class#getResourceAsStream(String)}. * Load a file from the classpath using {@link Class#getResourceAsStream(String)}.
* *
* @return the stream of the file * @return the stream of the file
*/ */
public static InputStream getStream(final String filename) throws IOException { public static InputStream getStream(final String path) throws IOException {
// the other ways to load the stream like // the other ways to load the stream like
// "Tools.class.getClass().getResourceAsStream(filename)" // "Tools.class.getClass().getResourceAsStream(filename)"
// don't work in a web context (using Grails): // don't work in a web context (using Grails):
final InputStream is = Tools.class.getResourceAsStream(filename); final InputStream is = Tools.class.getResourceAsStream(path);
if (is == null) { if (is == null) {
throw new IOException("Could not load file from classpath : " + filename); throw new IOException("Could not load file from classpath: '" + path + "'");
} }
return is; return is;
} }
Expand Down
Expand Up @@ -27,6 +27,7 @@
* this one is based on ArrayList). Usage is the same as the java.util.Stack. * this one is based on ArrayList). Usage is the same as the java.util.Stack.
* *
* @author Marcin Miłkowski. * @author Marcin Miłkowski.
* @deprecated will be made non-public in the future (deprecated since 2.9)
*/ */
public class UnsyncStack<E> extends ArrayList<E> { public class UnsyncStack<E> extends ArrayList<E> {


Expand Down
Expand Up @@ -23,14 +23,13 @@


public class SimpleSentenceTokenizerTest { public class SimpleSentenceTokenizerTest {


private static final SimpleSentenceTokenizer tokenizer = new SimpleSentenceTokenizer();

@Test @Test
public void testTokenize() throws Exception { public void testTokenize() throws Exception {
testSplit("Hi! ", "This is a test. ", "Here's more. ", "And even more?? ", "Yes."); testSplit("Hi! ", "This is a test. ", "Here's more. ", "And even more?? ", "Yes.");
} }


private void testSplit(String... sentences) { private void testSplit(String... sentences) {
SimpleSentenceTokenizer tokenizer = new SimpleSentenceTokenizer();
TestTools.testSplit(sentences, tokenizer); TestTools.testSplit(sentences, tokenizer);
} }


Expand Down

0 comments on commit 44fc613

Please sign in to comment.