Skip to content

Commit

Permalink
cleanup: move fields that belong together to their own inner class
Browse files Browse the repository at this point in the history
  • Loading branch information
danielnaber committed Mar 17, 2015
1 parent 7114283 commit 7513464
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 58 deletions.
Expand Up @@ -50,11 +50,9 @@ public class PatternToken implements Cloneable {
private final boolean stringRegExp;

private String stringToken;
private String posToken;
private ChunkTag chunkToken;
private boolean posRegExp;
private PosToken posToken;
private ChunkTag chunkTag;
private boolean negation;
private boolean posNegation;
private boolean inflected;
private boolean testWhitespace;
private boolean whitespaceBefore;
Expand Down Expand Up @@ -84,7 +82,6 @@ public class PatternToken implements Cloneable {
private int maxOccurrence = 1;

private Pattern pattern;
private Pattern posPattern;

/** The reference to another element in the pattern. **/
private Match tokenReference;
Expand All @@ -107,7 +104,6 @@ public class PatternToken implements Cloneable {

private boolean uniNegation;
private Map<String, List<String>> unificationFeatures;
private boolean posUnknown;

/** Set to true on tokens that close the unification block. */
private boolean isLastUnified;
Expand Down Expand Up @@ -142,6 +138,7 @@ public final boolean isMatched(final AnalyzedToken token) {
return false;
}
final boolean matched;
boolean posNegation = posToken != null && posToken.negation;
if (testString) {
matched = isStringTokenMatched(token) ^ negation &&
isPosTokenMatched(token) ^ posNegation;
Expand Down Expand Up @@ -318,25 +315,25 @@ public final boolean isMatchedByPreviousException(final AnalyzedTokenReadings pr
* @return True if the element starts the sentence and the element hasn't been set to have negated POS token.
*/
public final boolean isSentenceStart() {
return JLanguageTool.SENTENCE_START_TAGNAME.equals(posToken) && !posNegation;
return posToken != null && JLanguageTool.SENTENCE_START_TAGNAME.equals(posToken.posTag) && !posToken.negation;
}

/** @since 2.9 */
public final void setPosToken(PosToken posToken) {
this.posToken = posToken;
}

/**
* @deprecated use {@link #setPosToken(PosToken)} instead (deprecated since 2.9)
*/
public final void setPosElement(final String posToken, final boolean regExp,
final boolean negation) {
this.posToken = posToken;
this.posNegation = negation;
posRegExp = regExp;
if (posRegExp) {
posPattern = Pattern.compile(posToken);
posUnknown = posPattern.matcher(UNKNOWN_TAG).matches();
} else {
posUnknown = UNKNOWN_TAG.equals(posToken);
}
this.posToken = new PosToken(posToken, regExp, negation);
}

/** @since 2.3 */
public final void setChunkElement(final ChunkTag chunkTag) {
this.chunkToken = chunkTag;
this.chunkTag = chunkTag;
}

@Nullable
Expand Down Expand Up @@ -383,7 +380,7 @@ public final void setStringPosException(
final String posToken, final boolean posRegExp, final boolean posNegation, final Boolean caseSensitivity) {
final PatternToken exception = new PatternToken(token, caseSensitivity == null ? caseSensitive : caseSensitivity, regExp, inflected);
exception.setNegation(negation);
exception.setPosElement(posToken, posRegExp, posNegation);
exception.setPosToken(new PosToken(posToken, posRegExp, posNegation));
exception.exceptionValidNext = scopeNext;
setException(exception, scopePrevious);
}
Expand Down Expand Up @@ -445,7 +442,7 @@ public final void setOptionalException(

final PatternToken exception = new PatternToken(token, caseSensitive, regExp, inflected);
exception.setNegation(negation);
exception.setPosElement(posToken, posRegExp, posNegation);
exception.setPosToken(new PosToken(posToken, posRegExp, posNegation));
setException(exception, false);
}

Expand All @@ -456,21 +453,21 @@ public final void setOptionalException(
* @return true if matches
*/
private boolean isPosTokenMatched(final AnalyzedToken token) {
if (posToken == null) {
if (posToken == null || posToken.posTag == null) {
// if no POS set defaulting to true
return true;
}
if (token.getPOSTag() == null) {
return posUnknown && token.hasNoTag();
return posToken.posUnknown && token.hasNoTag();
}
boolean match;
if (posRegExp) {
final Matcher mPos = posPattern.matcher(token.getPOSTag());
if (posToken.regExp) {
final Matcher mPos = posToken.posPattern.matcher(token.getPOSTag());
match = mPos.matches();
} else {
match = posToken.equals(token.getPOSTag());
match = posToken.posTag.equals(token.getPOSTag());
}
if (!match && posUnknown) { // ignore helper tags
if (!match && posToken.posUnknown) { // ignore helper tags
match = token.hasNoTag();
}
return match;
Expand Down Expand Up @@ -634,7 +631,7 @@ void doCompile(final AnalyzedTokenReadings token, final Synthesizer synth) throw
if (tokenReference.setsPos()) {
final String posReference = matchState.getTargetPosTag();
if (posReference != null) {
setPosElement(posReference, tokenReference.posRegExp(), negation);
setPosToken(new PosToken(posReference, tokenReference.posRegExp(), negation));
}
setStringElement(referenceString.replace("\\" + tokenReference.getTokenRef(), ""));
inflected = true;
Expand Down Expand Up @@ -681,7 +678,7 @@ public final boolean isRegularExpression() {
* @since 1.3.0
*/
public final boolean isPOStagRegularExpression() {
return posRegExp;
return posToken != null && posToken.regExp;
}

/**
Expand All @@ -690,7 +687,7 @@ public final boolean isPOStagRegularExpression() {
*/
@Nullable
public final String getPOStag() {
return posToken;
return posToken != null ? posToken.posTag : null;
}

/**
Expand All @@ -699,14 +696,14 @@ public final String getPOStag() {
*/
@Nullable
public final ChunkTag getChunkTag() {
return chunkToken;
return chunkTag;
}

/**
* @return true if the POS is negated.
*/
public final boolean getPOSNegation() {
return posNegation;
return posToken != null && posToken.negation;
}

/**
Expand Down Expand Up @@ -854,9 +851,9 @@ public final String toString() {
sb.append('/');
sb.append(posToken);
}
if (chunkToken != null) {
if (chunkTag != null) {
sb.append('/');
sb.append(chunkToken);
sb.append(chunkTag);
}
if (exceptionList != null) {
sb.append("/exceptions=");
Expand All @@ -865,4 +862,30 @@ public final String toString() {
return sb.toString();
}

/**
 * Immutable bundle of the part-of-speech (POS) settings of a pattern token:
 * the tag itself, whether it is to be read as a regular expression, and
 * whether the POS match is negated. The compiled pattern and the
 * "covers the unknown tag" flag are derived once in the constructor.
 */
public static class PosToken {

  /** The POS tag or, if {@link #regExp} is set, the regular expression source; may be {@code null}. */
  private final String posTag;
  /** Whether {@link #posTag} is a regular expression. */
  private final boolean regExp;
  /** Whether the result of the POS match is to be negated. */
  private final boolean negation;
  /** Compiled form of {@link #posTag}; {@code null} unless it is a non-null regular expression. */
  private final Pattern posPattern;
  /** Whether the tag equals (or the regular expression matches) {@code UNKNOWN_TAG}. */
  private final boolean posUnknown;

  /**
   * @param posTag the POS tag, or a regular expression over POS tags if {@code regExp} is set;
   *               {@code null} means that no POS tag is specified
   * @param regExp whether {@code posTag} is a regular expression
   * @param negation whether the POS match is negated
   * @throws java.util.regex.PatternSyntaxException if {@code regExp} is set and
   *         {@code posTag} is not a valid regular expression
   */
  public PosToken(String posTag, boolean regExp, boolean negation) {
    this.posTag = posTag;
    this.regExp = regExp;
    this.negation = negation;
    // Pattern.compile(null) would throw a NullPointerException, so a missing tag is
    // never compiled, even when it is flagged as a regular expression.
    if (regExp && posTag != null) {
      posPattern = Pattern.compile(posTag);
      posUnknown = posPattern.matcher(UNKNOWN_TAG).matches();
    } else {
      posPattern = null;
      // equals() is called on the constant, so a null posTag simply yields false
      posUnknown = UNKNOWN_TAG.equals(posTag);
    }
  }

  /**
   * @return the raw POS tag (or regular expression source) as passed to the constructor
   */
  @Override
  public String toString() {
    return posTag;
  }
}
}
Expand Up @@ -574,7 +574,7 @@ protected void finalizeTokens() throws SAXException {
maxOccurrence = 1;
}
if (posToken != null) {
patternToken.setPosElement(posToken, posRegExp, posNegation);
patternToken.setPosToken(new PatternToken.PosToken(posToken, posRegExp, posNegation));
posToken = null;
}
if (chunkTag != null) {
Expand Down
Expand Up @@ -339,7 +339,7 @@ public void endElement(final String namespaceURI, final String sName,
maxOccurrence = 1;
}
if (posToken != null) {
patternToken.setPosElement(posToken, posRegExp, posNegation);
patternToken.setPosToken(new PatternToken.PosToken(posToken, posRegExp, posNegation));
posToken = null;
}

Expand Down
Expand Up @@ -495,7 +495,7 @@ protected PatternRule makePatternRule(final String s, final boolean caseSensitiv
pToken = new PatternToken("", caseSensitive, regex, false);
}
if (pos) {
pToken.setPosElement(element, false, false);
pToken.setPosToken(new PatternToken.PosToken(element, false, false));
}
patternTokens.add(pToken);
pos = false;
Expand Down
Expand Up @@ -21,42 +21,45 @@

import junit.framework.TestCase;
import org.languagetool.AnalyzedToken;
import org.languagetool.JLanguageTool;

import static org.languagetool.JLanguageTool.PARAGRAPH_END_TAGNAME;
import static org.languagetool.JLanguageTool.SENTENCE_END_TAGNAME;
import static org.languagetool.JLanguageTool.SENTENCE_START_TAGNAME;
import static org.languagetool.rules.patterns.PatternToken.UNKNOWN_TAG;

public class PatternTokenTest extends TestCase {

public void testSentenceStart() {
final PatternToken patternToken = new PatternToken("", false, false, false);
patternToken.setPosElement(JLanguageTool.SENTENCE_START_TAGNAME, false, false);
patternToken.setPosToken(new PatternToken.PosToken(SENTENCE_START_TAGNAME, false, false));
assertTrue(patternToken.isSentenceStart());
patternToken.setPosElement(JLanguageTool.SENTENCE_START_TAGNAME, false, true);
patternToken.setPosToken(new PatternToken.PosToken(SENTENCE_START_TAGNAME, false, true));
assertFalse(patternToken.isSentenceStart());
patternToken.setPosElement(JLanguageTool.SENTENCE_START_TAGNAME, true, false);
patternToken.setPosToken(new PatternToken.PosToken(SENTENCE_START_TAGNAME, true, false));
assertTrue(patternToken.isSentenceStart());
patternToken.setPosElement(JLanguageTool.SENTENCE_START_TAGNAME, true, true);
patternToken.setPosToken(new PatternToken.PosToken(SENTENCE_START_TAGNAME, true, true));
assertFalse(patternToken.isSentenceStart());

//this should be false:
final PatternToken patternToken2 = new PatternToken("bla|blah", false, true, false);
patternToken2.setPosElement("foo", true, true);
patternToken2.setPosToken(new PatternToken.PosToken("foo", true, true));
assertFalse(patternToken2.isSentenceStart());
}

public void testUnknownTag() {
final PatternToken patternToken = new PatternToken("", false, false, false);
patternToken.setPosElement(PatternToken.UNKNOWN_TAG, false, false);
patternToken.setPosToken(new PatternToken.PosToken(UNKNOWN_TAG, false, false));

final PatternToken patternToken2 = new PatternToken("", false, false, false);
patternToken2.setPosElement(PatternToken.UNKNOWN_TAG, false, true);
patternToken2.setPosToken(new PatternToken.PosToken(UNKNOWN_TAG, false, true));

final PatternToken patternToken3 = new PatternToken("", false, false, false);
patternToken3.setPosElement(PatternToken.UNKNOWN_TAG+"|VBG", true, false);
patternToken3.setPosToken(new PatternToken.PosToken(UNKNOWN_TAG + "|VBG", true, false));

final PatternToken patternToken4 = new PatternToken("", false, false, false);
patternToken4.setPosElement(PatternToken.UNKNOWN_TAG+"|VBG", true, true);
patternToken4.setPosToken(new PatternToken.PosToken(UNKNOWN_TAG + "|VBG", true, true));

final PatternToken patternToken5 = new PatternToken("\\p{Ll}+", false, true, false);
patternToken5.setPosElement(PatternToken.UNKNOWN_TAG, false, false);
patternToken5.setPosToken(new PatternToken.PosToken(UNKNOWN_TAG, false, false));

final AnalyzedToken an = new AnalyzedToken("schword", null, null);
assertTrue(patternToken.isMatched(an));
Expand All @@ -74,19 +77,19 @@ public void testUnknownTag() {
assertTrue(patternToken4.isMatched(an));
assertFalse(patternToken5.isMatched(an));

final AnalyzedToken anSentEnd = new AnalyzedToken("schword", JLanguageTool.SENTENCE_END_TAGNAME, null);
final AnalyzedToken anSentEnd = new AnalyzedToken("schword", SENTENCE_END_TAGNAME, null);
assertTrue(patternToken.isMatched(anSentEnd));
assertFalse(patternToken2.isMatched(anSentEnd));
assertTrue(patternToken3.isMatched(anSentEnd));
assertFalse(patternToken4.isMatched(anSentEnd));
assertTrue(patternToken5.isMatched(anSentEnd));

final PatternToken patternToken6 = new PatternToken("\\p{Ll}+", false, true, false);
patternToken6.setPosElement(JLanguageTool.SENTENCE_END_TAGNAME, false, false);
patternToken6.setPosToken(new PatternToken.PosToken(SENTENCE_END_TAGNAME, false, false));
assertTrue(patternToken6.isMatched(anSentEnd));

final PatternToken patternToken7 = new PatternToken("\\p{Ll}+", false, true, false);
patternToken7.setPosElement(JLanguageTool.SENTENCE_END_TAGNAME+"|BLABLA", true, false);
patternToken7.setPosToken(new PatternToken.PosToken(SENTENCE_END_TAGNAME + "|BLABLA", true, false));
assertTrue(patternToken7.isMatched(anSentEnd));

// if the AnalyzedToken is in the set of readings that have
Expand All @@ -98,7 +101,7 @@ public void testUnknownTag() {
assertTrue(patternToken4.isMatched(anSentEnd));
assertFalse(patternToken5.isMatched(anSentEnd));

final AnalyzedToken anParaEnd = new AnalyzedToken("schword", JLanguageTool.PARAGRAPH_END_TAGNAME, null);
final AnalyzedToken anParaEnd = new AnalyzedToken("schword", PARAGRAPH_END_TAGNAME, null);
assertTrue(patternToken.isMatched(anParaEnd));
assertFalse(patternToken2.isMatched(anParaEnd));
assertTrue(patternToken3.isMatched(anParaEnd));
Expand Down
Expand Up @@ -185,18 +185,18 @@ public void testUnificationNumberGender() {
final UnifierConfiguration unifierConfig = new UnifierConfiguration();

final PatternToken sgPatternToken = new PatternToken("", false, false, false);
sgPatternToken.setPosElement(".*[\\.:]sg:.*", true, false);
sgPatternToken.setPosToken(new PatternToken.PosToken(".*[\\.:]sg:.*", true, false));
unifierConfig.setEquivalence("number", "singular", sgPatternToken);
final PatternToken plPatternToken = new PatternToken("", false, false, false);
plPatternToken.setPosElement(".*[\\.:]pl:.*", true, false);
plPatternToken.setPosToken(new PatternToken.PosToken(".*[\\.:]pl:.*", true, false));
unifierConfig.setEquivalence("number", "plural", plPatternToken);

final PatternToken femPatternToken = new PatternToken("", false, false, false);
femPatternToken.setPosElement(".*[\\.:]f", true, false);
femPatternToken.setPosToken(new PatternToken.PosToken(".*[\\.:]f", true, false));
unifierConfig.setEquivalence("gender", "feminine", femPatternToken);

final PatternToken mascPatternToken = new PatternToken("", false, false, false);
mascPatternToken.setPosElement(".*[\\.:]m", true, false);
mascPatternToken.setPosToken(new PatternToken.PosToken(".*[\\.:]m", true, false));
unifierConfig.setEquivalence("gender", "masculine", mascPatternToken);

final Unifier uni = unifierConfig.createUnifier();
Expand Down Expand Up @@ -428,9 +428,9 @@ public void testMultipleFeatsWithMultipleTypes() {


private PatternToken preparePOSElement(final String posString) {
final PatternToken el = new PatternToken("", false, false, false);
el.setPosElement(posString, true, false);
return el;
final PatternToken pToken = new PatternToken("", false, false, false);
pToken.setPosToken(new PatternToken.PosToken(posString, true, false));
return pToken;
}

public void testNegation() {
Expand Down

0 comments on commit 7513464

Please sign in to comment.