Skip to content

Commit

Permalink
add if_match to allow skipping suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
arysin committed Feb 12, 2024
1 parent f379e48 commit dce0269
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 3 deletions.
Expand Up @@ -51,6 +51,8 @@ public enum IncludeRange {
private final IncludeRange includeSkipped;
// Pattern used to define parts of the matched token:
private final Pattern pRegexMatch;
// If true, a suggestion built from this match is applied only when the
// match is successful; otherwise that suggestion is skipped.
// Not final: the value is assigned after construction through setIfMatch().
private boolean ifMatch;
// True if this match element is used for formatting POS token:
private final boolean setPos;

Expand Down Expand Up @@ -228,4 +230,12 @@ public IncludeRange getIncludeSkipped() {
return includeSkipped;
}

/**
 * Tells whether the suggestion produced from this match should be applied
 * only if the match is successful.
 *
 * @return the current value of the {@code ifMatch} flag
 */
public boolean isIfMatch() {
return ifMatch;
}

/**
 * Sets whether the suggestion produced from this match should be applied
 * only if the match is successful.
 *
 * @param ifMatch the new value of the {@code ifMatch} flag
 */
public void setIfMatch(boolean ifMatch) {
this.ifMatch = ifMatch;
}

}
Expand Up @@ -24,6 +24,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
Expand All @@ -45,7 +46,10 @@
* @since 2.3
*/
public class MatchState {

// Sentinel value standing in for a suggestion that must not be applied, so that it can be filtered out later.
// A private-use Unicode character is used because the empty string is already an expected
// suggestion value in existing tests, and skipping is hard to signal in any other way.
static final String DONT_APPLY = "\uE120";

private final Match match;
private final Synthesizer synthesizer;

Expand Down Expand Up @@ -224,7 +228,11 @@ public final String[] toFinalString(Language lang) throws IOException {
if (lang != null && lang.getShortCode().equals("ar")) {
formattedString[0] = StringTools.removeTashkeel(formattedString[0]);
}
formattedString[0] = pRegexMatch.matcher(formattedString[0]).replaceAll(regexReplace);
Matcher matcher = pRegexMatch.matcher(formattedString[0]);
if( match.isIfMatch() && ! matcher.find() ) {
return new String[] { DONT_APPLY };
}
formattedString[0] = matcher.replaceAll(regexReplace);
}

String posTag = match.getPosTag();
Expand Down
Expand Up @@ -30,6 +30,7 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

//import org.slf4j.Logger;
//import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -394,7 +395,10 @@ static String formatMultipleSynthesis(String[] matches,
}
int lastLeftSugEnd = leftSide.indexOf(RuleMatch.SUGGESTION_END_TAG);
int lastLeftSugStart = leftSide.lastIndexOf(RuleMatch.SUGGESTION_START_TAG);
StringBuilder sb = new StringBuilder();

matches = filterOutEmpty(matches);

StringBuilder sb = new StringBuilder(leftSide.length() + rightSide.length() + 40 * matches.length);
sb.append(errorMessage);
for (int z = 0; z < matches.length; z++) {
sb.append(suggestionLeft);
Expand Down Expand Up @@ -443,6 +447,12 @@ private String[] concatMatches(int start, int index,
return finalMatch;
}

/**
 * Returns a new array holding the given suggestions, in their original order,
 * without the entries equal to {@link MatchState#DONT_APPLY}.
 * Despite the method name, empty strings are kept; only the sentinel is dropped.
 *
 * @param finalMatch the candidate suggestions, must not be {@code null}
 * @return a freshly allocated array containing every non-sentinel entry
 */
private static String[] filterOutEmpty(String[] finalMatch) {
  // First pass: count the survivors so the result array can be sized exactly.
  int kept = 0;
  for (String candidate : finalMatch) {
    if (!MatchState.DONT_APPLY.equals(candidate)) {
      kept++;
    }
  }
  // Second pass: copy the survivors, preserving their relative order.
  String[] applicable = new String[kept];
  int next = 0;
  for (String candidate : finalMatch) {
    if (!MatchState.DONT_APPLY.equals(candidate)) {
      applicable[next++] = candidate;
    }
  }
  return applicable;
}

private int phraseLen(int i) {
PatternRule rule = (PatternRule) this.rule;
List<Integer> elementNo = rule.getElementNo();
Expand Down
Expand Up @@ -390,6 +390,7 @@ protected void setMatchElement(Attributes attrs, boolean isSuppressMisspelled) t
caseConversion, YES.equals(attrs.getValue("setpos")),
isSuppressMisspelled,
includeRange);
mWorker.setIfMatch(YES.equals(attrs.getValue("if_match")));
mWorker.setInMessageOnly(!inSuggestion);
if (inMessage) {
suggestionMatches.add(mWorker);
Expand Down
Expand Up @@ -101,6 +101,7 @@
<xs:complexType mixed="true">
<xs:attribute name="no" type="xs:nonNegativeInteger" use="required" />
<xs:attribute name="regexp_match" type="xs:string" use="optional" />
<!-- if_match: "yes" applies the suggestion only when the match is successful.
     Declared with the same yes/no type and "no" default as the sibling postag_regexp flag,
     so that a mistyped value is rejected by schema validation instead of silently meaning "no". -->
<xs:attribute name="if_match" type="binaryYesNo" use="optional" default="no" />
<xs:attribute name="regexp_replace" type="xs:string" use="optional" />
<xs:attribute name="postag_regexp" type="binaryYesNo" use="optional" default="no" />
<xs:attribute name="postag" type="xs:string" use="optional" />
Expand Down
Expand Up @@ -139,6 +139,12 @@ public void testFormatMultipleSynthesis() throws Exception {
assertEquals("This is how you should write: <suggestion>test</suggestion>, <suggestion> </suggestion>.",
PatternRuleMatcher.formatMultipleSynthesis(suggestions2,
"This is how you should write: <suggestion>", "</suggestion>."));

String[] suggestions3 = { "test", MatchState.DONT_APPLY };

assertEquals("This is how you should write: <suggestion>test</suggestion>.",
PatternRuleMatcher.formatMultipleSynthesis(suggestions3,
"This is how you should write: <suggestion>", "</suggestion>."));
}

private PatternRule makePatternRule(String s) {
Expand Down
Expand Up @@ -758,6 +758,8 @@ private void assertSuggestions(String sentence, List<String> expectedCorrections
}
}
List<String> realSuggestions = matches.get(0).getSuggestedReplacements();
realSuggestions = new ArrayList<>(realSuggestions);
realSuggestions.removeIf(s -> MatchState.DONT_APPLY.equals(s));
if (realSuggestions.isEmpty()) {
boolean expectedEmptyCorrection = expectedCorrections.size() == 1 && expectedCorrections.get(0).length() == 0;
if (!expectedEmptyCorrection) {
Expand Down
Expand Up @@ -377,6 +377,16 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example correction="geheißen">Sie hat früher Müller <marker>gehießen</marker>.</example>
</rule>
</rulegroup>
<!-- Tests if_match="yes": the regexps "t(.*)x" cannot match "token", so the match in the message
     and the second suggestion are ignored; only the first suggestion ("moke") is used -->
<rule id="TEST_IF_MATCH" name="If match test">
<pattern>
<token>token</token>
</pattern>
<message>You could use: <match no="1" if_match="yes" regexp_match="t(.*)x" regexp_replace="b$1"/></message>
<suggestion><match no="1" if_match="yes" regexp_match="t(.*)n" regexp_replace="moke"/></suggestion>
<suggestion><match no="1" if_match="yes" regexp_match="t(.*)x" regexp_replace="l$1"/></suggestion>
<example correction="moke">It's a <marker>token</marker>.</example>
</rule>
</category>

<category id="OTHER" name="otherCategory" type="addition">
Expand Down

3 comments on commit dce0269

@danielnaber
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arysin
Copy link
Contributor Author

@arysin arysin commented on dce0269 Feb 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also document this at https://dev.languagetool.org/development-overview (i.e. https://github.com/languagetool-org/languagetool-org.github.io/blob/master/development-overview.md)?

ERROR: Permission to languagetool-org/languagetool-org.github.io.git denied to arysin.

@danielnaber
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ERROR: Permission to languagetool-org/languagetool-org.github.io.git denied to arysin.

Could you create a PR? This way, we avoid chaos with the permissions.

Please sign in to comment.