Skip to content

Commit

Permalink
Merge branch 'master' into topic/miurahr/allow-dynamic-loadings-of-la…
Browse files Browse the repository at this point in the history
…nguage-modules
  • Loading branch information
miurahr committed Nov 19, 2023
2 parents 34db9ed + 1f57992 commit bcb07d0
Show file tree
Hide file tree
Showing 367 changed files with 378,746 additions and 192,336 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ public static void profileRulesOnText(String contents,
float timeInSeconds = time / 1000.0f;
float sentencesPerSecond = sentences.size() / timeInSeconds;
System.out.printf(Locale.ENGLISH,
"%-40s%10d%10d%10d%15.1f\n", rule.getId(),
"%-40s%10d%10d%10d%15.1f\n", rule.getFullId(),
time, sentences.size(), matchCount, sentencesPerSecond);
}

Expand Down
4 changes: 2 additions & 2 deletions languagetool-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@
<artifactId>annotations</artifactId>
</dependency>
<dependency>
<groupId>org.jetbrains.intellij.deps</groupId>
<artifactId>trove4j</artifactId>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil-core</artifactId>
</dependency>
<dependency>
<groupId>org.json</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,23 +106,23 @@ public AnalyzedTokenReadings(List<AnalyzedToken> tokens, int startPos) {
public AnalyzedTokenReadings(AnalyzedTokenReadings oldAtr, List<AnalyzedToken> newReadings, String ruleApplied) {
this(newReadings, oldAtr.getStartPos());
if (oldAtr.isSentenceEnd()) {
this.setSentEnd();
setSentEnd();
}
if (oldAtr.isParagraphEnd()) {
this.setParagraphEnd();
setParagraphEnd();
}
this.setWhitespaceBefore(oldAtr.getWhitespaceBefore());
this.setChunkTags(oldAtr.getChunkTags());
setWhitespaceBefore(oldAtr.getWhitespaceBefore());
setChunkTags(oldAtr.getChunkTags());
if (oldAtr.isImmunized()) {
this.immunize(oldAtr.getImmunizationSourceLine());
immunize(oldAtr.getImmunizationSourceLine());
}
if (oldAtr.isIgnoredBySpeller()) {
this.ignoreSpelling();
ignoreSpelling();
}
if (oldAtr.hasTypographicApostrophe()) {
this.setTypographicApostrophe();
setTypographicApostrophe();
}
this.setHistoricalAnnotations(oldAtr.getHistoricalAnnotations());
setHistoricalAnnotations(oldAtr.getHistoricalAnnotations());
addHistoricalAnnotations(oldAtr.toString(), ruleApplied);
}

Expand All @@ -147,29 +147,25 @@ public AnalyzedToken getAnalyzedToken(int idx) {
* @param posTag POS tag to look for
*/
public boolean hasPosTag(String posTag) {
boolean found = false;
for (AnalyzedToken reading : anTokReadings) {
found = posTag.equals(reading.getPOSTag());
if (found) {
break;
if (posTag.equals(reading.getPOSTag())) {
return true;
}
}
return found;
return false;
}

/**
* Checks if the token has a particular POS tag and lemma.
* @param posTag POS tag to look for
* @param lemma lemma to look for
*/
public boolean hasPosTagAndLemma(String posTag, String lemma) {
boolean found = false;
for (AnalyzedToken reading : anTokReadings) {
found = posTag.equals(reading.getPOSTag()) && lemma.equals(reading.getLemma());
if (found) {
break;
if (posTag.equals(reading.getPOSTag()) && lemma.equals(reading.getLemma())) {
return true;
}
}
return found;
return false;
}

/**
Expand All @@ -187,11 +183,8 @@ public boolean hasReading() {
public boolean hasLemma(String lemma) {
boolean found = false;
for (AnalyzedToken reading : anTokReadings) {
if (reading.getLemma() != null) {
found = lemma.equals(reading.getLemma());
if (found) {
break;
}
if (reading.getLemma() != null && lemma.equals(reading.getLemma())) {
return true;
}
}
return found;
Expand All @@ -202,16 +195,14 @@ public boolean hasLemma(String lemma) {
* @param lemmas lemmas to look for
*/
public boolean hasAnyLemma(String... lemmas) {
boolean found = false;
for(String lemma : lemmas) {
for (AnalyzedToken reading : anTokReadings) {
found = lemma.equals(reading.getLemma());
if (found) {
return found;
if (lemma.equals(reading.getLemma())) {
return true;
}
}
}
return found;
return false;
}

/**
Expand All @@ -221,16 +212,12 @@ public boolean hasAnyLemma(String... lemmas) {
* @since 1.8
*/
public boolean hasPartialPosTag(String posTag) {
boolean found = false;
for (AnalyzedToken reading : anTokReadings) {
if (reading.getPOSTag() != null) {
found = reading.getPOSTag().contains(posTag);
if (found) {
break;
}
if (reading.getPOSTag() != null && reading.getPOSTag().contains(posTag)) {
return true;
}
}
return found;
return false;
}

/**
Expand All @@ -255,67 +242,57 @@ public boolean hasAnyPartialPosTag(String... posTags) {
* @since 4.0
*/
public boolean hasPosTagStartingWith(String posTag) {
boolean found = false;
for (AnalyzedToken reading : anTokReadings) {
if (reading.getPOSTag() != null) {
found = reading.getPOSTag().startsWith(posTag);
if (found) {
break;
}
if (reading.getPOSTag() != null && reading.getPOSTag().startsWith(posTag)) {
return true;
}
}
return found;
return false;
}

/**
* Checks if at least one of the readings matches a given POS tag regex.
*
* @param posTagRegex POS tag regular expression to look for
* @since 2.9
*/
public boolean matchesPosTagRegex(String posTagRegex) {
Pattern pattern = Pattern.compile(posTagRegex);
boolean found = false;
return matchesPosTagRegex(pattern);
}

/**
* Checks if at least one of the readings matches a given POS tag pattern.
* @since 6.4
*/
public boolean matchesPosTagRegex(Pattern pattern) {
for (AnalyzedToken reading : anTokReadings) {
if (reading.getPOSTag() != null) {
found = pattern.matcher(reading.getPOSTag()).matches();
if (found) {
break;
}
if (reading.getPOSTag() != null && pattern.matcher(reading.getPOSTag()).matches()) {
return true;
}
}
return found;
return false;
}

public boolean matchesChunkRegex(String chunkRegex) {
Pattern pattern = Pattern.compile(chunkRegex);
boolean found = false;
for ( ChunkTag chunk : getChunkTags()) {
if (chunk != null) {
found = pattern.matcher(chunk.getChunkTag()).matches();
if (found) {
break;
}
for (ChunkTag chunk : getChunkTags()) {
if (chunk != null && pattern.matcher(chunk.getChunkTag()).matches()) {
return true;
}
}
return found;
return false;
}

/**
* Returns the first reading that matches a given POS tag regex.
*
* @param posTagRegex POS tag regular expression to look for
* @since 5.5
*/
public AnalyzedToken readingWithTagRegex(String posTagRegex) {
Pattern pattern = Pattern.compile(posTagRegex);
boolean found = false;
for (AnalyzedToken reading : anTokReadings) {
if (reading.getPOSTag() != null) {
found = pattern.matcher(reading.getPOSTag()).matches();
if (found) {
return reading;
}
if (reading.getPOSTag() != null && pattern.matcher(reading.getPOSTag()).matches()) {
return reading;
}
}
return null;
Expand All @@ -326,13 +303,9 @@ public AnalyzedToken readingWithTagRegex(String posTagRegex) {
* @since 5.8
*/
public AnalyzedToken readingWithLemma(String lemma) {
boolean found;
for (AnalyzedToken reading : anTokReadings) {
if (reading.getLemma() != null) {
found = reading.getLemma().equals(lemma);
if (found) {
return reading;
}
if (reading.getLemma() != null && reading.getLemma().equals(lemma)) {
return reading;
}
}
return null;
Expand Down Expand Up @@ -619,16 +592,17 @@ public String getHistoricalAnnotations() {
* @param historicalAnnotations the historicalAnnotations to set
*/
private void setHistoricalAnnotations(String historicalAnnotations) {
this.historicalAnnotations = historicalAnnotations;
if (GlobalConfig.isVerbose()) {
this.historicalAnnotations = historicalAnnotations;
}
}

private void addHistoricalAnnotations(String oldValue, String ruleApplied) {
if (!ruleApplied.isEmpty()) {
if (!ruleApplied.isEmpty() && GlobalConfig.isVerbose()) {
this.historicalAnnotations = this.getHistoricalAnnotations() + "\n" + ruleApplied + ": " + oldValue + " -> "
+ this;
}
}


/**
* @since 2.3
Expand Down
36 changes: 19 additions & 17 deletions languagetool-core/src/main/java/org/languagetool/CheckResults.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,46 +18,48 @@
*/
package org.languagetool;

import lombok.Getter;
import org.jetbrains.annotations.NotNull;
import org.languagetool.rules.RuleMatch;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.*;
import java.util.stream.Collectors;

/**
* @since 5.3
*/
public class CheckResults {

@Getter
private List<RuleMatch> ruleMatches;
private List<Range> ignoredRanges;
@Getter
private final List<Range> ignoredRanges;
@Getter
private final List<ExtendedSentenceRange> extendedSentenceRanges;
private final List<SentenceRange> sentenceRanges = new ArrayList<>();

public CheckResults(List<RuleMatch> ruleMatches, List<Range> ignoredRanges) {
this.ruleMatches = Objects.requireNonNull(ruleMatches);
this.ignoredRanges = Objects.requireNonNull(ignoredRanges);
}

public List<Range> getIgnoredRanges() {
return ignoredRanges;
public CheckResults(List<RuleMatch> ruleMatches, List<Range> ignoredRanges) {
this(ruleMatches, ignoredRanges, Collections.emptyList());
}

public List<RuleMatch> getRuleMatches() {
return ruleMatches;
public CheckResults(List<RuleMatch> ruleMatches, List<Range> ignoredRanges, List<ExtendedSentenceRange> extendedSentenceRanges) {
this.ruleMatches = Objects.requireNonNull(ruleMatches);
this.ignoredRanges = Objects.requireNonNull(ignoredRanges);
this.extendedSentenceRanges = Objects.requireNonNull(extendedSentenceRanges.stream().sorted().collect(Collectors.toList()));
//TODO: use this later, when we are sure the sentenceRanges (from extendedSentenceRange) are correct.
// Right now the sentenceRanges are calculated differently from those in extendedSentenceRange.
// extendedSentenceRanges.forEach(extendedSentenceRange -> this.sentenceRanges.add(new SentenceRange(extendedSentenceRange.getFromPos(), extendedSentenceRange.getToPos())));
}

@NotNull
public List<SentenceRange> getSentenceRanges() {
return sentenceRanges;
return Collections.unmodifiableList(this.sentenceRanges);
}

public void addSentenceRanges(List<SentenceRange> sentenceRanges) {
this.sentenceRanges.addAll(sentenceRanges);
}

public void setIgnoredRanges(List<Range> ignoredRanges) {
this.ignoredRanges = Objects.requireNonNull(ignoredRanges);
}

public void setRuleMatches(List<RuleMatch> ruleMatches) {
this.ruleMatches = Objects.requireNonNull(ruleMatches);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;

abstract class DynamicLanguage extends Language {

private static final Pattern DASH = Pattern.compile("-.*");

protected final String name;
protected final String code;
protected final File dictPath;
Expand All @@ -40,7 +43,7 @@ abstract class DynamicLanguage extends Language {

@Override
public String getShortCode() {
return code.replaceFirst("-.*", "");
return DASH.matcher(code).replaceFirst("");
}

@Override
Expand Down

0 comments on commit bcb07d0

Please sign in to comment.