Skip to content

Commit

Permalink
Fix sentence detection initialisation
Browse files (browse the repository at this point in the history)
  • Loading branch information
lfoppiano committed Jun 19, 2023
1 parent cd16548 commit edf8bff
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
9 changes: 4 additions & 5 deletions src/main/java/org/grobid/core/engines/QuantityParser.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.grobid.core.engines;

import com.google.common.collect.Iterables;
import jakarta.inject.Inject;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
Expand All @@ -17,8 +18,6 @@
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.features.FeaturesVectorQuantities;
import org.grobid.core.lang.SentenceDetector;
import org.grobid.core.lang.impl.OpenNLPSentenceDetector;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.lexicon.QuantityLexicon;
Expand All @@ -28,7 +27,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import jakarta.inject.Inject;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -181,9 +179,10 @@ public List<Measurement> process(List<LayoutToken> layoutTokens) {
}

protected List<OffsetPosition> getSentencesOffsets(List<LayoutToken> tokens) {
SentenceDetector segmenter = new OpenNLPSentenceDetector();
SentenceUtilities segmenter = SentenceUtilities.getInstance();

String text = LayoutTokensUtil.toText(tokens);
List<OffsetPosition> results = segmenter.detect(text);
List<OffsetPosition> results = segmenter.runSentenceDetection(text);

if (CollectionUtils.isEmpty(results)) {
results = Arrays.asList(new OffsetPosition(0, text.length()));
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/org/grobid/core/utilities/TextParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public class TextParser {
private AbstractPredIdentifier predicater = null;
private AbstractSRLabeler labeler = null;
private DEPReader depReader = null;
private SentenceDetector segmenter;
private SentenceUtilities segmenter;

// this is for version 1.3.0 of ClearNLP
private CRolesetClassifier roleClassifier = null;
Expand Down Expand Up @@ -120,7 +120,7 @@ private void init(String dictionaryFile, String posModelFile, String depModelFil

depReader = new DEPReader(0, 1, 2, 3, 4, 5, 6);

segmenter = new OpenNLPSentenceDetector();
segmenter = SentenceUtilities.getInstance();
}

/**
Expand Down Expand Up @@ -177,7 +177,7 @@ public synchronized List<Sentence> parseText(String text) throws GrobidException
}

List<Sentence> results = new ArrayList<>();
List<OffsetPosition> sentences = this.segmenter.detect(text);
List<OffsetPosition> sentences = this.segmenter.runSentenceDetection(text);

if (CollectionUtils.isEmpty(sentences)) {
// there is some text but not in a state so that a sentence at least can be
Expand Down Expand Up @@ -219,7 +219,7 @@ public List<Sentence> parse(BufferedReader reader) throws GrobidException {

String text = reader.lines().collect(Collectors.joining());

List<OffsetPosition> sentences = segmenter.detect(text);
List<OffsetPosition> sentences = segmenter.runSentenceDetection(text);

for (OffsetPosition sentencePosition : sentences) {
String sentence = text.substring(sentencePosition.start, sentencePosition.end);
Expand Down

0 comments on commit edf8bff

Please sign in to comment.