Skip to content

Commit

Permalink
Fix out-of-bounds exception (OOBE) when applying sentence splitting
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Sep 13, 2022
1 parent 053b235 commit 80b98c4
Showing 1 changed file with 9 additions and 1 deletion.
Expand Up @@ -1581,12 +1581,17 @@ protected Map<Integer, Pair<Node, String>> splitMapNodesOverSentenceSplits(Map<I
List<Integer> refPositions = mapRefNodes.keySet().stream().sorted().collect(Collectors.toList());

int currentNodeIdx = 0;
int previousSentenceOffsetStart = 0;
int previousPosInSentence = 0;
for(int i=0; i<sentencesOffsetPosition.size(); i++) {
OffsetPosition offsetPosition = sentencesOffsetPosition.get(i);
int posInSentence = 0;
int sentenceOffsetStart = offsetPosition.start;
int sentenceOffsetEnd = offsetPosition.end;

if (previousSentenceOffsetStart + previousPosInSentence < sentenceOffsetStart) {
textAccumulator.append(text, previousSentenceOffsetStart + previousPosInSentence, sentenceOffsetStart);
}
for(int j=currentNodeIdx; j<refPositions.size(); j++) {
int refPos = refPositions.get(j);
Node currentNode = mapRefNodes.get(refPos).getLeft();
Expand Down Expand Up @@ -1639,7 +1644,8 @@ protected Map<Integer, Pair<Node, String>> splitMapNodesOverSentenceSplits(Map<I
posInSentence += textChunk.length();
currentNodeIdx = j;
break;
} else if (refPos < sentenceOffsetStart && textAccumulator.length() > refPos
} else if (refPos < sentenceOffsetStart
&& textAccumulator.length() > refPos
&& textAccumulator.length() < refPos + currentNodeLength) {
//The node is between this sentence and the previous one - trouble again dude

Expand Down Expand Up @@ -1670,6 +1676,8 @@ protected Map<Integer, Pair<Node, String>> splitMapNodesOverSentenceSplits(Map<I
}
}
}
previousSentenceOffsetStart = sentenceOffsetStart;
previousPosInSentence = posInSentence;

if (sentenceOffsetStart + posInSentence < sentenceOffsetEnd) {
textAccumulator.append(text, sentenceOffsetStart + posInSentence, sentencesOffsetPosition.get(i).end);
Expand Down

0 comments on commit 80b98c4

Please sign in to comment.