Skip to content

Commit

Permalink
avoid empty aligned layout token sentences
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Aug 22, 2021
1 parent f52cf04 commit 675c861
Showing 1 changed file with 5 additions and 3 deletions.
Expand Up @@ -1433,8 +1433,10 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
if (currentSentenceTokens.size() > 0) {
segmentedParagraphTokens.add(currentSentenceTokens);
currentSentenceIndex++;
- if (currentSentenceIndex >= theSentences.size())
+ if (currentSentenceIndex >= theSentences.size()) {
+ currentSentenceTokens = new ArrayList<>();
break;
+ }
sentenceChunk = text.substring(theSentences.get(currentSentenceIndex).start, theSentences.get(currentSentenceIndex).end);
}
currentSentenceTokens = new ArrayList<>();
Expand All @@ -1451,7 +1453,7 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
segmentedParagraphTokens.add(currentSentenceTokens);
}

- if (segmentedParagraphTokens.size() != theSentences.size()) {
+ /*if (segmentedParagraphTokens.size() != theSentences.size()) {
System.out.println("ERROR, segmentedParagraphTokens size:" + segmentedParagraphTokens.size() + " vs theSentences size: " + theSentences.size());
System.out.println(text);
System.out.println(theSentences.toString());
Expand All @@ -1465,7 +1467,7 @@ public void segmentIntoSentences(Element curParagraph, List<LayoutToken> curPara
System.out.println(segmentedParagraphToken);
k++;
}
- }
+ }*/
}

// update the xml paragraph element
Expand Down

0 comments on commit 675c861

Please sign in to comment.