diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java index 3b20d704fa..768855af7b 100644 --- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java @@ -215,7 +215,7 @@ public MutablePair,List,List,List,List> annotations) { + private static void updateSentencesNodes(Nodes sentences, List> annotations) { int pos = 0; int sentenceStartOffset = 0; for (Node sentence : sentences) { diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java index 163c296046..a8ea6a7c3e 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java @@ -1586,7 +1586,7 @@ public static List matchTokenAndString(List layoutT if (StringUtils.isNotEmpty(accumulator)) { int start = text.indexOf(accumulator.toString(), pos); newPositions.add(new OffsetPosition(start, start + accumulator.toString().length())); - pos = textPositionOfToken + 1; + pos = textPositionOfToken; break; } pos = textPositionOfToken; diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt index eead71bbd4..32c96f868c 100644 --- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt +++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt @@ -132,6 +132,32 @@ class FundingAcknowledgementParserIntegrationTest { assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output)) } + @Test + fun testXmlFragmentProcessing_ErrorCase2_withSentenceSegmentation_shouldWork() { + val input =""" +
+
Acknowledgements

The authors would like to acknowledge Lucy Popplewell in the preparation of EMR notes for this study.

+
The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology

Research Group who have given us permission to utilise the morbidity definitions (©2014).The copyright of the morbidity definitions/categorization lists (©2014) used in this publication is owned by Keele University, the development of which was supported by the Primary Care Research Consortium; For access/details relating to the morbidity definitions/categorisation lists (©2014) please go to www.keele.ac.uk/mrr.

+
+ +""" + + val output =""" +
+
Acknowledgements

The authors would like to acknowledge Lucy Popplewell in the preparation of EMR notes for this study.

+
The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology

Research Group who have given us permission to utilise the morbidity definitions (©2014).The copyright of the morbidity definitions/categorization lists (©2014) used in this publication is owned by Keele University, the development of which was supported by the Primary Care Research Consortium; For access/details relating to the morbidity definitions/categorisation lists (©2014) please go to www.keele.ac.uk/mrr.

+
+ +""" + val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder() + .withSentenceSegmentation(true) + .build() + + val (element, mutableTriple) = target.processingXmlFragment(input, config) + + assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output)) + } + companion object { @JvmStatic @BeforeClass