Skip to content

Commit

Permalink
Fix wrong Xpath expression
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed May 4, 2024
1 parent 83c7a10 commit 39892ff
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,15 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
// offsetPositionList.add(new OffsetPosition(pos, pos + sentenceLayoutToken.size()));
// pos += sentenceLayoutToken.size();
// }
Nodes sentences = paragraph.query("//s");
Nodes sentences = paragraph.query(".//s");

if(sentences.size() == 0) {
// Overly careful - we should never end up here.
LOGGER.warn("While the configuration claim that paragraphs must be segmented, we did not find any sentence. ");
updateParagraphNodeWithAnnotations(paragraph, annotations);
}

updateNodes(sentences, annotations);
updateSentencesNodes(sentences, annotations);
} else {
updateParagraphNodeWithAnnotations(paragraph, annotations);
}
Expand Down Expand Up @@ -285,7 +285,7 @@ private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair
}
}

private static void updateNodes(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
private static void updateSentencesNodes(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
int pos = 0;
int sentenceStartOffset = 0;
for (Node sentence : sentences) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1586,7 +1586,7 @@ public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutT
if (StringUtils.isNotEmpty(accumulator)) {
int start = text.indexOf(accumulator.toString(), pos);
newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
pos = textPositionOfToken + 1;
pos = textPositionOfToken;
break;
}
pos = textPositionOfToken;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,32 @@ class FundingAcknowledgementParserIntegrationTest {
assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
}

/**
 * Runs an acknowledgement fragment made of two `<div>` sections, whose paragraphs are already
 * split into `<s>` sentences, through `processingXmlFragment` with sentence segmentation enabled.
 *
 * The serialized result must be identical to the expected XML, in which the recognised
 * entities are wrapped in `<rs>` elements inside the sentence that contains them.
 */
@Test
fun testXmlFragmentProcessing_ErrorCase2_withSentenceSegmentation_shouldWork() {
    // The raw-string fixtures are kept flush-left: their exact content is part of the comparison.
    val sourceXml = """
<div type="acknowledgement">
<div><head>Acknowledgements</head><p><s>The authors would like to acknowledge Lucy Popplewell in the preparation of EMR notes for this study.</s></p></div>
<div><head>The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology</head><p><s>Research Group who have given us permission to utilise the morbidity definitions (©2014).</s><s>The copyright of the morbidity definitions/categorization lists (©2014) used in this publication is owned by Keele University, the development of which was supported by the Primary Care Research Consortium; For access/details relating to the morbidity definitions/categorisation lists (©2014) please go to www.keele.ac.uk/mrr.</s></p></div>
</div>
"""

    val expectedXml = """
<div type="acknowledgement">
<div><head>Acknowledgements</head><p><s>The authors would like to acknowledge <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lucy Popplewell</rs> in the preparation of EMR notes for this study.</s></p></div>
<div><head>The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology</head><p><s>Research Group who have given us permission to utilise the morbidity definitions (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>).</s><s>The copyright of the morbidity definitions/categorization lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) used in this publication is owned by <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Keele University</rs>, the development of which was supported by the <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Primary Care Research Consortium</rs>; For access/details relating to the morbidity definitions/categorisation lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) please go to www.keele.ac.uk/mrr.</s></p></div>
</div>
"""
    val segmentingConfig = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
        .withSentenceSegmentation(true)
        .build()

    // Only the annotated element is asserted on; the second component of the result is not needed here.
    val (annotated, _) = target.processingXmlFragment(sourceXml, segmentingConfig)

    assertThat(annotated.toXML(), CompareMatcher.isIdenticalTo(expectedXml))
}

companion object {
@JvmStatic
@BeforeClass
Expand Down

0 comments on commit 39892ff

Please sign in to comment.