diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java b/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java index 44a400aed2..1a169aed35 100644 --- a/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java @@ -83,7 +83,7 @@ private static String extractContextSentence(String cont) { Matcher m = REF_PATTERN.matcher(cont); if (m.find()) { String g = m.group(1); - return m.replaceAll(g); + return m.replaceAll(Matcher.quoteReplacement(g)); } else { throw new IllegalStateException("Implementation error: no found in" + cont); } diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/BibDataSetContextExtractorTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/BibDataSetContextExtractorTest.java new file mode 100644 index 0000000000..73da0287f6 --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/utilities/BibDataSetContextExtractorTest.java @@ -0,0 +1,20 @@ +package org.grobid.core.utilities; + +import org.apache.commons.io.IOUtils; + +import org.junit.Test; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; + +public class BibDataSetContextExtractorTest { + + @Test + public void testRefEscapes() throws Exception { + InputStream is = this.getClass().getResourceAsStream("/test/tei-escape.xml"); + String tei = IOUtils.toString(is, StandardCharsets.UTF_8); + is.close(); + BibDataSetContextExtractor.getCitationReferences(tei); + } + +} \ No newline at end of file diff --git a/grobid-core/src/test/resources/test/tei-escape.xml b/grobid-core/src/test/resources/test/tei-escape.xml new file mode 100644 index 0000000000..c7a52196d1 --- /dev/null +++ b/grobid-core/src/test/resources/test/tei-escape.xml @@ -0,0 +1,12 @@ + + + + +

Lorem ipsum $9,2]. +

+ +
+