From 1ec624a0788e6bcf2fd2ec5a71838c4a604f48fd Mon Sep 17 00:00:00 2001 From: Clayton Wheeler Date: Wed, 16 May 2018 21:27:30 -0500 Subject: [PATCH] Fix BibDataSetContextExtractor to quote replacement text --- .../utilities/BibDataSetContextExtractor.java | 2 +- .../BibDataSetContextExtractorTest.java | 20 +++++++++++++++++++ .../src/test/resources/test/tei-escape.xml | 12 +++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 grobid-core/src/test/java/org/grobid/core/utilities/BibDataSetContextExtractorTest.java create mode 100644 grobid-core/src/test/resources/test/tei-escape.xml diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java b/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java index 44a400aed2..1a169aed35 100644 --- a/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/BibDataSetContextExtractor.java @@ -83,7 +83,7 @@ private static String extractContextSentence(String cont) { Matcher m = REF_PATTERN.matcher(cont); if (m.find()) { String g = m.group(1); - return m.replaceAll(g); + return m.replaceAll(Matcher.quoteReplacement(g)); } else { throw new IllegalStateException("Implementation error: no found in" + cont); } diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/BibDataSetContextExtractorTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/BibDataSetContextExtractorTest.java new file mode 100644 index 0000000000..73da0287f6 --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/utilities/BibDataSetContextExtractorTest.java @@ -0,0 +1,20 @@ +package org.grobid.core.utilities; + +import org.apache.commons.io.IOUtils; + +import org.junit.Test; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; + +public class BibDataSetContextExtractorTest { + + @Test + public void testRefEscapes() throws Exception { + InputStream is = this.getClass().getResourceAsStream("/test/tei-escape.xml"); + String tei = IOUtils.toString(is, StandardCharsets.UTF_8); + is.close(); + BibDataSetContextExtractor.getCitationReferences(tei); + } + +} \ No newline at end of file diff --git a/grobid-core/src/test/resources/test/tei-escape.xml b/grobid-core/src/test/resources/test/tei-escape.xml new file mode 100644 index 0000000000..c7a52196d1 --- /dev/null +++ b/grobid-core/src/test/resources/test/tei-escape.xml @@ -0,0 +1,12 @@ + + + + +

Lorem ipsum $9,2]. +

+ +
+