diff --git a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java index 9a479ae30d..87f87e9a9e 100755 --- a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java +++ b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java @@ -910,11 +910,7 @@ public StringBuilder toTEIBody(StringBuilder buffer, } buffer.append("\t\t\n"); - SortedSet documentNoteParts = doc.getDocumentPart(SegmentationLabels.FOOTNOTE); - List notes = getTeiNotes(doc, documentNoteParts, Note.NoteType.FOOT); - - documentNoteParts = doc.getDocumentPart(SegmentationLabels.MARGINNOTE); - notes.addAll(getTeiNotes(doc, documentNoteParts, Note.NoteType.MARGIN)); + List notes = getTeiNotes(doc); buffer = toTEITextPiece(buffer, result, biblio, bds, true, layoutTokenization, figures, tables, equations, notes, markerTypes, doc, config); @@ -927,6 +923,20 @@ public StringBuilder toTEIBody(StringBuilder buffer, return buffer; } + protected List getTeiNotes(Document doc) { + // Two types of structured notes are currently supported: footnotes and margin notes. + // We assume that head notes are purely presentational and are never referenced + // from the text body. 
+ + SortedSet documentNoteParts = doc.getDocumentPart(SegmentationLabels.FOOTNOTE); + List notes = getTeiNotes(doc, documentNoteParts, Note.NoteType.FOOT); + + documentNoteParts = doc.getDocumentPart(SegmentationLabels.MARGINNOTE); + notes.addAll(getTeiNotes(doc, documentNoteParts, Note.NoteType.MARGIN)); + + return notes; + } + protected List getTeiNotes(Document doc, SortedSet documentNoteParts, Note.NoteType noteType) { List notes = new ArrayList<>(); diff --git a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java index ab2326f566..41e5e7c52b 100644 --- a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java +++ b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java @@ -1,6 +1,6 @@ package org.grobid.core.document; -import org.grobid.core.data.Footnote; +import org.grobid.core.data.Note; import org.grobid.core.engines.EngineParsers; import org.grobid.core.engines.config.GrobidAnalysisConfig; import org.grobid.core.main.LibraryLoader; @@ -29,11 +29,11 @@ public void testGetTeiNotes() throws Exception { File input = new File(this.getClass().getResource("/footnotes/test.pdf").toURI()); Document doc = engine.getSegmentationParser().processing(DocumentSource.fromPdf(input), GrobidAnalysisConfig.defaultInstance()); - List teiNotes = new TEIFormatter(null, null).getTeiNotes(doc); + List teiNotes = new TEIFormatter(null, null).getTeiNotes(doc); assertThat(teiNotes, hasSize(1)); assertThat(teiNotes.get(0).getText(), is(" http://wikipedia.org")); - assertThat(teiNotes.get(0).getNumber(), is(1)); + assertThat(teiNotes.get(0).getLabel(), is("1")); assertThat(teiNotes.get(0).getPageNumber(), is(1)); } diff --git a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java index 3985d16866..17e15bf442 100644 --- 
a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java +++ b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterTest.java @@ -1,7 +1,7 @@ package org.grobid.core.document; import org.grobid.core.analyzers.GrobidAnalyzer; -import org.grobid.core.data.Footnote; +import org.grobid.core.data.Note; import org.grobid.core.layout.LayoutToken; import org.grobid.core.utilities.GrobidProperties; import org.grobid.core.utilities.LayoutTokensUtil; @@ -26,11 +26,11 @@ public void testMakeFootNote() throws Exception { String text = "1 This is a footnote"; List tokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text); - Footnote footnote = new TEIFormatter(null, null).makeFootNote(tokens, text); + Note footnote = new TEIFormatter(null, null).makeNote(tokens, text, Note.NoteType.FOOT); assertThat(footnote.getText(), is(" This is a footnote")); assertThat(LayoutTokensUtil.toText(footnote.getTokens()), is(" This is a footnote")); - assertThat(footnote.getNumber(), is(1)); + assertThat(footnote.getLabel(), is("1")); }