diff --git a/grobid-core/src/main/java/org/grobid/core/data/Footnote.java b/grobid-core/src/main/java/org/grobid/core/data/Note.java similarity index 88% rename from grobid-core/src/main/java/org/grobid/core/data/Footnote.java rename to grobid-core/src/main/java/org/grobid/core/data/Note.java index 30dafd898a..caf9acf52c 100644 --- a/grobid-core/src/main/java/org/grobid/core/data/Footnote.java +++ b/grobid-core/src/main/java/org/grobid/core/data/Note.java @@ -8,7 +8,7 @@ import static com.google.common.collect.Iterables.getLast; -public class Footnote { +public class Note { public enum NoteType { FOOT, @@ -29,11 +29,11 @@ public enum NoteType { private NoteType noteType; - public Footnote() { + public Note() { this.identifier = KeyGen.getKey().substring(0, 7); } - public Footnote(String label, List tokens, String text, NoteType noteType) { + public Note(String label, List tokens, String text, NoteType noteType) { this.identifier = KeyGen.getKey().substring(0, 7); this.label = label; this.tokens = tokens; @@ -41,7 +41,7 @@ public Footnote(String label, List tokens, String text, NoteType no this.noteType = noteType; } - public Footnote(String label, List tokens, String text, int offsetStartInPage, NoteType noteType) { + public Note(String label, List tokens, String text, int offsetStartInPage, NoteType noteType) { this.identifier = KeyGen.getKey().substring(0, 7); this.label = label; this.tokens = tokens; @@ -50,7 +50,7 @@ public Footnote(String label, List tokens, String text, int offsetS this.noteType = noteType; } - public Footnote(String label, List tokens, NoteType noteType) { + public Note(String label, List tokens, NoteType noteType) { this.identifier = KeyGen.getKey().substring(0, 7); this.label = label; this.tokens = tokens; diff --git a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java index 17aa6c6319..9a479ae30d 100755 --- a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java +++ b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java @@ -911,27 +911,27 @@ public StringBuilder toTEIBody(StringBuilder buffer, buffer.append("\t\t\n"); SortedSet documentNoteParts = doc.getDocumentPart(SegmentationLabels.FOOTNOTE); - List footnotes = getTeiNotes(doc, documentNoteParts, Footnote.NoteType.FOOT); + List notes = getTeiNotes(doc, documentNoteParts, Note.NoteType.FOOT); documentNoteParts = doc.getDocumentPart(SegmentationLabels.MARGINNOTE); - footnotes.addAll(getTeiNotes(doc, documentNoteParts, Footnote.NoteType.MARGIN)); + notes.addAll(getTeiNotes(doc, documentNoteParts, Note.NoteType.MARGIN)); buffer = toTEITextPiece(buffer, result, biblio, bds, true, - layoutTokenization, figures, tables, equations, footnotes, markerTypes, doc, config); + layoutTokenization, figures, tables, equations, notes, markerTypes, doc, config); // notes are still in the body - buffer = toTEINote(buffer, footnotes, doc, markerTypes, config); + buffer = toTEINote(buffer, notes, doc, markerTypes, config); buffer.append("\t\t\n"); return buffer; } - protected List getTeiNotes(Document doc, SortedSet documentNoteParts, Footnote.NoteType noteType) { + protected List getTeiNotes(Document doc, SortedSet documentNoteParts, Note.NoteType noteType) { - List footnotes = new ArrayList<>(); + List notes = new ArrayList<>(); if (documentNoteParts == null) { - return footnotes; + return notes; } List allNotes = new ArrayList<>(); @@ -957,14 +957,14 @@ protected List getTeiNotes(Document doc, SortedSet docu allNotes.add(footText); - Footnote footNote = makeFootNote(noteTokens, footText, noteType); - footnotes.add(footNote); + Note note = makeNote(noteTokens, footText, noteType); + notes.add(note); } - return footnotes; + return notes; } - protected Footnote makeFootNote(List noteTokens, String footText, Footnote.NoteType noteType) { + protected Note makeNote(List noteTokens, String footText, Note.NoteType noteType) { Matcher ma = startNum.matcher(footText); int currentNumber = -1; @@ -999,35 +999,37 @@ protected Footnote makeFootNote(List noteTokens, String footText, F } if (currentNumber == -1) - return new Footnote(null, noteTokens, footText, noteType); + return new Note(null, noteTokens, footText, noteType); else - return new Footnote(""+currentNumber, noteTokens, footText, noteType); + return new Note(""+currentNumber, noteTokens, footText, noteType); } private StringBuilder toTEINote(StringBuilder tei, - List footnotes, + List notes, Document doc, List markerTypes, GrobidAnalysisConfig config) throws Exception { - // pattern is + // pattern is // or - // pattern is + // pattern is + + // if no note label is found, no @n attribute but we generate a random xml:id (not be used currently) - for (Footnote footnote : footnotes) { + for (Note note : notes) { Element desc = XmlBuilderUtils.teiElement("note"); - desc.addAttribute(new Attribute("place", footnote.getNoteTypeName())); - if (footnote.getLabel() != null) { - desc.addAttribute(new Attribute("n", footnote.getLabel())); + desc.addAttribute(new Attribute("place", note.getNoteTypeName())); + if (note.getLabel() != null) { + desc.addAttribute(new Attribute("n", note.getLabel())); } - if (footnote.getLabel() != null) { - addXmlId(desc, footnote.getNoteTypeName()+ "_" + footnote.getLabel()); + if (note.getLabel() != null) { + addXmlId(desc, note.getNoteTypeName()+ "_" + note.getLabel()); } else { - addXmlId(desc, footnote.getNoteTypeName()+ "_" + footnote.getIdentifier()); + addXmlId(desc, note.getNoteTypeName()+ "_" + note.getIdentifier()); } - // for labelling bibliographical references in footnotes - List noteTokens = footnote.getTokens(); + // for labelling bibliographical references in notes + List noteTokens = note.getTokens(); org.apache.commons.lang3.tuple.Pair> noteProcess = fullTextParser.processShort(noteTokens, doc); @@ -1065,13 +1067,13 @@ private StringBuilder toTEINote(StringBuilder tei, } } } else { - String footNoteText = footnote.getText(); - if (footNoteText == null) { - footNoteText = LayoutTokensUtil.toText(footnote.getTokens()); + String noteText = note.getText(); + if (noteText == null) { + noteText = LayoutTokensUtil.toText(note.getTokens()); } else { - footNoteText = footNoteText.trim(); + noteText = noteText.trim(); } - desc.appendChild(LayoutTokensUtil.normalizeText(footNoteText)); + desc.appendChild(LayoutTokensUtil.normalizeText(noteText)); } tei.append("\t\t\t"); @@ -1143,7 +1145,7 @@ public StringBuilder toTEITextPiece(StringBuilder buffer, List
figures, List tables, List equations, - List footnotes, + List notes, List markerTypes, Document doc, GrobidAnalysisConfig config) throws Exception { @@ -1257,7 +1259,7 @@ public StringBuilder toTEITextPiece(StringBuilder buffer, } else if (clusterLabel.equals(TaggingLabels.PARAGRAPH)) { int clusterPage = Iterables.getLast(cluster.concatTokens()).getPage(); - if (footnotes == null) { + if (notes == null) { String clusterContent = LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens()); if (isNewParagraph(lastClusterLabel, curParagraph)) { if (curParagraph != null && config.isWithSentenceSegmentation()) { @@ -1287,22 +1289,22 @@ public StringBuilder toTEITextPiece(StringBuilder buffer, curParagraphTokens = new ArrayList<>(); } - List footnotesSamePage = - footnotes.stream() + List notesSamePage = + notes.stream() .filter(f -> !f.isIgnored() && f.getPageNumber() == clusterPage) .collect(Collectors.toList()); - if (footnotesSamePage.size() > 0) { - for (Footnote footnote : footnotesSamePage) { + if (notesSamePage.size() > 0) { + for (Note note : notesSamePage) { Optional matching = cluster.concatTokens() .stream() - .filter(t -> t.getText().equals(footnote.getLabel()) && t.isSuperscript()) + .filter(t -> t.getText().equals(note.getLabel()) && t.isSuperscript()) .findFirst(); if (matching.isPresent()) { int idx = cluster.concatTokens().indexOf(matching.get()); - footnote.setIgnored(true); + note.setIgnored(true); List before = cluster.concatTokens().subList(0, idx); String clusterContentBefore = LayoutTokensUtil.normalizeDehyphenizeText(before); @@ -1320,7 +1322,7 @@ public StringBuilder toTEITextPiece(StringBuilder buffer, } ref.appendChild(matching.get().getText()); - ref.addAttribute(new Attribute("target", "#" + footnote.getNoteTypeName()+"_"+ footnote.getLabel())); + ref.addAttribute(new Attribute("target", "#" + note.getNoteTypeName()+"_"+ note.getLabel())); curParagraph.appendChild(ref); List after = cluster.concatTokens().subList(idx + 1, cluster.concatTokens().size() - 1);